御风灰灰
Published on 2024-07-11 / 42 Visits
0
0

Python 爬虫示例

  1. 爬取Android常用库
import requests
from bs4 import BeautifulSoup


SOURCE_URL = 'https://hndeveloper.github.io/2017/github-android-ui.html'
CACHE_PATH = 'txt.html'
OUTPUT_PATH = 'ui.md'


def load_html(url, cache_path):
    """Return the page HTML, preferring the local cache over the network.

    The cache file is read as UTF-8 when it exists. Otherwise the page is
    downloaded from *url*, stored at *cache_path* for later runs, and
    returned.

    Raises:
        requests.RequestException: if the download fails or the server
            answers with an error status.
    """
    try:
        with open(cache_path, encoding='utf-8') as cached:
            return cached.read()
    except FileNotFoundError:
        pass  # No cache yet: fall through and download the page.

    # A timeout keeps the script from hanging forever on a dead connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html = response.text
    with open(cache_path, 'w', encoding='utf-8') as cached:
        cached.write(html)
    return html


def split_stars_and_description(li_text, name):
    """Split the text of one list item into ``(stars, description)``.

    *li_text* is the full text of the ``<li>`` and *name* is the text of its
    link. Only the first occurrence of *name* is removed, so a description
    that mentions the library name is left intact. The remainder is split at
    the first ``-`` only, so hyphens inside the description are preserved.
    When there is no ``-`` at all the description is the empty string.
    """
    remainder = li_text.replace(name, '', 1).replace('\n', '')
    stars, _, description = remainder.partition('-')
    return stars.strip(), description.strip()


def write_markdown(soup, outdoc):
    """Write one Markdown table per ``<h2>`` section of *soup* to *outdoc*.

    Each ``<h2>`` starts a numbered section with a table header; each
    following ``<ul>`` contributes one table row per ``<li>`` that contains
    a link. List items without a link are skipped.
    """
    section_number = 1
    for node in soup.find_all(["h2", "ul"]):
        if node.name == 'h2':
            outdoc.write(f"## 1.{section_number} {node.text}\n")
            outdoc.write("| 名称 |  星数  |  说明 |\n")
            outdoc.write("| --- |  ---  |  --- |\n")
            section_number += 1
            continue

        for item in node.find_all("li"):
            link = item.a
            if link is None:
                # Not a library entry (no link to point the row at).
                continue
            name = link.text
            href = link.get('href', '')
            stars, description = split_stars_and_description(
                item.get_text(), name
            )
            outdoc.write(
                f"| [{name}]({href} \"{name}\") | {stars} | {description} |\n"
            )
        # Blank line terminates the Markdown table for this list.
        outdoc.write("\n")


def main():
    """Convert the Android UI library list page into the ``ui.md`` table."""
    html = load_html(SOURCE_URL, CACHE_PATH)
    soup = BeautifulSoup(html, 'html.parser')
    # Explicit UTF-8: the output contains Chinese text and must not depend
    # on the platform's default encoding.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as outdoc:
        write_markdown(soup, outdoc)


if __name__ == "__main__":
    main()

Comment