- 爬取Android常用库
import requests
from bs4 import BeautifulSoup
# Convert the "commonly used Android UI libraries" page into Markdown tables
# (one table per <h2> section) written to ui.md.
SOURCE_URL = 'https://hndeveloper.github.io/2017/github-android-ui.html'
CACHE_PATH = 'txt.html'
OUTPUT_PATH = 'ui.md'


def _split_stars_and_description(entry_text, name):
    """Split the text of one list entry into ``(stars, description)``.

    The link text ``name`` is removed once (so a description that happens
    to repeat the library name is left intact), newlines are dropped, and
    the remainder is split on the FIRST hyphen only, so hyphens inside the
    description are preserved. When there is no hyphen the description is
    an empty string instead of raising ``IndexError``.

    NOTE(review): assumes entries look like "<name> <stars> - <description>";
    confirm against the actual page markup.
    """
    remainder = entry_text.replace(name, '', 1).replace("\n", '')
    stars, _, description = remainder.partition('-')
    return stars.strip(), description.strip()


# Use the locally cached copy of the page when present; download it (and
# populate the cache) only when the cache file is missing.
try:
    with open(CACHE_PATH, 'r', encoding='utf-8') as cache_file:
        datatext = cache_file.read()
except FileNotFoundError:
    response = requests.get(SOURCE_URL, timeout=30)
    response.raise_for_status()
    datatext = response.text
    with open(CACHE_PATH, 'w', encoding='utf-8') as cache_file:
        cache_file.write(datatext)

soup = BeautifulSoup(datatext, 'html.parser')
# Headings and lists are returned in document order, so each <ul> is
# rendered under the most recently seen <h2>.
titlelists = soup.find_all(["h2", "ul"])

# Explicit UTF-8: the table header contains non-ASCII text and the platform
# default encoding may not be able to represent it.
with open(OUTPUT_PATH, 'w', encoding='utf-8') as outdoc:
    count = 1
    for node in titlelists:
        if node.name == 'h2':
            # Section heading followed by the Markdown table header.
            outdoc.write(f"## 1.{count} {node.text}\n")
            outdoc.write("| 名称 | 星数 | 说明 |\n")
            outdoc.write("| --- | --- | --- |\n")
            count += 1
            continue
        if node.name == 'ul':
            for item in node.find_all("li"):
                link = item.a
                if link is None:
                    # Entry without a link: nothing to build a row from.
                    continue
                name = link.text
                href = link.get('href', '')
                startnumber, des = _split_stars_and_description(
                    item.get_text(), name
                )
                outdoc.write(
                    f"| [{name}]({href} \"{name}\") | {startnumber} | {des} |\n"
                )
            # A blank line terminates the Markdown table for this list.
            outdoc.write("\n")