pyquery——以 jQuery 的语法来解析和操作 XML 文档

pyquery 允许对 XML 文档进行 jQuery 风格的查询,其 API 尽可能与 jQuery 保持一致。pyquery 基于 lxml 实现快速的 XML 和 HTML 操作,
能够以 jQuery 的语法来解析和操作 HTML 文档。

实例:爬取疫情报告
https://voice.baidu.com/act/newpneumonia/newpneumonia (今天报错还未调试成功,明天继续)

 

 

import requests
from pyquery import PyQuery as pq

# CSS selector for the table rows to scrape.
# NOTE(review): the class suffix looks build-generated and may change when the
# page is redeployed; if the table is rendered client-side, the HTML returned
# by requests will contain no matching rows -- confirm against the live page.
ROW_SELECTOR = "table.table thead tr.VirusTable_1-1-156_26gN5Z"

# Raw string: "\y" is not a valid escape sequence in a normal string literal.
OUTPUT_PATH = r"D:\yiqing.csv"


def get_page(url):
    """Request *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.

    Returns:
        The page source as a string.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    response.encoding = "utf8"
    return response.text


def parse(text):
    """Extract one record per matched table row and append them to OUTPUT_PATH.

    Each record is written as area, confirmed, deaths and cured, every field
    followed by two tab characters, one record per line.

    Args:
        text: HTML source of the page to parse.
    """
    doc = pq(text)

    lines = []
    for row in doc(ROW_SELECTOR).items():
        area = row.find("span").text()                # area name
        confirm = row.find("td:nth-child(2)").text()  # confirmed cases
        death = row.find("td:nth-child(3)").text()    # deaths
        cure = row.find("td:nth-child(4)").text()     # cured
        lines.append("\t\t".join((area, confirm, death, cure)) + "\t\t\n")

    # Open the file once for all rows rather than once per row; skip the open
    # entirely when nothing matched so no empty file is created.
    if lines:
        with open(OUTPUT_PATH, "a", encoding="utf8") as f:
            f.writelines(lines)
    print("写入完成")


if __name__ == "__main__":
    url = "https://voice.baidu.com/act/newpneumonia/newpneumonia"
    text = get_page(url)
    parse(text)

 

相关文章