Requests-html 设置 headers

要求:已安装 requests-html,且 Python 版本不低于 3.6。

 # -*- coding: utf-8 -*-

from requests_html import HTMLSession


def get_web_page_elements(url, headers=None, xpath_expression=''):
    """Fetch a web page and return the elements matching an XPath expression.

    Args:
        url: Address of the page to request.
        headers: Optional mapping of extra HTTP request headers. ``None``
            (the default) sends only the session's own headers.
        xpath_expression: XPath expression evaluated against the page.

    Returns:
        Whatever ``response.html.xpath(xpath_expression)`` yields for the
        fetched page.
    """
    # The context manager closes the session (and its connections) even if
    # the request or the XPath evaluation raises.
    with HTMLSession() as session:
        response = session.get(url, headers=headers)
        elements_list = response.html.xpath(xpath_expression)
    return elements_list


if __name__ == '__main__':
    url = 'https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000'
    # Header setup
    referer = url
    # The value must not repeat the "Cookie: " header name; the dict key
    # below already supplies it.
    cookie = (
        'atsp=1548864427226_1548863599220; '
        'Hm_lvt_2efddd14a5f2b304677462d06fb4f964=1548863599; '
        'Hm_lpvt_2efddd14a5f2b304677462d06fb4f964=1548863599'
    )
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/72.0.3626.81 Safari/537.36'
    )
    headers = {
        'Referer': referer,
        'Cookie': cookie,
        'User-Agent': user_agent,
    }
    # Fetch the table of contents
    index_xpath_expression = "//a[@class='x-wiki-index-item']"
    index_data = get_web_page_elements(
        url, headers=headers, xpath_expression=index_xpath_expression
    )
    for each_index in index_data:
        # NOTE(review): `.url` looks like the URL of the page the element was
        # parsed from, not the anchor's href -- confirm; `absolute_links`
        # may be what is wanted here.
        print(each_index.text + '\t\t' + each_index.url)

 

相关文章