# GET request
import requests

# Home page of the site being requested.
host_url = 'https://www.pearvideo.com/'

# Browser version and related information, sent as the User-Agent header.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.80 Safari/537.36"
    ),
}

# Issue the GET request with the custom headers attached.
res = requests.get(host_url, headers=headers)
# POST request
# Send a POST request whose form body carries a single key/value pair.
r = requests.post('http://httpbin.org/post', data={'key': 'value'})
# DELETE request
# Send a DELETE request to the given URL.
r = requests.delete('http://httpbin.org/delete')
# PUT request
# Send a PUT request whose form body carries a single key/value pair.
r = requests.put('http://httpbin.org/put', data={'key': 'value'})
import requests

# Fetch a page, then print the attributes of the returned response object.
respone = requests.get('http://www.jianshu.com')

# --- Attributes of the response ---
# Response body as a text string.
print(respone.text)
# Response body in binary form.
print(respone.content)
# Status code of the response.
print(respone.status_code)
print(respone.headers)
print(respone.cookies)
# Response cookies in dictionary form.
print(respone.cookies.get_dict())
print(respone.cookies.items())
print(respone.url)
print(respone.history)
print(respone.encoding)
import os
import re
from concurrent.futures import ThreadPoolExecutor

import requests

# Site root; also used as the prefix for the relative links found on the index.
host_url = 'https://www.pearvideo.com/'
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.80 Safari/537.36"
    ),
}


def get_index():
    """Request ``host_url`` with the module headers and return the body text."""
    res = requests.get(host_url, headers=headers)
    return res.text


def parser_index(text):
    """Return the detail-page URLs found in the index HTML *text*.

    Every ``href`` captured from an ``<a ... class="vervideo-lilink actplay">``
    tag is prefixed with ``host_url``. The result is an empty list when
    nothing matches.
    """
    links = re.findall(
        r'<a href="(.*?)" class="vervideo-lilink actplay">', text
    )
    return [host_url + link for link in links]


def get_detail(html_text):
    """Extract the download address and title from a detail page.

    Returns a dict with the keys ``'download_index'`` and ``'title'``.

    Raises:
        AttributeError: if either pattern is missing from *html_text*
            (``re.search`` returns ``None`` in that case).
    """
    # Download address of the video. The dot before "mp4" is escaped so that
    # only a literal ".mp4" suffix is accepted.
    download_index = re.search(r'srcUrl="(.*?\.mp4)"', html_text).group(1)
    # Title of the video.
    title = re.search(r'<h1 class="video-tt">(.*?)</h1>', html_text).group(1)
    dic = {
        'download_index': download_index,
        'title': title,
    }
    print('成功链接到[%s]视频文件' % title)
    return dic


def get_video(video_url, title):
    """Download *video_url* and save it as ``down_pearvideos/<title>.mp4``.

    Path separators in *title* are replaced with ``_`` so that a scraped
    title cannot point outside the download directory.
    """
    video_bytes = requests.get(video_url).content
    # exist_ok avoids the check-then-create race when several worker threads
    # reach this point at the same time.
    os.makedirs('down_pearvideos', exist_ok=True)
    safe_title = re.sub(r'[\\/]', '_', title)
    file_path = os.path.join('down_pearvideos', safe_title) + '.mp4'
    with open(file_path, 'wb') as f:
        f.write(video_bytes)
    print(file_path + '下载成功!')


if __name__ == '__main__':
    text = get_index()
    url_list = parser_index(text)
    # Leaving the ``with`` block waits for all submitted downloads and then
    # shuts the pool down.
    with ThreadPoolExecutor(10) as pool:
        for url in url_list:
            response = requests.get(url, headers=headers).text
            content_dic = get_detail(response)
            # Sequential alternative: call get_video(...) directly here.
            # Downloads are submitted to the thread pool to crawl faster.
            pool.submit(
                get_video,
                content_dic['download_index'],
                content_dic['title'],
            )
import re

import requests

headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.80 Safari/537.36"
    ),
}

# Step 1: load the login page to obtain the form token and the cookies.
login_url = 'https://github.com/login'
login_response = requests.get(login_url, headers=headers)
# re.search returns None when the token is absent, in which case .group(1)
# raises AttributeError.
login_token = re.search(
    r'name="authenticity_token" value="(.*?)"', login_response.text
).group(1)
print(login_token)
login_cookie = login_response.cookies.get_dict()
print(login_cookie)

# Step 2: post the login form, sending back the cookies from step 1.
session_url = 'https://github.com/session'
session_response = requests.post(
    session_url,
    headers=headers,
    cookies=login_cookie,
    data={
        "commit": "Sign in",
        # NOTE(review): "?" is presumably a mis-encoded check mark from the
        # original form field - confirm against the live login form.
        "utf8": "?",
        "authenticity_token": login_token,
        # NOTE(review): credentials are hard-coded in source; load them from
        # the environment or a prompt before using this outside a demo.
        "login": "yangyuanhu",
        "password": "123654asd",
    },
)
print(session_response.text)