爬虫01-urllib常用函数

# Tour of commonly used urllib functions:
#   1. open a web page, 2. download resources to disk,
#   3. encode/decode a query string, 4. split a URL into its components.

from urllib import parse
from urllib import request

PAGE_URL = "http://www.baidu.com"
IMAGE_URL = (
    "https://bkimg.cdn.bcebos.com/pic/38dbb6fd5266d0167927ca029b2bd40735fa35d9"
    "?x-bce-process=image/resize,m_lfit,w_268,limit_1/format,f_jpg"
)


def read_page(url):
    """Open *url* and return the HTTP status code of the response.

    The response object also offers ``read()`` (whole body as bytes),
    ``readline()`` (one line) and ``readlines()`` (list of lines).
    """
    # The context manager closes the connection even if reading fails;
    # a bare ``urlopen`` call would leave the response open.
    with request.urlopen(url) as resp:
        return resp.getcode()


def download_samples():
    """Save the demo web page and the demo image into the working directory."""
    request.urlretrieve("http://www.baidu.com", "baidu.html")  # web page
    request.urlretrieve(IMAGE_URL, "Jay.jpg")  # image


def round_trip_query(params):
    """Encode *params* as a query string, then decode it again.

    Returns:
        A ``(encoded, decoded)`` tuple: the percent-encoded query string and
        the dict produced by ``parse_qs`` (each value is a list of strings).
    """
    encoded = parse.urlencode(params)
    decoded = parse.parse_qs(encoded)
    return encoded, decoded


def url_report_lines(url):
    """Return the lines the demo prints when splitting *url* into components.

    The first group comes from ``urlparse`` (result, then scheme, netloc,
    path, params, query, fragment). The second comes from ``urlsplit``,
    whose result has no ``params`` field; the remaining fields are the same.
    """
    lines = []
    for splitter in (parse.urlparse, parse.urlsplit):
        result = splitter(url)
        lines.append(str(result))  # the full result
        # Both results are named tuples, so iterating yields the fields
        # in declaration order.
        lines.extend(result)
    return lines


def main():
    """Run every step of the demo in its original order."""
    # 1. Read a web page.
    read_page(PAGE_URL)

    # 2. Download.
    download_samples()

    # 3. Encode and decode.
    _, decoded = round_trip_query({"wd": "周杰伦"})
    print(decoded)

    # 4. Split the URL into its components.
    for line in url_report_lines(PAGE_URL):
        print(line)


if __name__ == "__main__":
    main()

  

相关文章