- urllib库详解 PYTHON内置的库
- urllib.request 请求模块
- urllib.error 异常处理模块
- urllib.parse URL解析模块
- urllib.robotparser robots.txt解析模块,用于判断哪些页面可以爬取、哪些不可以爬取
urlopen
import urllib.request

# Fetch the Baidu home page and print its body decoded as UTF-8.
# urllib.request is a module and is not callable; the request has to go
# through urllib.request.urlopen().
with urllib.request.urlopen("http://www.baidu.com") as response:
    # The with-block closes the underlying connection once the body is read.
    print(response.read().decode("utf-8"))
urllib.parse封装POST数据
import urllib.parse
import urllib.request

# Send a form-encoded POST to httpbin and print the echoed response.
# urlencode() returns a str; urlopen() needs the payload as bytes.
form = urllib.parse.urlencode({'world': 'hello'})
data = bytes(form, encoding='utf-8')
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    # Print the decoded body instead of discarding it, so the snippet also
    # shows its result when run as a script rather than in a REPL.
    print(response.read().decode("utf-8"))
from urllib.parse import urlparse

# Split a URL into its components once and show two of them.
parsed = urlparse("http://www.baidu.com")
print(parsed.netloc)  # www.baidu.com
print(parsed.scheme)  # http
urllib.request设置超时时间,并且捕获错误
import socket
import urllib.error
import urllib.request

# Request httpbin with a deliberately tiny timeout and report when it expires.
try:
    response = urllib.request.urlopen("http://httpbin.org/get", timeout=0.1)
except urllib.error.URLError as e:
    # A timeout while connecting or sending is wrapped in URLError, with the
    # original socket.timeout available as e.reason. Other URLError causes
    # are ignored here, exactly as in the original snippet.
    if isinstance(e.reason, socket.timeout):
        print("time out")
except socket.timeout:
    # A timeout while waiting for the response is raised directly, not
    # wrapped in URLError, so it needs its own handler.
    print("time out")
else:
    # The request succeeded within the timeout; release the connection.
    response.close()
响应
响应类型
from urllib.request import urlopen

# Inspect the object returned by urlopen(): its type, headers and body.
with urlopen('http://www.python.org') as response:
    print(type(response))  # <class 'http.client.HTTPResponse'>
    # getheaders() takes no argument and returns every response header
    # as a list of (name, value) tuples.
    print(response.getheaders())
    # A single header is looked up with getheader(name), e.g. "nginx".
    print(response.getheader("Server"))
    # The response body, decoded with the default UTF-8 codec.
    print(response.read().decode())
打开一个连接
import urllib.request

# Open python.org over HTTPS, read the raw bytes, then print them as UTF-8 text.
page = urllib.request.urlopen("https://www.python.org")
raw_body = page.read()
print(raw_body.decode('utf-8'))
打开更加复杂的请求可以先构造一个Request对象,再交给urlopen
import urllib.request

# Build an explicit Request object first, then pass it to urlopen().
# The output is the same as calling urlopen() with the bare URL; the Request
# form is the one to use when something extra, such as headers, must be added.
req = urllib.request.Request("https://python.org")
reply = urllib.request.urlopen(req)
html = reply.read().decode("utf-8")
print(html)
现在增加内容在请求里面
from urllib import parse, request

# POST a form to httpbin with custom headers supplied to the Request constructor.
url = 'http://httpbin.org/post'
headers = {
    'User-Agent': 'Mozilla/4.0(compatible;MSIE 5.5;Windows nt)',
    'Host': 'httpbin.org',
}
# Named form_fields rather than `dict` so the builtin type is not shadowed.
form_fields = {'name': 'Germey'}
data = bytes(parse.urlencode(form_fields), encoding='utf-8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
with request.urlopen(req) as response:
    # The with-block closes the connection after the body has been printed.
    print(response.read().decode('utf-8'))
另一个方法是先创建Request对象,再用add_header()逐个添加请求头
from urllib import parse, request

# POST a form to httpbin, attaching headers after the Request is created.
url = 'http://httpbin.org/post'
# Named form_fields rather than `dict` so the builtin type is not shadowed.
form_fields = {'name': 'Germey'}
data = bytes(parse.urlencode(form_fields), encoding='utf-8')
# No headers= argument here: the headers are added below with add_header().
req = request.Request(url=url, data=data, method='POST')
# add_header(key, val) takes exactly one header per call.
req.add_header('User-Agent', 'Mozilla/4.0(compatible;MSIE 5.5;Windows nt)')
req.add_header('Host', 'httpbin.org')
with request.urlopen(req) as response:
    # The with-block closes the connection after the body has been printed.
    print(response.read().decode('utf-8'))