Python 学习笔记（urllib 库）

一、什么是 urllib？

urllib 是 Python 内置的 HTTP 请求库，无需额外安装，包含以下四个模块：

urllib.request 请求模块

urllib.error 异常处理模块

urllib.parse URL 解析模块

urllib.robotparser robots.txt解析模块

二、相比 Python 2 的变化

Python2

import urllib2

response = urllib2.urlopen('http://www.baidu.com')

Python3

# Python 3: urlopen is provided by the urllib.request submodule.
import urllib.request

response = urllib.request.urlopen('http://www.baidu.com')

#urlopen

# 1. GET request
# urlopen without `data` issues a GET. The response is used as a context
# manager so the underlying connection is closed once the body has been read.
import urllib.request
with urllib.request.urlopen('http://www.baidu.com') as response:
    # read() returns bytes; decode them to text before printing.
    print(response.read().decode('utf-8'))

# 2. POST request
# Supplying `data` (which must be bytes) makes urlopen send a POST.
import urllib.request
import urllib.parse
# urlencode builds the form-encoded string; encode it to bytes for the body.
data = urllib.parse.urlencode({'word':'hello'}).encode('utf-8')
# Context manager closes the connection after the body has been read.
with urllib.request.urlopen('http://httpbin.org/post',data=data) as response:
    print(response.read().decode('utf-8'))

# 3. Request timeout
# A timeout can surface in two forms: wrapped in URLError (e.reason is the
# socket.timeout) or as a bare socket.timeout raised while waiting for the
# response. Both are reported as 'TIME OUT'.
import urllib.request
import urllib.error
import socket
try:
    response = urllib.request.urlopen('http://httpbin.org/get',timeout=0.1)
except socket.timeout:
    # Timeout that was not wrapped in URLError.
    print('TIME OUT')
except urllib.error.URLError as e:
    # Other URLError causes (DNS failure, refused connection, ...) are
    # deliberately ignored here, as in the original notes.
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
else:
    # The request unexpectedly succeeded within the timeout; release the
    # connection since the response is not used.
    response.close()

# Response
# 1. Response type
import urllib.request
# Context manager closes the connection once the response has been inspected.
with urllib.request.urlopen('http://www.baidu.com') as response:
    # Printed explicitly: a bare expression shows nothing outside the REPL.
    print(type(response))
    #>><class 'http.client.HTTPResponse'>

    # 2. Status code, all response headers (returned as a list), and a
    #    single header looked up by name
    print(response.status)
    #>>200
    print(response.getheaders())
    print(response.getheader('Server'))

    # 3. Response body; read() returns it in binary form (bytes)
    raw_body = response.read()

# Request class
# Building a Request object allows the URL, headers, body and HTTP method to
# be specified together before the request is sent with urlopen.
from urllib import request,parse
url = "http://httpbin.org/post"
headers = {}
# Named form_data rather than `dict` so the builtin dict type is not shadowed.
form_data = {
    'name':'Tim'
}
# The request body must be bytes: form-encode the fields, then encode.
data = parse.urlencode(form_data).encode('utf8')
req = request.Request(url=url,headers=headers,data=data,method='POST')
# Context manager closes the connection after the body has been read.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))