基本库使用（urllib,requests）

urllib（request, error, parse, robotparser）

　　request模块

　　　　方法：urlopen()，其返回的响应对象提供 read()、readinto()、getheader(name)、getheaders()、fileno() 等方法，以及 msg、status、reason、debuglevel、closed 等属性

　　　　　　　最基本的 HTTP 请求方法，利用它可以模拟浏览器发起请求的过程，同时它还能处理授权验证（authentication）、重定向（redirection）、浏览器 Cookie 以及其他内容。

import urllib.request

# Issue a plain GET request. The `with` block closes the underlying
# connection as soon as the body has been read, instead of leaking it.
with urllib.request.urlopen("https://www.baidu.com") as response:
    # read() returns the raw body as bytes; decode it to get text.
    print(response.read().decode("utf-8"))
    # Show the type of the object returned by urlopen().
    print(type(response))



--->>>
<html>
<head>
    <script>
        location.replace(location.href.replace("https://","http://"));
    </script>
</head>
<body>
    <noscript><meta http-equiv="refresh" content="0;url=http://www.baidu.com/"></noscript>
</body>
</html>


<class 'http.client.HTTPResponse'>

urlopen()

import urllib.request

# Inspect response metadata. The `with` block guarantees the connection is
# closed once the headers and status have been printed.
with urllib.request.urlopen("https://www.baidu.com") as response:
    # All response headers as a list of (name, value) tuples.
    print(response.getheaders())
    # A single header looked up by name.
    print(response.getheader("server"))
    # Numeric HTTP status code of the response.
    print(response.status)

　　　data 参数（传入 data 时发送的是 POST 请求；GET 请求不带 data）

import urllib.parse
import urllib.request


# urlencode() turns the dict into the form string "word=hello"; urlopen()
# needs the request body as bytes, so encode it before sending.
data = urllib.parse.urlencode({"word": 'hello'}).encode("utf-8")
# Passing `data` makes urlopen() send a POST request instead of a GET.
# The `with` block closes the connection after the body has been read.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read())



---》
b'{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "word": "hello"
  }, 
  "headers": {
    "Accept-Encoding": "identity", 
    "Content-Length": "10", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "Python-urllib/3.6"
  }, 
  "json": null, 
  "origin": "60.218.161.81, 60.218.161.81", 
  "url": "https://httpbin.org/post"
}
'

　　　　timeout 参数用于设置超时时间，单位为秒（通常用这个超时参数来控制网页的响应时间：如果长时间未响应就会抛出异常，可以通过异常处理跳过该页面的抓取）

import urllib.parse
import urllib.request
import urllib.error
import socket


# Give the server only 0.1 s to respond so the timeout path is easy to trigger.
try:
    response = urllib.request.urlopen("httpS://httpbin.org/get",timeout=0.1)
except urllib.error.URLError as e:
    # A timeout while connecting or sending is wrapped in URLError, with the
    # original socket.timeout stored in e.reason.
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
    else:
        # Report other failures instead of silently swallowing them.
        print('URL ERROR:', e.reason)
except socket.timeout:
    # A timeout while waiting for the response is raised directly and is
    # not wrapped in URLError, so it needs its own handler.
    print('TIME OUT')

·　　　　　Request 类（在 urlopen 的基础上可以增加 headers={} 等信息）

　　　　　　urllib.request.Request(url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None)

from urllib import request, parse

url = "https://www.taobao.com/post"
# Custom request headers; here a browser-like User-Agent string.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'}
# Named form_fields rather than `dict` so the builtin is not shadowed.
form_fields = {'name': 'word'}
# The request body must be bytes: URL-encode the form, then encode as UTF-8.
data = parse.urlencode(form_fields).encode("utf-8")
# HTTP method names are case-sensitive, so "POST" must be written in uppercase.
req = request.Request(url=url, data=data, headers=headers, method='POST')
# urlopen() accepts a Request object as well as a URL string; the `with`
# block closes the connection after the body has been read.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))



也可以：
# Alternative: build the Request without headers, then attach them one at a
# time. The class is `Request` (capital R), and add_header() takes the header
# name and its value as two separate arguments.
req = request.Request(url=url, data=data, method='POST')
req.add_header('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36')

　　　　　　高级用法：