Python 2 urllib2/urllib 实现

urllib2和urllib是Python 2标准库中的两个模块。要实现HTTP功能，通常以urllib2为主、urllib为辅（在Python 3中，二者已被合并为urllib.request、urllib.parse等子模块）。

urllib2提供一个基础函数urlopen，通过向指定的url发出请求来获取数据。最简单的形式如下：

GET 请求

import urllib2

url = "http://www.zhihu.com"

# Build the request object; extra HTTP headers can be supplied via `headers`.
request = urllib2.Request(url, headers={})
# Send the request and obtain the response object.
response = urllib2.urlopen(request)
try:
    html = response.read()
finally:
    # Always release the underlying connection, even if read() fails.
    response.close()

# Save the downloaded content to zhihu.html ('wb' writes the bytes unchanged).
with open('zhihu.html', 'wb') as f:
    f.write(html)

POST 请求

import urllib
import urllib2

url = 'https://passport.csdn.net/account/verify'

# Form fields to submit; kept under its own name so `data` only ever holds
# the URL-encoded request body.
form_fields = {'username': '******', 'password': '*****', 'lt': "LT-24129-fLph1VM5RqhCgq4OdpuReGFC0p5Hbh"}

# User-Agent and Referer are sent as request headers.
# User-Agent: some servers or proxies inspect this value to decide whether
# the request was issued by a browser.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

# Referer: servers sometimes check it as a hotlink-protection measure.
referer = 'https://passport.csdn.net'

# URL-encode the form fields into the request body.
data = urllib.urlencode(form_fields)

request = urllib2.Request(url)
request.add_header('User-agent', user_agent)
request.add_header('Referer', referer)
# Attaching a body makes urllib2 issue a POST instead of a GET.
request.add_data(data)

try:
    response = urllib2.urlopen(request)
    try:
        print(response.read())
    finally:
        # Release the connection even if reading the body fails.
        response.close()
except urllib2.HTTPError as e:
    # The server answered, but with an error status code.
    print(e.code)
except urllib2.URLError as e:
    # The server could not be reached (DNS failure, refused connection, ...).
    # Must come after HTTPError, which is a subclass of URLError.
    print(e.reason)

3：Cookie处理

urllib2对Cookie的处理也是自动的，使用cookielib模块的CookieJar类进行Cookie管理

import urllib2
import cookielib

# CookieJar collects the cookies set by the responses of the opener below.
cookie = cookielib.CookieJar()

# Build an opener whose HTTPCookieProcessor stores and resends cookies
# through the jar.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

response = opener.open('https://www.zhihu.com')
# Only the cookies are needed here, so release the connection right away.
response.close()

for item in cookie:
    # %-formatting instead of '+': a cookie's value may be None, which would
    # make string concatenation raise TypeError.
    print('%s:%s' % (item.name, item.value))