爬虫第三篇:requests模块

requests模块其实就是对urllib.request模块的进一步优化,提供了很多可选的参数,同时简化了操作。下面我还是贴上具体操作的代码。

requests GET请求

GET请求html文件

import requests

# Send a GET request with query parameters and print the decoded HTML body.
headers = {"User-Agent": "Mozilla/5.0"}
baseurl = "http://www.baidu.com/s?"

key = "关键字"
# The 1-based page number is turned into (page - 1) * 10 before being sent
# as the "pn" query parameter.
pn = 1
pn = 10 * (int(pn) - 1)
params = {"wd": key, "pn": pn}

# requests builds the query string from `params`.
res = requests.get(baseurl, params=params, headers=headers)
# Choose the encoding used to decode the body before reading res.text.
res.encoding = "utf-8"
html = res.text
print(html)
# Other attributes of the response object:
#   res.status_code  -> HTTP response code
#   res.url          -> URL address

GET请求二进制文件

import requests

# Download a binary file (an image) and save its raw bytes to disk.
url = "https://ss0.bdstatic.com/70cFvHSh_Q1YnxGkpoWK1HF6hhy/it/u=2914738751,1449131471&fm=26&gp=0.jpg"
headers = {"User-Agent": "Mozilla/5.0"}
# Two steps for binary data: send the request, then read the raw bytes.
# No text encoding is set here: it only influences res.text, while
# res.content always returns the undecoded body.
res = requests.get(url, headers=headers)
# Stop on a 4xx/5xx answer instead of saving an error page as "mmm.jpg".
res.raise_for_status()
# Despite its name, `html` holds the response body as bytes.
html = res.content
# Write to the local file in "wb" (binary write) mode.
with open("./mmm.jpg", "wb") as f:
    f.write(html)

requests POST 请求

import requests
import json

# Send a POST request with form data and extract one field from the JSON reply.
key = 'this is a demo'
# Form fields sent in the request body; "i" carries the text stored in `key`.
# NOTE(review): "salt", "sign", "ts" and "bv" are hard-coded values,
# presumably captured from a browser session - confirm the server still
# accepts them.
data = {
        "i":key,
        "from":"AUTO",
        "to":"AUTO",
        "smartresult":"dict",
        "client":"fanyideskweb",
        "salt":"15458120942800",
        "sign":"108feafc7c01c7461a41034463a8df9b",
        "ts":"1545812094280",
        "bv":"363eb5a1de8cfbadd0cd78bd6bd43bee",
        "doctype":"json",
        "version":"2.1",
        "keyfrom":"fanyi.web",
        "action":"FY_BY_REALTIME",
        "typoResult":"false"
    }
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
headers = {"User-Agent":"Mozilla/5.0"}
# `data=` sends the dict as the POST body.
res = requests.post(url,data=data,headers=headers)
res.encoding = "utf-8"
html = res.text

# Convert the JSON-formatted string into a Python dict.
rDict = json.loads(html)
# Read the "tgt" field of the first entry under "translateResult".
result = rDict["translateResult"][0][0]["tgt"]

原文地址:https://www.cnblogs.com/leijing0607/p/7742527.html