爬虫之爬取某道技术分析

方案:

  该网站用于翻译。通过分析其 js 代码可知,请求中的 sign 参数是对 "fanyideskweb" + 输入内容 + salt + 固定密钥 拼接后的字符串做 md5 得到的,其中 salt 为毫秒时间戳加一位随机数。因此技术方案有两种:

方案一:自己用 md5 函数计算 sign 后再 post 数据(下文代码采用此方案);方案二:使用 js2py 包加载该网站的 js 文件,模拟执行相关 js 代码。

class YYY():
    """Small client for the web translation endpoint, signing each request with MD5.

    Typical use: create an instance, call ``enter_yuuu()`` once so the session
    holds the site's cookies, then call ``download()`` to translate a text.

    The code relies on ``requests``, ``time``, ``random`` and ``hashlib`` being
    imported at module level.
    """

    # Fixed parts of the signature: md5(_CLIENT + text + salt + _SIGN_KEY).
    _CLIENT = "fanyideskweb"
    _SIGN_KEY = "n%A-rKaT5fb[Gy?;N5@Tj"
    # Seconds to wait for the server; requests never times out by default.
    _TIMEOUT = 10

    def __init__(self):
        # NOTE(review): "Host" names the real site while the URLs in this class
        # use a redacted placeholder host; the placeholder has to be replaced
        # with the real host before this can run -- confirm they match.
        self.headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "http://fanyi.×××.com/",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
            "Origin": "http://fanyi.×××.com",
            "Host": "fanyi.youdao.com",
        }
        self.session = requests.session()
        # Merge into the session's own header mapping instead of replacing it,
        # so it stays case-insensitive and keeps the library defaults.
        self.session.headers.update(self.headers)

    def enter_yuuu(self):
        """Load the translation home page so the session picks up its cookies.

        Call this before ``download()``: per the original author's note, the
        POST request fails when those cookies are missing.
        """
        self.session.get("http://fanyi.×××.com/", timeout=self._TIMEOUT)

    @staticmethod
    def _build_form_data(input_word, ts, num):
        """Build the signed form fields for one translation request.

        Args:
            input_word: Text to translate.
            ts: Millisecond timestamp as a string.
            num: Random digit appended to ``ts`` to form the salt.

        Returns:
            A dict of form fields ready to be POSTed.
        """
        salt = "%s%s" % (ts, num)
        # sign = md5(client + text + salt + secret key), as a lowercase hex string.
        raw = YYY._CLIENT + input_word + salt + YYY._SIGN_KEY
        sign = hashlib.md5(raw.encode()).hexdigest()
        return {
            "i": input_word,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": YYY._CLIENT,
            "salt": salt,  # timestamp followed by the random digit
            "sign": sign,
            "ts": ts,  # millisecond timestamp, e.g. 1568621840962
            # 32-character lowercase md5(version); per the original author's
            # note, version is the browser version string, here:
            # 5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36
            "bv": "3ca2e6bf257529213f041a4416ab18ca",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            # Stray trailing space from the original value removed; the
            # lowercase "l" is kept exactly as the original spelled it.
            "action": "FY_BY_REALTlME",
        }

    def download(self, input_word=None):
        """Translate a text, print the raw response body and return it.

        Args:
            input_word: Text to translate. When ``None`` (the default) the
                user is prompted on stdin.

        Returns:
            The response body decoded as text.
        """
        if input_word is None:
            input_word = input("请输入要翻译的内容:")
        ts = str(int(time.time() * 1000))
        num = random.randint(0, 9)
        data = self._build_form_data(input_word, ts, num)
        url = "http://fanyi.×××.com/translate_o?smartresult=dict&smartresult=rule"
        resp = self.session.post(url, data=data, timeout=self._TIMEOUT)
        text = resp.content.decode()
        print(text)
        return text

if __name__ == '__main__':
    # Visit the home page first so the session carries the cookies,
    # then prompt for a text and print its translation response.
    translator = YYY()
    translator.enter_yuuu()
    translator.download()
原文地址:https://www.cnblogs.com/xuehaiwuya0000/p/11528325.html