python爬虫练习1-获取百度翻译结果

提示:python脚本需与js脚本(b.js)放在同一目录,并在该目录下运行(脚本按相对路径打开b.js)
依赖包:requests、execjs  安装命令:pip install requests PyExecJS(PyExecJS还需要本机装有Node.js等JS运行环境)

import requests, execjs, re


class Translate:
    """Translate a string from Chinese to English via Baidu's web endpoint.

    The flow implemented by ``main`` is:

    1. ``get_gtk``  - download the translate page and extract ``window.gtk``.
    2. ``get_sign`` - run the ``e`` function from ``b.js`` on the text and gtk.
    3. POST the text plus sign to ``/v2transapi`` and return the first result.

    Args:
        trans_str: The text to translate.
    """

    # Placeholder until get_gtk() stores the real value (a string).
    gtk = 0

    # User-Agent sent with both the page request and the API request.
    _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'

    # Seconds to wait for each HTTP call; without a timeout requests can block forever.
    _TIMEOUT = 10

    def __init__(self, trans_str):
        self.trans_str = trans_str

    def get_gtk(self):
        """Fetch the translate page and store its ``window.gtk`` value in ``self.gtk``.

        Raises:
            IndexError: If the page does not contain a ``window.gtk`` assignment.
        """
        headers = {
            'User-Agent': self._USER_AGENT,
        }
        r = requests.get("https://fanyi.baidu.com/translate", headers=headers,
                         timeout=self._TIMEOUT)
        # Accept either quote style around the value.
        found = re.search(r"window\.gtk = ['\"](.*?)['\"]", r.content.decode())
        if found is None:
            raise IndexError("window.gtk was not found in the translate page")
        self.gtk = found.group(1)

    def get_sign(self):
        """Compute the request signature for ``self.trans_str`` and store it in ``self.sign``.

        Loads ``b.js`` (resolved relative to the current working directory) and
        calls its ``e(text, gtk)`` function.
        """
        with open('b.js', encoding='utf-8') as js_file:
            ctx = execjs.compile(js_file.read())
        # ctx.call passes the arguments as data, so quotes or backslashes in the
        # text cannot break (or inject into) the evaluated JavaScript.
        self.sign = ctx.call('e', self.trans_str, str(self.gtk))

    def _build_form(self):
        """Return the POST form for ``self.trans_str``; requires ``self.sign`` to be set."""
        return {
            'from': 'zh',
            'to': 'en',
            'query': self.trans_str,
            'transtype': 'translang',
            'simple_means_flag': '3',
            'sign': self.sign,
            # NOTE(review): presumably tied to the captured cookies below - confirm
            # and refresh together with them if the API starts rejecting requests.
            'token': 'bccfb308f37f4fc8546087e497580fdb',
            'domain': 'common'
        }

    def main(self):
        """Run the whole flow and return the translated text.

        Returns:
            The ``dst`` field of the first entry in ``trans_result.data``.

        Raises:
            KeyError: If the response JSON has no ``trans_result`` entry.
        """
        self.get_gtk()
        self.get_sign()
        # Request the translation.
        # The cookies and headers were captured from a browser request;
        # https://curlconverter.com/ is a convenient way to obtain them.
        cookies = {
            'BIDUPSID': '3D18ACCC2A9A99FDAD0FCC31752F76E5',
            'PSTM': '1635406274',
            'BAIDUID': 'CF9D511F01E42BADA556A3221A4ACAE5:FG=1',
            '__yjs_duid': '1_b4837114a08ad6d1ca0d506cf68048841635487632765',
            'REALTIME_TRANS_SWITCH': '1',
            'FANYI_WORD_SWITCH': '1',
            'HISTORY_SWITCH': '1',
            'SOUND_SPD_SWITCH': '1',
            'SOUND_PREFER_SWITCH': '1',
            'MCITY': '-268%3A131%3A',
            'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
            'Hm_lvt_64ecd82404c51e03dc91cb9e8c025574': '1635759140,1635766530,1635766535,1636341014',
            'H_PS_PSSID': '34448_34068_31254_35062_34505_34917_34606_26350_34971_22159_35018',
            'BDSFRCVID': 'OT-OJeC6263U6FnHiyFM5NkfZKhZCYvTH6aoF2bhaGlNhJ7aLA0eEG0PMU8g0Ku-S2-cogKK0mOTHv-F_2uxOjjg8UtVJeC6EG0Ptf8g0f5',
            'H_BDCLCKID_SF': 'tJ-JVC0XfC-3fP36q4cqMJDtbxQXKK62aKDs0PoYBhcqEIL4ejrfbt4Vy-QjBUAtBD3h_-QHBlbFjxbSj4Qobj-4barnbMbHtNnJXIo_5l5nhMJR257JDMn3-l3b2t6y523ion6vQpn-KqQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0Djb-DG_Dt6nfb5kX3Rr_bRvqKROvhjRG558gyxoObtRxtTcgQbcd0RvbsI58MPDbbUPU2GJNLU3kBgTMKT6oJDL-_U3SehjJ3M_tQttjQn345etJhtKEa-nPsR7TyU42hf47yhjd0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OuJRLD_KI-JCDhbnoRq45HMt00qxby26npMg69aJ5y-J7nh-QbKP61LtPF0Howa-Kf523iXD34QpbZ8h5O5p7mKnOyX-JZWp5MQg60Kl0MLPbpel4mWR_VhnkzQxnMBMPjamOnaPQ73fAKftnOM46JehL3346-35543bRTLnLy5KJYMDFle5_BD5QLeHR22PRt5PoJ0RrJ5RrMKROvhjRCQKugyxomtjDfQ25y-nOd24TnJK58MPjADxASjxufLUkqKCOZ0qcz-xopSx7jXn6cD4DNQttjQIrOfIkja-5taK3Ohb7TyU42hf47yhDL0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OhJRLDoKPhtK0WbnO1hnofq4D_MfOtetJyaR39-T6vWJ5TMC_CDp_-Q4CnMqrlW6Qw2Kczabnu0MT-ShPC-tnPD4DJ-tJQWtvZJK6r5CJH3l02Vh6ae-t2ynQD0NKL5-RMW20jWl7mWPLVsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjj6jK4JKDH-eq6vP',
            'BDSFRCVID_BFESS': 'OT-OJeC6263U6FnHiyFM5NkfZKhZCYvTH6aoF2bhaGlNhJ7aLA0eEG0PMU8g0Ku-S2-cogKK0mOTHv-F_2uxOjjg8UtVJeC6EG0Ptf8g0f5',
            'H_BDCLCKID_SF_BFESS': 'tJ-JVC0XfC-3fP36q4cqMJDtbxQXKK62aKDs0PoYBhcqEIL4ejrfbt4Vy-QjBUAtBD3h_-QHBlbFjxbSj4Qobj-4barnbMbHtNnJXIo_5l5nhMJR257JDMn3-l3b2t6y523ion6vQpn-KqQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0Djb-DG_Dt6nfb5kX3Rr_bRvqKROvhjRG558gyxoObtRxtTcgQbcd0RvbsI58MPDbbUPU2GJNLU3kBgTMKT6oJDL-_U3SehjJ3M_tQttjQn345etJhtKEa-nPsR7TyU42hf47yhjd0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OuJRLD_KI-JCDhbnoRq45HMt00qxby26npMg69aJ5y-J7nh-QbKP61LtPF0Howa-Kf523iXD34QpbZ8h5O5p7mKnOyX-JZWp5MQg60Kl0MLPbpel4mWR_VhnkzQxnMBMPjamOnaPQ73fAKftnOM46JehL3346-35543bRTLnLy5KJYMDFle5_BD5QLeHR22PRt5PoJ0RrJ5RrMKROvhjRCQKugyxomtjDfQ25y-nOd24TnJK58MPjADxASjxufLUkqKCOZ0qcz-xopSx7jXn6cD4DNQttjQIrOfIkja-5taK3Ohb7TyU42hf47yhDL0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OhJRLDoKPhtK0WbnO1hnofq4D_MfOtetJyaR39-T6vWJ5TMC_CDp_-Q4CnMqrlW6Qw2Kczabnu0MT-ShPC-tnPD4DJ-tJQWtvZJK6r5CJH3l02Vh6ae-t2ynQD0NKL5-RMW20jWl7mWPLVsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjj6jK4JKDH-eq6vP',
            'delPer': '0',
            'PSINO': '2',
            'BAIDUID_BFESS': 'CF9D511F01E42BADA556A3221A4ACAE5:FG=1',
            '__yjs_st': '2_MDAwNTliNDg1MTdkY2FkNzg0N2JlZDkyZjdlMjNjZDEzOWI1ZTcyZmE0ZDIxOWUwYTg3MTI3ZDI5NzliZDA5YWEwYjhkZjM4M2JiOGExMWFhNjc5NDUxNGJjM2Y5ZTYyMzZmOTk4MjU2MjJhOGI0YmVhMDJiOWM4YjM2NGNjZDJhNmU5MzcwOWViYTk4OTdhZTg1Nzk0YzVjZGY0MzI3ZjI2MzQ1OTcyYmNiMzliZjBlNjFjNDQwZjlhODYyMzI0NDRiN2YzZDgxNzAxNWUyMjQ3MGE5ZTgyYjlkZWVlYmU3OGZjYzRiNDUzNmJiZmQ0Zjc3OTUyNTMwOTdiMGQyZF83XzAyZDRmMjIy',
            'Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574': '1636386209',
            'ab_sr': '1.0.1_YTljZDNlMjM3NzdiYmQ5NTk4NTMxNTc2ZGM1Y2Q2MDAwMTE2OTFkYjZiZDkyMmIzYmE5MmIyYjMwOWU2MjI4ZjBhOTM0NmU4ODU4N2JlNmI0MGE3NWJhNWY0NmQ2ZWNmMjY1ZWE3OTk5YTAzNjg3ZmUwZWE3MzkwZWVhNTg4NzI3NzYyOThiNzYyOTY5MGQxNzNmZTk3NDg5MWQ4OWVmZA==',
            'BA_HECTOR': '0k218480010k2h858f1goii6u0q',
        }

        headers = {
            'Connection': 'keep-alive',
            'sec-ch-ua': '"Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"',
            'Accept': '*/*',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'sec-ch-ua-mobile': '?0',
            'User-Agent': self._USER_AGENT,
            'sec-ch-ua-platform': '"macOS"',
            'Origin': 'https://fanyi.baidu.com',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Dest': 'empty',
            'Referer': 'https://fanyi.baidu.com/translate?aldtype=16047&query=&keyfrom=baidu&smartresult=dict&lang=auto2zh',
            'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
        }

        params = (
            ('from', 'zh'),
            ('to', 'en'),
        )

        dd = requests.post('https://fanyi.baidu.com/v2transapi', headers=headers, params=params, cookies=cookies,
                           data=self._build_form(), timeout=self._TIMEOUT).json()
        if "trans_result" not in dd:
            # Surface the server's payload instead of a bare KeyError('trans_result').
            raise KeyError("no 'trans_result' in response: %r" % (dd,))
        return dd["trans_result"]["data"][0]["dst"]


if __name__ == "__main__":
    # Demo run: build a translator for a sample string and print what main() returns.
    print(Translate("123456").main())

  

b.js

// Most recently supplied gtk seed; e() falls back to it when called without one.
var i = null;

/**
 * Mix the accumulator `r` according to the op-string `o`.
 *
 * `o` is read three characters at a time:
 *   [0] "+" -> add the shifted value (result truncated to 32 bits), otherwise XOR it;
 *   [1] "+" -> unsigned right shift, otherwise left shift;
 *   [2] shift amount: a digit, or a letter where "a" means 10, "b" 11, ...
 *
 * @param {number} r Accumulator value.
 * @param {string} o Op-string, e.g. "+-a^+6".
 * @returns {number} The mixed accumulator.
 */
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var amountChar = o.charAt(t + 2);
        var amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
        var shifted = "+" === o.charAt(t + 1) ? r >>> amount : r << amount;
        r = "+" === o.charAt(t) ? (r + shifted) & 4294967295 : r ^ shifted;
    }
    return r;
}

/**
 * Compute the `sign` string for the text `r` using the seed `gtk`.
 *
 * @param {string} r   Text to sign. Texts longer than 30 characters are reduced to
 *                     their first 10, middle 10 and last 10 characters first.
 * @param {string} gtk Seed of the form "<int>.<int>". When omitted, the seed from the
 *                     previous call is reused.
 * @returns {string} "<hash>.<hash ^ first seed part>".
 */
function e(r, gtk) {
    // A UTF-16 surrogate pair (one character outside the BMP). The \u escapes are
    // required: without them the classes match ordinary letters and digits.
    var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        if (t > 30) {
            r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10);
        }
    } else {
        // Split into whole characters so a surrogate pair counts as one unit and is
        // never cut in half by the truncation below.
        var pieces = r.split(surrogatePair), f = [];
        for (var C = 0, h = pieces.length; h > C; C++) {
            if ("" !== pieces[C]) {
                f.push.apply(f, pieces[C].split(""));
            }
            if (C !== h - 1) {
                f.push(o[C]);
            }
        }
        var g = f.length;
        if (g > 30) {
            r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("");
        }
    }

    // An explicit gtk always wins; it is remembered for later calls that omit it.
    if (gtk) {
        i = String(gtk);
    }
    var u = i || "";
    var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0;

    // Expand the text into a byte array S using a UTF-8 style encoding.
    var S = [], c = 0;
    for (var v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        if (128 > A) {
            S[c++] = A;
        } else {
            if (2048 > A) {
                S[c++] = A >> 6 | 192;
            } else {
                if (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1))) {
                    // High surrogate followed by a low surrogate: combine into one code point.
                    A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v));
                    S[c++] = A >> 18 | 240;
                    S[c++] = A >> 12 & 63 | 128;
                } else {
                    S[c++] = A >> 12 | 224;
                }
                S[c++] = A >> 6 & 63 | 128;
            }
            S[c++] = 63 & A | 128;
        }
    }

    // Fold every byte into the hash, starting from the first seed part.
    var p = m;
    for (var b = 0; b < S.length; b++) {
        p += S[b];
        p = n(p, "+-a^+6");
    }
    p = n(p, "+-3^+b+-f");
    p ^= s;
    // Reinterpret a negative 32-bit result as unsigned before taking the remainder.
    if (0 > p) {
        p = (2147483647 & p) + 2147483648;
    }
    p %= 1e6;
    return p.toString() + "." + (p ^ m);
}


/**
 * Identity helper: hands back its argument untouched.
 *
 * @param {*} value Any value.
 * @returns {*} The same value.
 */
function s(value) {
    return value;
}

  

原文地址:https://www.cnblogs.com/fuchenjie/p/15527066.html