Python爬取有道翻译

注:请求方式为POST,所以需要使用urllib.parse.urlencode()将字典转换成URL编码的参数,再用encode("utf-8")转换为字节串后作为请求体发送;

  使用json.loads(),将输出的json格式,转换为字典类型

import urllib.request
import urllib.parse
import json

# Translate one piece of user-supplied text through the Youdao web endpoint
# and print the source text next to its translation.
content = input("请输入要翻译的内容:")
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# Form fields sent in the POST body. "salt" and "sign" are fixed values
# copied from a captured browser request (per the notes accompanying this
# script: a timestamp and a digest derived from timestamp + text).
data = {
    "i": content,
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "1537624423057",
    "sign": "0c1989ed937485dd570fc4b5907cf567",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTIME",
    "typoResult": "false",
}
# A POST body must be bytes: URL-encode the dict, then encode it as UTF-8.
data = urllib.parse.urlencode(data).encode("utf-8")

# Passing `data` makes urlopen issue a POST. The context manager guarantees
# the HTTP response is closed even if reading or decoding fails.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode("utf-8")

# Parse the JSON reply into a dict.
translate = json.loads(html)
try:
    # Only the first segment of the first paragraph is reported.
    first_segment = translate["translateResult"][0][0]
    translate_src = first_segment["src"]
    translate_Result = first_segment["tgt"]
except (KeyError, IndexError, TypeError):
    # Error replies (e.g. {"errorCode": 50}) carry no "translateResult";
    # stop with the raw reply instead of an opaque traceback.
    raise SystemExit("翻译失败,服务器返回:%s" % html) from None

print("翻译内容:%s" % translate_src)
print("翻译结果:%s" % translate_Result)

==============================使用代理访问======================================

import urllib.request

import urllib.parse

import json

# Interactive translation loop that sends every request through an HTTP proxy.
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# --------------------------------- proxy setup ---------------------------------
# ProxyHandler takes a dict mapping a URL scheme to a proxy address. The key
# must match the scheme of the request URL: `url` is http://, so the proxy is
# registered under "http" (an "https" entry would never be consulted here).
# NOTE: the address below is a sample; replace it with a reachable proxy.
proxy_support = urllib.request.ProxyHandler({"http": "27.17.45.90:43411"})
# Build an opener that routes requests through the proxy.
opener = urllib.request.build_opener(proxy_support)
# Send a browser-like User-Agent with every request made through the opener,
# so no separate per-request header is needed.
opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/69.0.3497.81 Safari/537.36")]
# Install the opener globally so urllib.request.urlopen() uses it. This is
# loop-invariant, so it is done once instead of on every iteration.
urllib.request.install_opener(opener)
# ------------------------------- end proxy setup -------------------------------

while True:
    content = input("请输入要翻译的内容(输入!退出):")
    # A single "!" ends the session.
    if content == "!":
        break

    # Form fields sent in the POST body. "salt" and "sign" are fixed values
    # copied from a captured browser request.
    data = {
        "i": content,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1537624423057",
        "sign": "0c1989ed937485dd570fc4b5907cf567",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    # A POST body must be bytes: URL-encode the dict, then encode it as UTF-8.
    data = urllib.parse.urlencode(data).encode("utf-8")

    req = urllib.request.Request(url, data)
    # The context manager closes the HTTP response after it has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode("utf-8")

    # Parse the JSON reply into a dict.
    translate = json.loads(html)
    try:
        # Only the first segment of the first paragraph is reported.
        first_segment = translate["translateResult"][0][0]
        translate_src = first_segment["src"]
        translate_Result = first_segment["tgt"]
    except (KeyError, IndexError, TypeError):
        # Error replies (e.g. {"errorCode": 50}) carry no "translateResult";
        # show the raw reply and keep the session alive.
        print("翻译失败,服务器返回:%s" % html)
        continue

    print("翻译内容:%s" % translate_src)
    print("翻译结果:%s" % translate_Result)

==============================分割线===========================================

1、在运行中报{"errorCode":50},将审查元素中获取的URL里面,将translate_o的_o去掉就不会报错。
2、data["salt"] = "1537624423057"  :时间戳
3、data["sign"] = "0c1989ed937485dd570fc4b5907cf567"   :时间戳 + 翻译内容加密生成的字符串
4、urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>,没有连接到服务器,需要更换代理IP
5、urllib.error.HTTPError: HTTP Error 403: Forbidden:需要添加user-agent
 
 
原文地址:https://www.cnblogs.com/kongjiangbing/p/9691536.html