英译中批量翻译

Python爬虫视频教程零基础小白到scrapy爬虫高手-轻松入门

https://item.taobao.com/item.htm?spm=a1z38n.10677092.0.0.482434a6EmUbbW&id=564564604865

中文翻译为英语时,用爬虫抓取容易出错;使用官方提供的API接口更方便,也更不容易出错。

# -*- coding: utf-8 -*-
"""
Created on Tue Apr 19 09:05:56 2016
有道翻译爬虫(英译中)
@author: Administrator
"""

import requests,bs4,time
# NOTE: Chinese input causes errors (reason unknown), so the mixed list below stays disabled.
#words_list=["python","job","hello world"," amoxicillin","阿莫西林","clarithromycin","克拉霉素"]

# English terms to translate in one batch.
words_list = [
    "metformin hydrochloride",
    "amoxicillin",
    "clarithromycin",
    "Viagra",
    "sildenafil",
]
# Shared result list; the batch functions append each translation here.
translation_list = []

# Sample values for trying the helpers interactively.
word = "python"
word2 = 'n. 巨蟒;大蟒n. (法)皮东(人名)'

#Clean a translation: drop whitespace, newlines and the "n." noun marker
def word_format(word):
    """Normalize a scraped translation string.

    Removes, in this order: leading/trailing whitespace, every newline,
    every ``n.`` marker and every ASCII space.

    Args:
        word: Raw translation text.

    Returns:
        The cleaned string.
    """
    # The newline has to be spelled as the escape sequence '\n'; a raw line
    # break inside a single-quoted literal is a syntax error.
    return word.strip().replace('\n', '').replace('n.', '').replace(' ', '')
'''
word2='n. 巨蟒;大蟒n. (法)皮东(人名)'
word_format(word2)
Out[90]: '巨蟒;大蟒(法)皮东(人名)'
'''

#Translate one word, full version
def Get_full_translation(word):
    """Fetch the full dictionary text for one word from dict.youdao.com.

    Args:
        word: Term to look up; it is interpolated into the request URL.

    Returns:
        The text of the first ``.trans-container`` element on the page,
        with all newlines removed.

    Raises:
        IndexError: If the page contains no ``.trans-container`` element.
        requests.RequestException: If the HTTP request fails or times out.
    """
    url="http://dict.youdao.com/w/%s/#keyfrom=dict.index"%(word)
    # A timeout keeps a single stalled request from hanging a whole batch.
    res=requests.get(url,timeout=10)
    soup=bs4.BeautifulSoup(res.text,"lxml")
    elems=soup.select('.trans-container')
    translation=elems[0].text
    # Newline written as an escape sequence; a raw line break inside the
    # quotes is a syntax error.
    return translation.replace('\n','')
    
#Batch-translate every word, full version
def Get_all_full_translation(words_list):
    """Translate each word and append the result to ``translation_list``.

    A word whose lookup raises is reported on stdout and skipped, so one
    failure does not stop the batch.

    Args:
        words_list: Iterable of terms passed one by one to
            ``Get_full_translation``.

    Returns:
        None. Results accumulate in the module-level ``translation_list``.
    """
    for word in words_list:
        try:
            translation=Get_full_translation(word)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still be able to abort a long-running batch.
            print("exception:",word)
        else:
            translation_list.append(translation)

#Translate one word, simple version
def Get_simple_translation(word):
    """Fetch only the first meaning of a word from dict.youdao.com.

    Args:
        word: Term to look up; it is interpolated into the request URL.

    Returns:
        The text of the first ``.trans-container`` element, cleaned with
        ``word_format`` and truncated at the first ``;`` separator.

    Raises:
        IndexError: If the page contains no ``.trans-container`` element.
        requests.RequestException: If the HTTP request fails or times out.
    """
    url="http://dict.youdao.com/w/%s/#keyfrom=dict.index"%(word)
    # A timeout keeps a single stalled request from hanging a whole batch.
    res=requests.get(url,timeout=10)
    soup=bs4.BeautifulSoup(res.text,"lxml")
    elems=soup.select('.trans-container')
    cleaned=word_format(elems[0].text)
    # Meanings are separated by ";"; keep only the first one.
    return cleaned.split(";")[0]
'''
Get_simple_translation(word)
Out[108]: '巨蟒'

'''

#Batch-translate every word, simple version
def Get_all_simple_translation(words_list):
    """Translate each word briefly and append the result to ``translation_list``.

    A word whose lookup raises is reported on stdout and skipped. After
    each successful lookup the function pauses for three seconds, and it
    prints a completion message once the whole list has been processed.

    Args:
        words_list: Iterable of terms passed one by one to
            ``Get_simple_translation``.

    Returns:
        None. Results accumulate in the module-level ``translation_list``.
    """
    # NOTE(review): this function was collapsed onto one line; the nesting of
    # the sleep (inside the loop) and the final print (after the loop) is
    # reconstructed -- confirm against the original script.
    for word in words_list:
        try:
            translation=Get_simple_translation(word)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still be able to abort a long-running batch.
            print("exception:",word)
            continue
        translation_list.append(translation)
        # Pause between lookups.
        time.sleep(3)
    print("congradulation!")

'''

Get_all_simple_translation(words_list)
congradulation!

translation_list
Out[126]: ['盐酸二甲双胍', '阿莫西林', '克拉霉素', '万艾可', '西地那非']


'''

  

原文地址:https://www.cnblogs.com/webRobot/p/5407193.html