关于Python selenium实现类似比价软件的功能

偶然间想实现比价的功能,用常规的requests方式比较难实现,于是想到可以用selenium简单地实现,下面是代码。

import requests
from selenium import webdriver
import json
import time
import threading

import matplotlib.pyplot as plt
import numpy as np


s = str(raw_input('请输入要比价内容:'))
print s
list = []

#京东价格,主要是selenium模拟,定位搜索框然后输入比价内容。其中还有error的处理。
def jd(s): driver = webdriver.Chrome() driver.get('https://www.jd.com/') driver.find_element_by_id('key').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button').click() time.sleep(2) try: print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[3]/strong/i').text print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[7]/span/a').text list.append(driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[3]/strong/i').text) except: print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[2]/strong/i').text print "京东价格:",driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[5]/span/a').text list.append(driver.find_element_by_xpath('//*[@id="J_goodsList"]/ul/li[1]/div/div[2]/strong/i').text) finally: driver.quit()
#淘宝价格,跟京东类似
def taobao(s): driver = webdriver.Chrome() driver.get('https://www.taobao.com/') driver.find_element_by_id('q').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="J_TSearchForm"]/div[1]/button').click() time.sleep(2) try: print "淘宝价格:",driver.find_element_by_xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div[1]/div[2]/div[1]/div[1]/strong').text list.append(driver.find_element_by_xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div[1]/div[2]/div[1]/div[1]/strong').text) except: print "淘宝价格:",driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[2]/p/a').text print "淘宝价格:",driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[3]/div[1]/span/strong').text list.append(driver.find_element_by_xpath('//*[@id="J_itemlistListItem0"]/div[3]/div[1]/span/strong').text) finally: driver.quit() #亚马逊价格 def amz(s): driver = webdriver.Chrome() driver.get('https://www.amazon.cn/') driver.find_element_by_id('twotabsearchtextbox').send_keys(s.decode('gbk')) driver.find_element_by_xpath('//*[@id="nav-search"]/form/div[2]/div/input').click() time.sleep(2) print "亚马逊价格:",driver.find_element_by_xpath('//*[@id="result_0"]/div/div[4]/div[1]/a/span[2]').text list.append(driver.find_element_by_xpath('//*[@id="result_0"]/div/div[4]/div[1]/a/span[2]').text.replace(',','')[1:]) driver.quit()

#用到多线程处理。 threads
= [] t1 = threading.Thread(target=jd,args=(s,)) threads.append(t1) t2 = threading.Thread(target=taobao,args=(s,)) threads.append(t2) t3 = threading.Thread(target=amz,args=(s,)) threads.append(t3) for i in range(len(threads)): threads[i].start() for i in range(len(threads)): threads[i].join() print list

# Chart: one bar per collected price.
# Keep only the integer part of each price string (the text before the
# first '.'). A separate name is used so the builtin-shadowing ``list`` is
# not rebound again.
heights = [int(price.split('.')[0]) for price in list]
index = np.arange(3)
# x positions, heights and width are passed positionally: the ``left=``
# keyword used previously is not accepted by newer matplotlib releases
# (the first parameter is now called ``x``), while the positional form
# works with both old and new versions.
plt.bar(index, heights, 0.5)
# NOTE(review): the labels assume the prices were collected in the order
# JD, Taobao, Amazon and that all three lookups succeeded; the scrapers
# append from separate threads, so verify the order before trusting the
# chart.
plt.xticks((0, 1, 2), ('Jd', 'Taobao', 'Amazon'))
plt.show()

效果图:

简单的功能实现了,不足之处(有待改善):

1、等待时间太长,体验不佳,即使多线程也要18s左右才能返回结果,太慢。

2、开3个webdriver,占用资源有点多,也导致比较慢。

3、错误处理未完善。

原文地址:https://www.cnblogs.com/vhills/p/7568200.html