# 爬取深圳交易所 (Scrape the Shenzhen Stock Exchange)

#encoding:utf8
import re
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import time
# Scrape company detail records from the Shenzhen Stock Exchange listing page.
#
# For every configured tab the script walks each result page, opens each row's
# detail view, prints the fields captured by COMPANY_DETAIL_RE, and returns to
# the list.
#
# NOTE(review): webdriver.PhantomJS and the find_element(s)_by_* helpers only
# exist in Selenium 3.x; they are kept so the script stays runnable with the
# driver it was written for.

START_URL = 'http://www.szse.cn/main/marketdata/jypz/colist/'

# Each value is used twice: as the div[...] index of the clickable tab header
# and as the suffix of the matching REPORTID_tab<k> table id.
TAB_INDEXES = ('2', '3', '4', '6')

WAIT_SECONDS = 10        # upper bound for every explicit wait
SETTLE_SECONDS = 2       # fixed pause after a tab switch or page turn
FIRST_DATA_ROW = 2       # the first row whose detail icon is clicked is tr[2]
MAX_ROWS_PER_PAGE = 10   # rows tr[2]..tr[11] are visited on a full page

TAB_XPATH = ('//*[@id="REPORT_ID_1110"]/table/tbody/tr/td/table[3]'
             '/tbody/tr/td[1]/div[{tab}]')
ROW_ICON_XPATH = '//*[@id="REPORTID_tab{tab}"]/tbody/tr[{row}]/td[2]/img'
ALL_ROW_ICONS_XPATH = ('//*[@id="REPORTID_tab{tab}"]/tbody'
                       '/tr[position()>1]/td[2]/img')
DETAIL_CELL_XPATH = '//*[@id="1743_detail_smetab1"]/tbody/tr[1]/td[2]'
BACK_BUTTON_XPATH = '//*[@id="REPORT_ID_1743_detail_sme"]/div/input'
NEXT_PAGE_CSS = '.cls-navigate-next'

# Pager text reads "当前第X页 共N页" ("currently page X, N pages in total").
PAGE_COUNT_RE = re.compile('<td.*?>当前第.*?页 共(.*?)页</td>', re.S)

# One "label cell + value cell" pair; only the value cell is captured.
_PAIR = '<td.*?>.*?</td><td.*?>(.*?)</td>'


def _row(pair_count):
    """Return the regex for one <tr> holding ``pair_count`` label/value pairs."""
    return '<tr>' + _PAIR * pair_count + '</tr>'


# Detail table layout: the company-name row, then rows holding 1, 1, 2, 3, 2,
# 3, 3 and 1 label/value pairs, then a row whose value is wrapped in a link,
# then an empty <tr></tr>. 18 values are captured in total.
COMPANY_DETAIL_RE = re.compile(
    '<table.*?><tbody>.*?'
    '<tr><td.*?>公司名称</td><td.*?>(.*?)</td></tr>'
    + _row(1) + _row(1) + _row(2) + _row(3)
    + _row(2) + _row(3) + _row(3) + _row(1)
    + '<tr><td.*?>.*?</td><td.*?><a.*?>(.*?)</a></td></tr>'
    + '<tr></tr>.*?</tbody></table>',
    re.S)


def parse_total_pages(html):
    """Return the total page count announced by the pager in ``html``.

    Returns 0 when the pager text is missing or its count is not an integer,
    instead of raising IndexError/ValueError.
    """
    match = PAGE_COUNT_RE.search(html)
    if match is None:
        return 0
    try:
        return int(match.group(1))
    except ValueError:
        return 0


def parse_company_details(html):
    """Return a list of 18-field tuples, one per detail table found in ``html``."""
    return COMPANY_DETAIL_RE.findall(html)


def _click_when_displayed(driver, locate):
    """Wait until ``locate(driver)`` yields a displayed element, then click it."""
    WebDriverWait(driver, WAIT_SECONDS).until(
        lambda the_driver: locate(the_driver).is_displayed())
    locate(driver).click()


def _click_xpath(driver, xpath):
    """Click the element at ``xpath`` once it is displayed."""
    _click_when_displayed(
        driver, lambda the_driver: the_driver.find_element_by_xpath(xpath))


def count_detail_rows(driver, tab):
    """Return how many rows of the tab's table currently carry a detail icon.

    The result is capped at MAX_ROWS_PER_PAGE. It assumes the icon rows are
    contiguous starting at FIRST_DATA_ROW -- TODO confirm against the page.
    """
    icons = driver.find_elements_by_xpath(ALL_ROW_ICONS_XPATH.format(tab=tab))
    return min(len(icons), MAX_ROWS_PER_PAGE)


def scrape_page(driver, tab, row_count):
    """Open, print and close the detail view of ``row_count`` rows on this page."""
    for row in range(FIRST_DATA_ROW, FIRST_DATA_ROW + row_count):
        _click_xpath(driver, ROW_ICON_XPATH.format(tab=tab, row=row))
        # Wait for the detail table before reading the page source.
        WebDriverWait(driver, WAIT_SECONDS).until(
            lambda the_driver: the_driver.find_element_by_xpath(
                DETAIL_CELL_XPATH).is_displayed())
        print(parse_company_details(driver.page_source))
        # The input inside the detail panel returns to the list view.
        _click_xpath(driver, BACK_BUTTON_XPATH)


def scrape_tab(driver, tab):
    """Switch to ``tab`` and scrape every one of its result pages."""
    _click_xpath(driver, TAB_XPATH.format(tab=tab))
    time.sleep(SETTLE_SECONDS)

    # A missing pager is treated as a single page; the row count below then
    # decides whether there is anything to scrape.
    total_pages = max(parse_total_pages(driver.page_source), 1)

    for page in range(1, total_pages + 1):
        is_last_page = page == total_pages
        # Only the last page can be short, so it is the only one measured;
        # earlier pages keep the fixed-size walk with explicit waits.
        if is_last_page:
            row_count = count_detail_rows(driver, tab)
        else:
            row_count = MAX_ROWS_PER_PAGE
        scrape_page(driver, tab, row_count)

        # The last page is scraped too, but has no following page to open.
        if not is_last_page:
            _click_when_displayed(
                driver,
                lambda the_driver: the_driver.find_element_by_css_selector(
                    NEXT_PAGE_CSS))
            time.sleep(SETTLE_SECONDS)


def main():
    """Launch PhantomJS, scrape every configured tab, and always quit the driver."""
    driver = webdriver.PhantomJS()
    try:
        driver.maximize_window()
        driver.get(START_URL)
        for tab in TAB_INDEXES:
            scrape_tab(driver, tab)
    finally:
        # Without this the PhantomJS process outlives the script.
        driver.quit()


if __name__ == '__main__':
    main()

# 原文地址 (original article): https://www.cnblogs.com/zhisy/p/6880247.html