


# -*- coding: utf-8 -*-
import os
import random
import socket
import time
import traceback
import urllib.request

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#import win32api #pip install pypiwin32
#from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#DesiredCapabilities.INTERNETEXPLORER['ignoreProtectedModeSettings'] = True
# Dial-up command: rasdial 宽带连接 19ab68----643534  (宽带连接 = "broadband connection")
def connect():
    """Assemble the ``rasdial`` dial-up command line from ``g_adsl_account``.

    Reads the ``name``, ``username`` and ``password`` entries of the
    module-level ``g_adsl_account`` mapping, in that order.

    NOTE(review): the command string is only built here; nothing in this
    function runs it. Confirm whether the statement that executes it is
    missing.
    """
    account_fields = tuple(
        g_adsl_account[key] for key in ('name', 'username', 'password')
    )
    cmd_str = "rasdial %s %s %s" % account_fields
# Disconnect the broadband connection: "rasdial <connection name> /disconnect"
def disconnect():
    """Assemble the ``rasdial ... /disconnect`` command line.

    Uses the ``name`` entry of the module-level ``g_adsl_account`` mapping.

    NOTE(review): the command string is only built here; nothing in this
    function runs it. Confirm whether the statement that executes it is
    missing.
    """
    connection_name = g_adsl_account['name']
    cmd_str = "rasdial %s /disconnect" % connection_name
def get_ip():
    """Fetch the current public IP and its location text from ip.chinaz.com.

    Returns:
        list: ``[ip, address]``, both strings sliced out of the response body.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response even if read/decode fails;
    # the response object was previously never closed.
    with urllib.request.urlopen("http://ip.chinaz.com/getip.aspx") as response:
        body = response.read().decode("utf8")

    ip_pos = body.find("ip")
    address_pos = body.find("address")
    # The fixed offsets skip each key plus the two characters after it and
    # drop the two characters before the next key / at the end of the body.
    # Presumably the body looks like {ip:'1.2.3.4',address:'...'} -- TODO
    # confirm against the live service.
    ip = body[ip_pos + 4:address_pos - 2]
    address = body[address_pos + 9:-2]
    return [ip, address]
def insert_db(ipdate):
    """Insert one row into the ``ip_log`` table of the ``zongzong`` database.

    Args:
        ipdate: Sequence of the seven column values in the order
            ip, address, keyword, url, error, page, rank. The sequence is
            not modified; the ``created_at`` timestamp is added to a copy.

    Uses the module-level ``conn`` database connection.

    NOTE(review): no commit is issued here. Whether ``conn`` runs in
    autocommit mode is not visible from this function -- confirm.
    """
    # Explicit %H:%M:%S instead of %X: %X is locale-dependent and may not
    # produce a form that a SQL timestamp column accepts.
    created_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # Build a new parameter list so the caller's list does not grow by one
    # element on every call.
    row = list(ipdate) + [created_at]
    # The context manager closes the cursor even when a statement fails.
    with conn.cursor() as cur:
        cur.execute("USE zongzong")
        cur.execute("INSERT INTO ip_log(ip,address,keyword,url,error,page,rank,created_at) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)", row)
def get_search_url(driver):
    """Collect redirect targets and link elements from the current result page.

    Every anchor with class ``c-showurl`` inside ``div#content_left`` is
    resolved with a HEAD request. Results whose ``location`` header contains
    any entry of the module-level ``out_urls`` list are skipped.

    Args:
        driver: Selenium WebDriver currently showing a search result page.

    Returns:
        list: ``[real_url, click_link]`` -- the kept ``location`` header
        values and the anchor elements they came from, in matching order.

    Raises:
        KeyError: if a HEAD response carries no ``location`` header.

    NOTE(review): in the reviewed source the selector string had broken
    quoting and the ``if is_append == True:`` branch had no body. The two
    appends below are reconstructed from the commented-out ``get_real_url``
    helper and from the returned ``[real_url, click_link]`` pair -- confirm
    against the original script.
    """
    real_url = []
    click_link = []
    # Single quotes outside so the attribute selector can keep its double quotes.
    content = driver.find_element_by_css_selector('div[id="content_left"]')
    for link in content.find_elements_by_tag_name("a"):
        if link.get_attribute('class') != "c-showurl":
            continue
        url = link.get_attribute('href')
        location = requests.head(url).headers['location']
        if any(out_url in location for out_url in out_urls):
            continue
        real_url.append(location)
        click_link.append(link)
    return [real_url, click_link]
# def get_real_url(urls):
    # real_url = []
    # for url in urls:
        # header = requests.head(url).headers
        # is_append = True
        # for out_url in out_urls:
            # if out_url in header['location']:
                # is_append = False
                # break
        # if is_append == True:
            # real_url.append(header['location'])
    # return real_url
# Function: check whether the target address occurs in any entry of a list
def get_urlIndex(tagurl, urls):
    """Return the index of the first entry of ``urls`` that contains ``tagurl``.

    Args:
        tagurl: Substring to look for (the target address).
        urls: Iterable of URL strings to search, in order.

    Returns:
        int: Zero-based position of the first matching entry, or ``-1`` when
        no entry contains ``tagurl`` (including when ``urls`` is empty).
    """
    for position, url in enumerate(urls):
        if tagurl in url:
            return position
    return -1
def click_nextBtn(driver):
    """Click the "next page" link of the result pager and return the driver.

    Looks through the anchors inside ``div#page`` for the one whose text is
    ``下一页>`` ("next page >"). If no such anchor exists nothing is clicked.

    Args:
        driver: Selenium WebDriver currently showing a search result page.

    Returns:
        The same ``driver`` object that was passed in.

    NOTE(review): in the reviewed source the selector string had broken
    quoting and the ``if`` branch had no body. The click is reconstructed
    from the function's purpose -- confirm against the original script
    (it may also have waited after clicking).
    """
    # Single quotes outside so the attribute selector can keep its double quotes.
    pager = driver.find_element_by_css_selector('div[id="page"]')
    for anchor in pager.find_elements_by_tag_name("a"):
        if anchor.text == "下一页>":
            anchor.click()
            # Stop here: once the page changes, the remaining anchors are
            # stale elements and must not be touched.
            break
    return driver
def click_search_url(driver,items):
    """Walk the result links and handle those whose position is in ``items``.

    Only anchors with class ``c-showurl`` inside ``div#content_left`` are
    considered.

    NOTE(review): this function is incomplete as shown. The selector string
    has broken quoting, ``i`` and ``allhandles`` are never assigned in the
    visible code, and the innermost ``if`` has no body. What happens for a
    selected link therefore cannot be determined from here.
    """
    # ``urls`` and ``real`` are not used by any visible statement.
    urls = []
    real = []
    content = driver.find_element_by_css_selector("div[id="content_left"]")
    links = content.find_elements_by_tag_name("a")
    # Remember the window showing the result list so other windows can be
    # told apart from it below.
    nowhandle = driver.current_window_handle
    #for handle in allhandles:
    #   print('....当前窗口....',handle.title)
    for link in links:
        if link.get_attribute('class') == "c-showurl":
            # NOTE(review): ``i`` is presumably a running counter of
            # ``c-showurl`` links -- its initialisation/increment is missing.
            if i in items:
                #for handle in allhandles:
                #   print('....当前窗口....',handle.title)
                for handle in allhandles:
                    # Every window other than the result-list window.
                    if handle != nowhandle:
def get_random_index(index,len):
    """Pick the ``random_index`` list that matches the rank ``index``.

    Args:
        index: Position of the target among the results; ``-1`` is treated
            as its own case (callers pass ``get_urlIndex``'s result).
        len: Number of results on the page. Only consulted when ``index``
            is ``-1``. Note that this parameter shadows the built-in
            ``len`` inside the function.

    Returns:
        The ``random_index`` value chosen by the matching branch.

    NOTE(review): every ``random_index = [`` literal is cut off in the
    reviewed source, and the second assignment under ``if len <=5:``
    presumably belonged to a missing ``else:`` -- confirm against the
    original script.
    """
    # Branches are tested from the highest rank band down: >= 8, 4..7, 0..3.
    if index >= 8:
        random_index = [
    elif index>=4:
        random_index = [
    elif index>=0:
        random_index = [
    elif index == -1:
        # Target not found: the choice depends on how many results exist.
        if len <=5:
            random_index = [
            random_index = [
    return random_index
def getUA():
    """Return a randomly chosen browser User-Agent string.

    Returns:
        str: One of the active entries of ``uaList``, picked with
        ``random.choice``.
    """
    # Commented-out entries are disabled alternatives kept for reference.
    uaList = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        #"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        #"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        #"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
        #"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; 4399Box.1357; 4399Box.1253; 4399Box.1357)",
        #"Chrome/39.0.2171.99 Safari/537.36 2345Explorer/",
        #"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    ]  # the closing bracket was missing, which made the file unparsable
    return random.choice(uaList)
def getWindowSize():
    """Return one randomly chosen entry of the ``wind_size`` table.

    NOTE(review): the ``wind_size`` literal is cut off in the reviewed
    source. The main loop passes elements ``[0]`` and ``[1]`` of the result
    to ``driver.set_window_size``, so entries are presumably
    (width, height) pairs -- confirm against the original script.
    """
    wind_size = [
    # Despite its name, ``headers`` holds the chosen window size.
    headers = random.choice(wind_size)
    return headers
def setDisplay():
    """Switch the display to a randomly chosen resolution via ``win32api``.

    NOTE(review): the ``display_size`` literal is cut off in the reviewed
    source, and the ``import win32api`` line near the top of the file is
    commented out, so calling this function as shown raises ``NameError``.
    """
    display_size = [
    d_size = random.choice(display_size)
    # Start from the settings structure returned by EnumDisplaySettings and
    # overwrite the fields below.
    dm = win32api.EnumDisplaySettings(None, 0)
    # Element 0 of the chosen entry is used as width, element 1 as height.
    dm.PelsWidth = d_size[0]
    dm.PelsHeight = d_size[1]
    dm.BitsPerPel = 32
    dm.DisplayFixedOutput = 0
    win32api.ChangeDisplaySettings(dm, 0)
# Dial-up account: 19ab68----643534        
# NOTE(review): the three literals below are cut off in the reviewed source.
# ADSL account used by connect()/disconnect(); they read the keys
# 'name', 'username' and 'password'.
g_adsl_account = {
# Substrings of redirect targets that get_search_url() leaves out.
out_urls = [
# One entry per job: [0] is the target address, [1] the search keyword,
# any further elements are alternative "error" keywords (see the main loop).
targetURL = [
# Main driver: one pass per configured target. Each pass starts a PhantomJS
# session with a random User-Agent and window size, locates the target among
# the search results (paging forward while it is not found) and records the
# current IP information.
# NOTE(review): several statements in this loop are cut off in the reviewed
# source (an unclosed parenthesis, ``if`` statements without a body, missing
# ``else`` branches). The comments below describe only what is visible.
for targetInfo in targetURL:
        #driver = webdriver.Ie()
        #driver = webdriver.Chrome()
        #driver = webdriver.Firefox()
        # Copy the default PhantomJS capabilities so the shared dict is not changed.
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        user_agent = getUA()
        # NOTE(review): the right-hand side is cut off; presumably ``user_agent``.
        dcap["phantomjs.page.settings.userAgent"] = (
        #dcap["phantomjs.page.settings.resourceTimeout"] = (15000)
        # Image loading is switched off both in the page settings and on the
        # PhantomJS command line.
        dcap["phantomjs.page.settings.loadImages"] = (False)
        driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=['--load-images=no'])
        # UA = getUA()
        # print(UA)
        # webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.User-Agent'] = UA
        # driver = webdriver.PhantomJS()
        #driver.maximize_window() # show the browser full screen
        window_size = getWindowSize()
        driver.set_window_size(window_size[0], window_size[1])
        target = targetInfo[0]
        keyword = targetInfo[1]
        # Elements from index 2 on are optional; pick one of them at random.
        if len(targetInfo)>2:
                error_keyword = targetInfo[random.randint(2,len(targetInfo)-1)]       
        # NOTE(review): the body of this second check is missing.
        if len(targetInfo)>2:
        # Read the search result page: [0] landing pages, [1] the matching link objects
        urls_res = get_search_url(driver)
        real_urls = urls_res[0]
        #real_urls = get_real_url(urls)
        # -1 means the target is not among the results on this page.
        index = get_urlIndex(target,real_urls)
        page = 1
        # Page forward while the target is not found; the ``page <= 4``
        # bound allows at most four "next page" clicks.
        while index == -1 and page <= 4:
            if page == 1:
                items = get_random_index(index,len(real_urls))
                #items = [4]
            driver = click_nextBtn(driver)
            urls_res = get_search_url(driver)
            real_urls = urls_res[0]
            #real_urls = get_real_url(urls)
            index = get_urlIndex(target,real_urls)
            page = page+1
        # Target found on the first page below the fifth result.
        if index > 4 and page == 1:
            # NOTE(review): ``int`` shadows the built-in of the same name.
            int = random.randint(1,2)
            if int == 2:
                items = get_random_index(index,len(real_urls))
                # NOTE(review): as shown this overwrites the line above at
                # once; presumably it belonged to a missing ``else:``.
                items = [1]
        # ``page`` reaches 5 only when the while loop ran to its bound.
        if page >=5:
            data = get_ip()
        nowhandle = driver.current_window_handle
        allhandles = driver.window_handles
        for handle in allhandles:
            # Every window other than the one showing the result list.
            if handle != nowhandle:
                # Random integer between 15 and 25; what consumes ``stime``
                # is not visible here.
                stime = random.randint(15,25)
                #stime = 5;
        # NOTE(review): the repeated statements below presumably sat in
        # separate branches whose surrounding lines are missing.
        cookie= driver.get_cookies()
        cookie= driver.get_cookies()
        data = get_ip()
        data = get_ip()

