selenium爬取京东商品信息

selenium爬取京东商品信息

京东https://www.jd.com/

from selenium import webdriver
import time
# 模拟键盘输入
from selenium.webdriver.common.keys import Keys

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
# 设置隐士等待
bro.implicitly_wait(10)


def get_goods_info(bro):
    # li_list=bro.find_element_by_class_name('gl-warp').find_elements_by_tag_name('li')
    # goods=bro.find_elements_by_class_name('gl-item')
    goods = bro.find_elements_by_css_selector('.gl-item')
    # print(len(goods))
    for good in goods:
        try:
            price = good.find_element_by_css_selector('.p-price i').text
            name = good.find_element_by_css_selector('.p-name em').text
            url = good.find_element_by_css_selector('.p-img a').get_attribute('href')
            commits = good.find_element_by_css_selector('.p-commit strong>a').text
            photo_url = good.find_element_by_css_selector('.p-img img').get_attribute('src')

            print('''
            商品名字:%s
            商品价格:%s
            商品地址:%s
            商品评论数:%s
            商品图片地址:%s

            ''' % (name, price, url, commits, photo_url))
        except Exception as e:
            continue

    next_button = bro.find_element_by_partial_link_text('下一页')
    time.sleep(1)
    next_button.click()

    get_goods_info(bro)


try:
    bro.get('https://www.jd.com/')

    input_k = bro.find_element_by_id('key')

    input_k.send_keys('奶牛')
    # 模拟键盘的回车键
    input_k.send_keys(Keys.ENTER)
    get_goods_info(bro)


except Exception as e:
    print(e)

finally:
    bro.close()

自动登录12306



from selenium import webdriver
import time
#pillow
from PIL import Image

# 引入超级鹰

from chaojiying import Chaojiying_Client


from selenium.webdriver import ActionChains
bro=webdriver.Chrome(executable_path='./chromedriver.exe')
bro.implicitly_wait(10)
try:
    bro.get('https://kyfw.12306.cn/otn/resources/login.html')
    bro.maximize_window()  # 窗口最大化,全屏
    button_z=bro.find_element_by_css_selector('.login-hd-account a')
    button_z.click()
    time.sleep(2)
    # 截取整个屏幕
    bro.save_screenshot('./main.png')
    # 验证码的位置和大小
    img_t=bro.find_element_by_id('J-loginImg')
    print(img_t.size)
    print(img_t.location)

    size=img_t.size
    location=img_t.location

    img_tu = (int(location['x']), int(location['y']), int(location['x'] + size['width']), int(location['y'] + size['height']))
    # # 抠出验证码
    # #打开
    img = Image.open('./main.png')
    # 抠图
    fram = img.crop(img_tu)
    # 截出来的小图
    fram.save('code.png')

    # 调用超级鹰破解
    chaojiying = Chaojiying_Client('306334678', 'lqz12345', '903641')	#用户中心>>软件ID 生成一个替换 96001
    im = open('code.png', 'rb').read()													#本地图片文件路径 来替换 a.jpg 有时WIN系统须要//
    # print(chaojiying.PostPic(im, 9004))

    ## 返回结果如果有多个 260,133|123,233,处理这种格式[[260,133],[123,233]]
    res=chaojiying.PostPic(im, 9004)
    print(res)
    result=res['pic_str']

    all_list = []
    if '|' in result:
        list_1 = result.split('|')
        count_1 = len(list_1)
        for i in range(count_1):
            xy_list = []
            x = int(list_1[i].split(',')[0])
            y = int(list_1[i].split(',')[1])
            xy_list.append(x)
            xy_list.append(y)
            all_list.append(xy_list)
    else:
        x = int(result.split(',')[0])
        y = int(result.split(',')[1])
        xy_list = []
        xy_list.append(x)
        xy_list.append(y)
        all_list.append(xy_list)
    print(all_list)
    # 用动作链,点击图片
    # [[260,133],[123,233]]
    for a in all_list:
        x = a[0]
        y = a[1]
        ActionChains(bro).move_to_element_with_offset(img_t, x, y).click().perform()
        time.sleep(1)

    username=bro.find_element_by_id('J-userName')
    username.send_keys('306334678')
    password=bro.find_element_by_id('J-password')
    password.send_keys('lqz12345')
    time.sleep(3)
    submit_login=bro.find_element_by_id('J-login')
    submit_login.click()
    time.sleep(3)

    print(bro.get_cookies())
    time.sleep(10)
    bro.get('https://www.12306.cn/index/')
    time.sleep(5)

except Exception as e:
    print(e)
finally:
    bro.close()

cookie池的搭建

# 如何搭建cookie池
# selenium写一套(一堆小号),跑起脚本,自动登录,手动参与
# 拿到cookie,放到redis中
# django搭建一个服务:127.0.0.0/get,随机返回一个cookie
# request发送请求爬数据(selenium拿到的cookie),cookie失效
原文地址:https://www.cnblogs.com/surpass123/p/13437256.html