selenium登录csdn,urllib抓取数据

   selenium登录csdn,requests抓取数据:

import selenium
import requests
import selenium.webdriver
import selenium.webdriver.common.keys
import time
# Log in to CSDN through a Selenium-driven Chrome window, then copy the
# browser's cookies into a `requests` session and fetch a page with them.
# The slider CAPTCHA has to be solved by hand while the script sleeps.
from selenium.webdriver.common.by import By  # locator strategies for find_element()

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")
    # Switch the login form to the "account + password" tab.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)
    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)
    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)
    # Single-quoted literal: the XPath itself contains double quotes.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # pause so the slider CAPTCHA can be solved manually
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the cookies to load

    print("开始会话")
    # One session object carries the cookies across requests.
    req = requests.session()
    cookies = driver.get_cookies()  # every cookie the browser currently holds
    for cookie in cookies:
        req.cookies.set(cookie["name"], cookie["value"])
    req.headers.clear()  # remove the session's default headers
    newpage = req.get("http://my.csdn.net/")
    print("会话完成")
    print(newpage.text)  # body of the fetched page
    time.sleep(10)
finally:
    # Always shut the browser down, even when a step above raised.
    driver.quit()

 urllib保存cookie:

#coding:utf-8
import selenium
import selenium.webdriver
import time
import lxml
import lxml.etree
import requests
import urllib.request

# Disabled login snippet (Python 2 `print` syntax). It sits inside a bare
# string literal, so it is never executed; it logs in with Selenium and
# prints the browser's cookies.
# NOTE(review): the snippet embeds plaintext credentials - rotate them and
# keep secrets out of source control.
'''


driver = selenium.webdriver.Chrome()
driver.get("https://passport.csdn.net/account/login?")
time.sleep(3)


user=driver.find_element_by_id("username")
password=driver.find_element_by_id("password")
submit=driver.find_element_by_class_name("logging")
user.clear()
password.clear()
time.sleep(1)
user.send_keys("yincheng01@163.com")
password.send_keys("yinchengak47.net")
time.sleep(1)
submit.click()
time.sleep(10) #等待页面加载,
cookies=driver.get_cookies()#抓取全部的cookie
print cookies
print "------------------------"
driver.close()

'''


# Fetch http://my.csdn.net/ with urllib, sending browser-like headers and a
# cookie header pasted in by hand, then save the raw response to csdn.txt.
print("开始会话")
headers = {
    "Host": "my.csdn.net",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "DNT": "1",
    "Referer": "http://www.csdn.net/",
    # "Accept-Encoding" is deliberately not sent - presumably because the
    # body is written out as-is and urllib would not decompress it; confirm.
    "Accept-Language": "zh-CN,zh;q=0.8",
    # Single-quoted literal: the cookie value itself contains double quotes
    # (UE="...").
    # NOTE(review): this is a hard-coded session cookie including an access
    # token; load it from a private config or environment variable instead.
    "cookie": 'uuid_tt_dd=-1734079490838081701_20171010; bdshare_firstime=1507966544895; UserName=yinghuming; UserInfo=LZTCl6p9mr%2BUgX1cEEgqwIand1mBReKkuogvIYHivh6MdgAq8c4Y4%2Fmx1uhFT%2FmWFuTu%2BCna36D%2BZ7ssW7xuzAjlIwc7Vgjd7Y7zTDJqy%2FakzOPFEGR52GRrp8sf0i9NK7p2hdvM39vRq5Y7NLJObQ%3D%3D; UserNick=%E8%8B%B1%E9%9B%84%E6%97%A0%E6%95%8C2017; AU=821; UD=%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80; UN=yincheng0571; UE="yincheng01@163.com"; BT=1508039179648; access-token=8260e0b7-a35c-419d-b4af-1f02d51c677d; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1507965242,1507969974,1508038063,1508039035; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508039041; dc_tos=oxuidd; dc_session_id=1508039034960_0.6956040327941211',
}
request = urllib.request.Request("http://my.csdn.net/", headers=headers)
# Context managers close the HTTP response and the output file even if
# reading or writing fails part-way.
with urllib.request.urlopen(request) as response:
    newpagetext = response.read()
with open("csdn.txt", "wb") as out_file:
    out_file.write(newpagetext)
print(newpagetext)  # raw bytes, exactly as written to csdn.txt
print("会话完成")
# The trailing sleeps and the commented-out driver.close() were removed: no
# browser is opened in this variant, so there is nothing to wait for.

selenium+urllib 模拟登录 抓取数据:

import selenium
import selenium.webdriver
import selenium.webdriver.common.keys
import urllib.request
import time
# Log in to CSDN through a Selenium-driven Chrome window, turn the browser's
# cookies into a Cookie request header, and fetch a page with urllib.
# The slider CAPTCHA has to be solved by hand while the script sleeps.
from selenium.webdriver.common.by import By  # locator strategies for find_element()

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")
    # Switch the login form to the "account + password" tab.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)
    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)
    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)
    # Single-quoted literal: the XPath itself contains double quotes.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # pause so the slider CAPTCHA can be solved manually
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the cookies to load
    cookies = driver.get_cookies()  # every cookie the browser currently holds
    print(cookies)
    for cookie in cookies:
        # Only the name and value of each cookie go into the header.
        print(cookie["name"], cookie["value"])
    # Build the header as "name=value; name=value" in a single join instead
    # of repeated string concatenation with a dangling separator.
    cookiestr = "; ".join(
        "{}={}".format(cookie["name"], cookie["value"]) for cookie in cookies
    )
    print("------------------------")

    print("开始会话")
    headers = {
        "Host": "my.csdn.net",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding" is deliberately not sent - presumably because the
        # body is written out as-is and urllib would not decompress it; confirm.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": cookiestr,
    }
    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    # Context managers close the HTTP response and the output file even if
    # reading or writing fails part-way.
    with urllib.request.urlopen(request) as response:
        newpagetext = response.read()
    with open("csdn.txt", "wb") as out_file:
        out_file.write(newpagetext)
    print(newpagetext)  # raw bytes, exactly as written to csdn.txt
    print("会话完成")

    time.sleep(10)
finally:
    # Always shut the browser down, even when a step above raised.
    driver.quit()
原文地址:https://www.cnblogs.com/my-global/p/12484149.html