Selenium3+python3自动化(四十三)--爬取我的博客园粉丝的名称,并写入.txt文件

 爬取目标

1.爬取目标网站:我的博客:https://home.cnblogs.com/u/canglongdao/followers/

2.爬取内容:爬取我的博客的所有粉丝的名称,并保存到txt文件

3.由于博客园登录时需要进行人机验证,所以无法直接使用账号密码自动登录

4.可以先使用selenium代码,在需要输入验证码处,停留几秒,手动验证

5.获取登录成功后的cookies,并从中复制与保持登录状态相关的cookie(字典格式)

代码如下:

# coding:utf-8
from selenium import webdriver
import time

# Open the cnblogs sign-in page in a new Chrome session.
driver = webdriver.Chrome()
driver.get("https://account.cnblogs.com/signin")

# Cookies present before signing in, printed for comparison with the later dump.
cookies_before_login = driver.get_cookies()
print(len(cookies_before_login), cookies_before_login)

# Type the account name and the password into their input boxes, in that order.
for field_id, text in (("mat-input-0", "xxx@qq.com"), ("mat-input-1", "P@ssw0rd")):
    input_box = driver.find_element_by_id(field_id)
    input_box.send_keys(text)

# Submit the form.
submit_button = driver.find_element_by_xpath("//span[@class='mat-button-wrapper']")
submit_button.click()

# Pause so the human verification can be completed by hand in the browser.
time.sleep(6)

# Cookies after the login attempt; the login-related ones are copied from this output.
cookies_after_login = driver.get_cookies()
print(len(cookies_after_login), cookies_after_login)

 运行结果:

5 [{'domain': 'account.cnblogs.com', 'expiry': 1599210767, 'httpOnly': False, 'name': '4271c12252a544478175bac9772afc3d', 'path': '/', 'secure': False, 'value': '010720fb-f7e4-4f4b-b206-0e991ecf6f5b'}, {'domain': 'account.cnblogs.com', 'httpOnly': False, 'name': 'SERVERID', 'path': '/', 'secure': False, 'value': 'daace45bf36fef87f4742d8b633fdae3|1599208967|1599208966'}, {'domain': 'account.cnblogs.com', 'httpOnly': True, 'name': '.Cnblogs.Account.Session', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF%2BQEQcGP%2Bk0zUGAelDZwKkZE07wn7bgYbw56biK9%2FwoxKcs%2FmFFb%2B21xAjYxIXXQJeai7NvLoyDfgSr45CxhE9nwRKokI1nqtUdlD5wk2MHtHUO4kIFOTpe9gzKU%2F%2BDs%2B65eSMPAU62bfOS86QdUoNXH5qL'}, {'domain': 'account.cnblogs.com', 'httpOnly': False, 'name': 'XSRF-TOKEN', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcyb192CHuZwpo_t9r1Ps07m_GVYNh15x2atqF3hGcynCnlxxqVFCWmUT5OqBV0zfYfYC3BjZ-7WUDux6AI1xLaMad3ETT6_MyakbxByaS76Nim_y5-i1_oX0aBl2U91xs'}, {'domain': 'account.cnblogs.com', 'httpOnly': True, 'name': '.Cnblogs.Account.Antiforgery', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQek9XiBoWIQkti8vvTbpqx-CFIWKb39vrCeVudMwHbcPXBWb8LBrlnlM0JzKwWlUlgaD5ioMqre_sd1nEFtrTGhAMmUsVWYxYta1gs4DkuYVinqEL6omAaSnZIJhoxLfp8'}]
5 [{'domain': '.cnblogs.com', 'expiry': 1599295374, 'httpOnly': False, 'name': '_gid', 'path': '/', 'secure': False, 'value': 'GA1.2.683535015.1599208975'}, {'domain': '.cnblogs.com', 'expiry': 1662280974, 'httpOnly': False, 'name': '_ga', 'path': '/', 'secure': False, 'value': 'GA1.2.1985506889.1599208975'}, {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.CNBlogsCookie', 'path': '/', 'secure': False, 'value': 'EB2AB3A42B8CEE723552C8644CAD13CEFA311FA3955FDB7A25A33EEB87199C843967F5791CB012543FD9AC374F535F23C228D4AC5E0373CAA6855768E5713BDF88D82BB97C38A668CDDEB72E0D5055467339189E'}, {'domain': '.cnblogs.com', 'expiry': 1599209034, 'httpOnly': False, 'name': '_gat', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.Cnblogs.AspNetCore.Cookies', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcWJIxPwl8mHGJMd1DItl4C_m7X5ixG1-4yGpDWsiv3n9Iung2Yxk7eaHqXJ1rAGWYXeQF3OyXzSXfYHkPc7A7RPLekmvNk0dNucu8ssSF7ldaY1Nqsnx-q9O3U6JpZ_GCz8ed5jwuq1g8V_StxqpEq2ell4jFdrMmgA1GQudbiFYE1aPVcf1Rs5U7xUJ6UjMJijwG3_OAfQJ9DSibuDqYuhvaS0wwbR6OUfQIBI6NFDdwXz5GL0wJZ82wmPjkKKrrX3ADNm1jsdJxb9fceZC2CfDC2aqe-XotiNwzbsA2vhkDpB5m3JOLYA_P7mWfSexjGKs6ii9E2fNjgYgqZA8TG-1CqvApZjzkCgWklntSP71W5Xrc8zSNkRPiSuoMEKtzVecH65t9utYA2ZneK-mVParwkydH3_hcx1l03CYj6p7HP33S5MsWtvDagWN3waRPfRtdUx2KTDUTKl0Rpt-Gb1cL8RWSctfQxrg8gGKmWYwGqoPhLcDmtPc7D1C6EmZaxp61YODRup2mIzFdRdCvoU8F3Ll9Tsgb8ja7gHra03g'}]

 添加登录的cookie,并获取粉丝名称

# coding:utf-8
from selenium import webdriver
import time

driver = webdriver.Chrome()

# Copied by hand from the cookies printed after a successful login and
# assigned to c1 and c2.
c1 = {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.CNBlogsCookie', 'path': '/', 'secure': False, 'value': 'EB2AB3A42B8CEE723552C8644CAD13CEFA311FA3955FDB7A25A33EEB87199C843967F5791CB012543FD9AC374F535F23C228D4AC5E0373CAA6855768E5713BDF88D82BB97C38A668CDDEB72E0D5055467339189E'}
c2 = {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.Cnblogs.AspNetCore.Cookies', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcWJIxPwl8mHGJMd1DItl4C_m7X5ixG1-4yGpDWsiv3n9Iung2Yxk7eaHqXJ1rAGWYXeQF3OyXzSXfYHkPc7A7RPLekmvNk0dNucu8ssSF7ldaY1Nqsnx-q9O3U6JpZ_GCz8ed5jwuq1g8V_StxqpEq2ell4jFdrMmgA1GQudbiFYE1aPVcf1Rs5U7xUJ6UjMJijwG3_OAfQJ9DSibuDqYuhvaS0wwbR6OUfQIBI6NFDdwXz5GL0wJZ82wmPjkKKrrX3ADNm1jsdJxb9fceZC2CfDC2aqe-XotiNwzbsA2vhkDpB5m3JOLYA_P7mWfSexjGKs6ii9E2fNjgYgqZA8TG-1CqvApZjzkCgWklntSP71W5Xrc8zSNkRPiSuoMEKtzVecH65t9utYA2ZneK-mVParwkydH3_hcx1l03CYj6p7HP33S5MsWtvDagWN3waRPfRtdUx2KTDUTKl0Rpt-Gb1cL8RWSctfQxrg8gGKmWYwGqoPhLcDmtPc7D1C6EmZaxp61YODRup2mIzFdRdCvoU8F3Ll9Tsgb8ja7gHra03g'}

# Load a cnblogs.com page first, then attach the saved login cookies.
# NOTE(review): presumably needed because WebDriver only accepts cookies for
# the domain of the page currently loaded - confirm against the Selenium docs.
driver.get("https://account.cnblogs.com/signin")
for login_cookie in (c1, c2):
    driver.add_cookie(login_cookie)
time.sleep(3)

# Open the followers page with the login cookies in place.
driver.get("https://home.cnblogs.com/u/canglongdao/followers/")
follower_links = driver.find_elements_by_xpath("//div[@class='avatar_list']/ul/li/a")

# Collect the title attribute of every matched link, printing each one as it is read.
result = []
for link in follower_links:
    follower_name = link.get_attribute("title")
    print(follower_name)
    result.append(follower_name)
print(result)

 运行结果:

['偏爱也例外', '', '岑欢', '', 'NiuBiBoy!', '', '知识在于点滴的积累', '', '浅唱蛰伏', '', 'linofficer', '', '龙骑士大哥', '', '给明天的自己', '', '小熊软糖', '']

 将粉丝名称写入.txt文档

# Sample of `result` from the run above; every second entry is an empty title:
# rs=['偏爱也例外', '', '岑欢', '', 'NiuBiBoy!', '', '知识在于点滴的积累', '', '浅唱蛰伏', '', 'linofficer', '', '龙骑士大哥', '', '给明天的自己', '', '小熊软糖', '']
# print(rs[::2])

# Write one follower name per line to a.txt.
# - result[::2] keeps the even indexes only, skipping the empty titles in between.
# - Mode "a" creates a.txt when it does not exist and appends when it does.
# - The file is opened once for the whole loop instead of once per name.
# - UTF-8 is requested explicitly so the names are written the same way on every platform.
with open("a.txt", "a", encoding="utf-8") as f:
    for i in result[::2]:
        f.write(i + "\n")

 

 

参考代码:

# coding:utf-8
from selenium import webdriver
import time
# Start the Chrome browser session that the rest of this script drives.
driver=webdriver.Chrome()
# driver.get("https://account.cnblogs.com/signin")
# nlc=driver.get_cookies()
# print(len(nlc),nlc)
# driver.find_element_by_id("mat-input-0").send_keys("xxx@qq.com")
# driver.find_element_by_id("mat-input-1").send_keys("P@ssw0rd")
# driver.find_element_by_xpath("//span[@class='mat-button-wrapper']").click()
# time.sleep(6)
# lc=driver.get_cookies()
# print(len(lc),lc)
# 5 [{'domain': 'account.cnblogs.com', 'expiry': 1599210767, 'httpOnly': False, 'name': '4271c12252a544478175bac9772afc3d', 'path': '/', 'secure': False, 'value': '010720fb-f7e4-4f4b-b206-0e991ecf6f5b'}, {'domain': 'account.cnblogs.com', 'httpOnly': False, 'name': 'SERVERID', 'path': '/', 'secure': False, 'value': 'daace45bf36fef87f4742d8b633fdae3|1599208967|1599208966'}, {'domain': 'account.cnblogs.com', 'httpOnly': True, 'name': '.Cnblogs.Account.Session', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF%2BQEQcGP%2Bk0zUGAelDZwKkZE07wn7bgYbw56biK9%2FwoxKcs%2FmFFb%2B21xAjYxIXXQJeai7NvLoyDfgSr45CxhE9nwRKokI1nqtUdlD5wk2MHtHUO4kIFOTpe9gzKU%2F%2BDs%2B65eSMPAU62bfOS86QdUoNXH5qL'}, {'domain': 'account.cnblogs.com', 'httpOnly': False, 'name': 'XSRF-TOKEN', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcyb192CHuZwpo_t9r1Ps07m_GVYNh15x2atqF3hGcynCnlxxqVFCWmUT5OqBV0zfYfYC3BjZ-7WUDux6AI1xLaMad3ETT6_MyakbxByaS76Nim_y5-i1_oX0aBl2U91xs'}, {'domain': 'account.cnblogs.com', 'httpOnly': True, 'name': '.Cnblogs.Account.Antiforgery', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQek9XiBoWIQkti8vvTbpqx-CFIWKb39vrCeVudMwHbcPXBWb8LBrlnlM0JzKwWlUlgaD5ioMqre_sd1nEFtrTGhAMmUsVWYxYta1gs4DkuYVinqEL6omAaSnZIJhoxLfp8'}]
# 5 [{'domain': '.cnblogs.com', 'expiry': 1599295374, 'httpOnly': False, 'name': '_gid', 'path': '/', 'secure': False, 'value': 'GA1.2.683535015.1599208975'}, {'domain': '.cnblogs.com', 'expiry': 1662280974, 'httpOnly': False, 'name': '_ga', 'path': '/', 'secure': False, 'value': 'GA1.2.1985506889.1599208975'}, {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.CNBlogsCookie', 'path': '/', 'secure': False, 'value': 'EB2AB3A42B8CEE723552C8644CAD13CEFA311FA3955FDB7A25A33EEB87199C843967F5791CB012543FD9AC374F535F23C228D4AC5E0373CAA6855768E5713BDF88D82BB97C38A668CDDEB72E0D5055467339189E'}, {'domain': '.cnblogs.com', 'expiry': 1599209034, 'httpOnly': False, 'name': '_gat', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.Cnblogs.AspNetCore.Cookies', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcWJIxPwl8mHGJMd1DItl4C_m7X5ixG1-4yGpDWsiv3n9Iung2Yxk7eaHqXJ1rAGWYXeQF3OyXzSXfYHkPc7A7RPLekmvNk0dNucu8ssSF7ldaY1Nqsnx-q9O3U6JpZ_GCz8ed5jwuq1g8V_StxqpEq2ell4jFdrMmgA1GQudbiFYE1aPVcf1Rs5U7xUJ6UjMJijwG3_OAfQJ9DSibuDqYuhvaS0wwbR6OUfQIBI6NFDdwXz5GL0wJZ82wmPjkKKrrX3ADNm1jsdJxb9fceZC2CfDC2aqe-XotiNwzbsA2vhkDpB5m3JOLYA_P7mWfSexjGKs6ii9E2fNjgYgqZA8TG-1CqvApZjzkCgWklntSP71W5Xrc8zSNkRPiSuoMEKtzVecH65t9utYA2ZneK-mVParwkydH3_hcx1l03CYj6p7HP33S5MsWtvDagWN3waRPfRtdUx2KTDUTKl0Rpt-Gb1cL8RWSctfQxrg8gGKmWYwGqoPhLcDmtPc7D1C6EmZaxp61YODRup2mIzFdRdCvoU8F3Ll9Tsgb8ja7gHra03g'}]
# #
# Copied by hand from the cookies printed after a successful login and
# assigned to c1 and c2.
c1 = {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.CNBlogsCookie', 'path': '/', 'secure': False, 'value': 'EB2AB3A42B8CEE723552C8644CAD13CEFA311FA3955FDB7A25A33EEB87199C843967F5791CB012543FD9AC374F535F23C228D4AC5E0373CAA6855768E5713BDF88D82BB97C38A668CDDEB72E0D5055467339189E'}
c2 = {'domain': '.cnblogs.com', 'expiry': 1600504974, 'httpOnly': True, 'name': '.Cnblogs.AspNetCore.Cookies', 'path': '/', 'secure': False, 'value': 'CfDJ8K5MrGQfPjpFvRyctF-QEQcWJIxPwl8mHGJMd1DItl4C_m7X5ixG1-4yGpDWsiv3n9Iung2Yxk7eaHqXJ1rAGWYXeQF3OyXzSXfYHkPc7A7RPLekmvNk0dNucu8ssSF7ldaY1Nqsnx-q9O3U6JpZ_GCz8ed5jwuq1g8V_StxqpEq2ell4jFdrMmgA1GQudbiFYE1aPVcf1Rs5U7xUJ6UjMJijwG3_OAfQJ9DSibuDqYuhvaS0wwbR6OUfQIBI6NFDdwXz5GL0wJZ82wmPjkKKrrX3ADNm1jsdJxb9fceZC2CfDC2aqe-XotiNwzbsA2vhkDpB5m3JOLYA_P7mWfSexjGKs6ii9E2fNjgYgqZA8TG-1CqvApZjzkCgWklntSP71W5Xrc8zSNkRPiSuoMEKtzVecH65t9utYA2ZneK-mVParwkydH3_hcx1l03CYj6p7HP33S5MsWtvDagWN3waRPfRtdUx2KTDUTKl0Rpt-Gb1cL8RWSctfQxrg8gGKmWYwGqoPhLcDmtPc7D1C6EmZaxp61YODRup2mIzFdRdCvoU8F3Ll9Tsgb8ja7gHra03g'}

# Load a cnblogs.com page first, then attach the saved login cookies.
# NOTE(review): presumably needed because WebDriver only accepts cookies for
# the domain of the page currently loaded - confirm against the Selenium docs.
driver.get("https://account.cnblogs.com/signin")
for login_cookie in (c1, c2):
    driver.add_cookie(login_cookie)
time.sleep(3)

# Open the followers page with the login cookies in place.
driver.get("https://home.cnblogs.com/u/canglongdao/followers/")
follower_links = driver.find_elements_by_xpath("//div[@class='avatar_list']/ul/li/a")

# Collect the title attribute of every matched link, printing each one as it is read.
result = []
for link in follower_links:
    follower_name = link.get_attribute("title")
    print(follower_name)
    result.append(follower_name)
print(result)

# Sample of `result` from a previous run; every second entry is an empty title:
# rs=['偏爱也例外', '', '岑欢', '', 'NiuBiBoy!', '', '知识在于点滴的积累', '', '浅唱蛰伏', '', 'linofficer', '', '龙骑士大哥', '', '给明天的自己', '', '小熊软糖', '']
# print(rs[::2])

# Write one follower name per line to a.txt.
# - result[::2] keeps the even indexes only, skipping the empty titles in between.
# - Mode "a" creates a.txt when it does not exist and appends when it does.
# - The file is opened once for the whole loop instead of once per name.
# - UTF-8 is requested explicitly so the names are written the same way on every platform.
with open("a.txt", "a", encoding="utf-8") as f:
    for i in result[::2]:
        f.write(i + "\n")

 运行结果:

偏爱也例外

岑欢

NiuBiBoy!

知识在于点滴的积累

浅唱蛰伏

linofficer

龙骑士大哥

给明天的自己

小熊软糖

['偏爱也例外', '', '岑欢', '', 'NiuBiBoy!', '', '知识在于点滴的积累', '', '浅唱蛰伏', '', 'linofficer', '', '龙骑士大哥', '', '给明天的自己', '', '小熊软糖', '']

  

越努力,越幸运!!! good good study,day day up!!!
原文地址:https://www.cnblogs.com/canglongdao/p/13614415.html