Python爬虫-爬取开心网主页(有登录界面-利用cookie)

爬取开心网主页内容

==========================================

=======================================

===================================

 1 '''
 2 登录开心网
 3 利用cookie
 4 免除ssl
 5 '''
 6 from urllib import request, parse
 7 import ssl
 8 '''sd
 9 步骤：
10 1， 寻找登录入口， 通过搜查相应文字可以快速定位
11   login_url = "https://security.kaixin001.com/login/login_post.php"
12   相应的用户名和密码对应名称为email, password
13 2. 构造opener
14 3. 构造login函数
15 '''
16 
17 import ssl
18 # 忽略安全问题
19 ssl._create_default_https_context = ssl._create_unverified_context
20 
21 from http import cookiejar
22 
23 cookie = cookiejar.CookieJar()
24 cookie_handler = request.HTTPCookieProcessor(cookie)
25 http_handler = request.HTTPHandler()
26 https_handler = request.HTTPSHandler()
27 
28 opener = request.build_opener(http_handler, https_handler, cookie_handler)
29 
30 
31 
32 def login():
33 
34     login_url = "https://security.kaixin001.com/login/login_post.php"
35 
36     data = {
37         "email":"13119144223",
38         "password": "123456"
39     }
40 
41 
42     # 对post的data内容进行编码
43     data = parse.urlencode(data)
44 
45     # http协议的请求头
46     headers = {
47         "Content-Length": len(data),
48         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36"
49     }
50 
51     # 构造请求Request对象
52     # data要求是一个bytes对象，所以需要进行编码
53     req = request.Request(login_url, data=data.encode(), headers=headers)
54 
55     rsp = opener.open(req)
56 
57     html = rsp.read()
58     html = html.decode()
59 
60 def getHomePage():
61     base_url = "http://www.kaixin001.com/home/?_profileuid=181697221"
62 
63     rsp = opener.open(base_url)
64     html = rsp.read()
65     html = html.decode()
66 
67     print(html)
68 
69 if __name__ == '__main__':
70     login()
71     getHomePage()