# 爬取开心网 (Crawling Kaixin001)

#注册一个开心网的账号,并且爬取主页内容
from urllib import request,parse
from http import cookiejar
import ssl
# WARNING: this swaps the stdlib's default HTTPS context factory for the
# unverified one, i.e. TLS certificate verification is turned off.
ssl._create_default_https_context = ssl._create_unverified_context
# NOTE: a bare request.urlopen() call does not keep cookies between requests,
# which is why a dedicated opener with a cookie processor is built below.

# (1) Build the request manager (opener).
# Cookie storage, wrapped in the handler that reads/writes it.
cookie = cookiejar.CookieJar()
cookie_handler = request.HTTPCookieProcessor(cookie)
# Handlers for the two URL schemes.
http_handler, https_handler = request.HTTPHandler(), request.HTTPSHandler()
# Assemble the opener from the three handlers above.
opener = request.build_opener(
    http_handler,
    https_handler,
    cookie_handler,
)

#(二)登录
def login():
    """Prompt for credentials and POST them to the Kaixin001 login endpoint.

    The request is sent through the module-level ``opener``, which carries a
    cookie processor, so cookies set by the server's response are kept in the
    shared cookie jar. The response body is decoded as UTF-8 and printed.

    Errors raised by ``opener.open`` and a ``UnicodeDecodeError`` from a
    non-UTF-8 body propagate to the caller unchanged.
    """
    # (1) Build the POST request from the form fields.
    login_url = 'https://security.kaixin001.com/login/login_post.php'
    loginemail = input('请输入用户名:')
    password = input("请输入密码:")
    form = parse.urlencode({
        'loginemail': loginemail,
        'password': password,
    })
    # Supplying ``data`` makes urllib send this as a POST.
    req = request.Request(url=login_url, data=form.encode('utf-8'))

    # (2)/(3) Send the request and read the body; the ``with`` block makes
    # sure the response (and its connection) is closed even if read() fails.
    with opener.open(req) as response:
        body = response.read()

    # (4) Decode and show the page.
    print(body.decode('utf-8'))

def homepage():
    """Fetch the Kaixin001 home page through the shared opener and print it.

    The page is requested with a plain GET. Authentication relies on the
    cookies already held by the module-level ``opener`` (populated by a
    prior ``login()`` call), so the credentials are not asked for again and
    are never sent to this unencrypted ``http://`` URL.

    Errors raised by ``opener.open`` and a ``UnicodeDecodeError`` from a
    non-UTF-8 body propagate to the caller unchanged.
    """
    base_url = 'http://www.kaixin001.com/home/?_profileuid=181673967&t=90'

    # (1) No ``data`` argument, so urllib issues a GET instead of a POST.
    req = request.Request(url=base_url)

    # (2)/(3) Send the request and read the body; the ``with`` block makes
    # sure the response (and its connection) is closed even if read() fails.
    with opener.open(req) as response:
        body = response.read()

    # (4) Decode and show the page.
    print(body.decode('utf-8'))
if __name__ == "__main__":
    # Run the steps in order: log in first (so any cookies the server sets
    # are stored by the shared opener), then fetch the home page.
    for step in (login, homepage):
        step()
# 原文地址 (original article): https://www.cnblogs.com/zhangboblogs/p/8542151.html