爬虫-模拟登录(13)

模拟登录的代码实现:

#socket服务端
import socket
import json
import threading
# Address the demo server listens on: every interface, TCP port 8002.
LISTEN_ADDRESS = ('0.0.0.0', 8002)

server = socket.socket()
server.bind(LISTEN_ADDRESS)
server.listen()

# After a successful login the server returns a sessionid string to the user
# (it has to be complex enough that nobody else can forge how it is generated).
user_info = dict(sessionid="bobby")
# The browser then sends this sessionid automatically with every request
# (for every URL). Two questions follow:
# 1. How is the browser told about the sessionid?
# 2. How do we make sure the browser sends it with every request?

# Difference between a session and a cookie:
# 1. A session is maintained and interpreted by the server, and handed to the
#    browser through Set-Cookie.
# 2. A cookie is a browser facility; the browser sends these values along with
#    every subsequent request.


def handle_sock(sock, addr):
    """Serve one canned HTTP response on an accepted connection, then close it.

    Reads a single chunk (at most 1024 bytes) of the request and prints it,
    then replies with a fixed ``200 OK`` response that sets three cookies
    (``name``, ``course_id`` and ``sessionid``) and carries a JSON-encoded
    list of courses as its body. The socket is always closed before returning,
    even when reading or writing fails.

    Args:
        sock: Connected socket object, as returned by ``accept()``.
        addr: Peer address, as returned by ``accept()``; not used.
    """
    # NOTE(review): the header lines end with a bare LF and the first two carry
    # trailing spaces. HTTP specifies CRLF line endings; the bytes are kept
    # exactly as they were so existing clients see the same response.
    response_template = (
        "HTTP/1.0 200 OK  \n"
        "Content-type: text/html  \n"
        "Set-Cookie: name=bobby\n"
        "Set-Cookie: course_id=78\n"
        "Set-Cookie: sessionid=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT\n"
        "\n"
        "{}\n"
        "\n"
    )
    data = [
        {
            "name": "django打造在线教育",
            "teacher": "bobby",
            "url": "https://coding.imooc.com/class/78.html",
        },
        {
            "name": "python高级编程",
            "teacher": "bobby",
            "url": "https://coding.imooc.com/class/200.html",
        },
        {
            "name": "scrapy分布式爬虫",
            "teacher": "bobby",
            "url": "https://coding.imooc.com/class/92.html",
        },
        {
            "name": "django rest framework打造生鲜电商",
            "teacher": "bobby",
            "url": "https://coding.imooc.com/class/131.html",
        },
        {
            "name": "tornado从入门到精通",
            "teacher": "bobby",
            "url": "https://coding.imooc.com/class/290.html",
        },
    ]
    try:
        # recv() blocks until the client sends data or disconnects.
        tmp_data = sock.recv(1024)
        # The request is only echoed for inspection, so undecodable bytes are
        # replaced instead of aborting the handler with UnicodeDecodeError.
        print(tmp_data.decode("utf8", errors="replace"))
        payload = response_template.format(json.dumps(data)).encode("utf8")
        # sendall() keeps writing until the whole response is out; a plain
        # send() may transmit only part of it.
        sock.sendall(payload)
    finally:
        # One response per connection: always release the socket.
        sock.close()

# Accept client connections forever and serve each one on its own thread.
while True:
    # accept() blocks until a client connects.
    sock, addr = server.accept()

    # Hand the new connection to a dedicated thread so the loop can go
    # straight back to accepting.
    threading.Thread(target=handle_sock, args=(sock, addr)).start()

requests+session模拟登录豆瓣:

import json
import pickle

import requests


def login():
    """Log in to Douban with requests, save the cookies, and verify them.

    Posts the credentials to the login endpoint through a ``requests``
    session. On success the response cookies are pickled to ``douban.cookie``.
    The cookie file (freshly written, or left over from an earlier run) is
    then loaded and used for a plain ``requests.get`` of the home page; the
    page is searched for the account name to tell whether the cookies are
    accepted. Progress is reported with ``print``; nothing is returned.
    """
    session = requests.session()
    username = "18782902568"
    password = "admin123"
    url = "https://accounts.douban.com/j/mobile/login/basic"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    }

    post_data = {
        "ck": "",
        "name": username,
        "password": password,
        "remember": "true",
        "ticket": ""
    }

    res = session.post(url, data=post_data, headers=headers)
    # A non-JSON reply or a payload without "status" counts as a failed login
    # instead of crashing.
    try:
        logged_in = json.loads(res.text)["status"] == "success"
    except (ValueError, KeyError, TypeError):
        logged_in = False

    if logged_in:
        print("登录成功")
        # NOTE(review): res.cookies holds only the cookies set by this one
        # response; session.cookies may be the better thing to persist - confirm.
        with open("douban.cookie", "wb") as f:
            pickle.dump(res.cookies, f)
    else:
        print("登录失败")

    # Fall back to cookies saved by an earlier run; if there are none there is
    # nothing to verify.
    try:
        with open("douban.cookie", "rb") as f:
            # The file is written by this script; never unpickle a cookie file
            # obtained from an untrusted source.
            cookies = pickle.load(f)
    except FileNotFoundError:
        print("未登录")
        return

    # Send the same browser User-Agent as the login request so both requests
    # present themselves to the site in the same way.
    html = requests.get("https://www.douban.com/", cookies=cookies, headers=headers).text
    if "bobby_liyao" in html:
        print("已经登录")
    else:
        print("未登录")


# Run the login demo only when this file is executed as a script.
if __name__ == "__main__":
    login()

注意:

1】使用requests.session()而不是直接使用requests,是为了让登录后得到的cookie能够在后续请求之间共享。

2】使用pickle,是为了让序列化数据的存储与读取更加便捷。

3】当然,如果数据无需存储到文件里面,可以直接使用res.cookies,或者通过res.cookies.get_dict()将其转换为字典,再获取cookie并传递。

selenium模拟登录:

import time

import requests
from selenium import webdriver

# Page the browser opens and that is later re-requested with the cookies.
url = "https://www.douban.com/"
# Location of the local chromedriver binary used to drive Chrome.
CHROMEDRIVER_PATH = "E:/in32/chromedriver.exe"
browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)


def login():
    """Log in to Douban through Selenium, then verify the cookies with requests.

    Drives the module-level ``browser``: opens ``url``, switches into the
    page's iframe, selects the account/password tab, fills in the credentials
    and clicks the submit button. The browser's cookies are then flattened
    into a ``name -> value`` dict and sent with a plain ``requests.get`` of
    ``url``; the page is searched for the account name to tell whether the
    cookies are accepted. The outcome is reported with ``print``; nothing is
    returned.
    """
    username = "18782902568"
    password = "admin123"
    browser.get(url)
    # Fixed wait for the page to load.
    time.sleep(3)

    # The login form lives inside an iframe, so switch into it first.
    browser.switch_to.frame(browser.find_element_by_tag_name("iframe"))
    login_ele = browser.find_element_by_xpath("//li[@class='account-tab-account']")
    login_ele.click()

    username_ele = browser.find_element_by_xpath("//input[@id='username']")
    password_ele = browser.find_element_by_xpath("//input[@id='password']")
    username_ele.send_keys(username)
    password_ele.send_keys(password)

    # The button's attributes change, so look it up only after the fields
    # have been filled in.
    submit_btn = browser.find_element_by_xpath("//a[@class='btn btn-account btn-active']")
    submit_btn.click()
    # Fixed wait for the login to complete.
    time.sleep(10)

    # get_cookies() returns a list of dicts; requests wants a name -> value dict.
    cookies = browser.get_cookies()
    cookie_dict = {item["name"]: item["value"] for item in cookies}

    # NOTE(review): this request goes out with requests' default User-Agent;
    # if the site rejects it, pass a browser User-Agent header - confirm.
    res = requests.get(url, cookies=cookie_dict)
    if "bobby_liyao" in res.text:
        print("已经登录")
    else:
        print("未登录")


if __name__ == "__main__":
    login()
好好学习,天天向上
原文地址:https://www.cnblogs.com/topass123/p/13345778.html