# 使用 BrowserMob Proxy(bmp)和 Selenium 抓取浏览器的请求和结果


import json
import re
import time
import tkinter
from pprint import pprint
from tkinter.scrolledtext import ScrolledText

from browsermobproxy import Server
from bs4 import BeautifulSoup
from selenium import webdriver
# Local install locations of the BrowserMob Proxy launcher and chromedriver.
# NOTE(review): the previous literals had lost their backslash sequences
# ("D:rowsermob-proxy-2.1.4inrowsermob-proxy.bat"); the paths below are a
# reconstruction -- confirm them against the local installation.
BMP_LAUNCHER = r"D:\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat"
CHROMEDRIVER_PATH = r"C:\browser_driver\chromedriver.exe"

# Page opened in the browser, and the API request whose response is extracted.
ARTICLE_PAGE_URL = "https://www.jiucaigongshe.com/action/detail/93bb12242ad84abfb93d10eee7a9271d"
ARTICLE_API_URL = "https://app.jiucaigongshe.com/jystock-app/api/v1/article/detail"


def extract_article_html(entries):
    """Return the article HTML found in a list of HAR entries.

    Looks for the first POST request to ``ARTICLE_API_URL`` whose response
    body has been captured, parses that body as JSON and returns
    ``payload['data']['content']``.

    Args:
        entries: The ``log.entries`` list of a HAR document.

    Returns:
        The article HTML string, or ``None`` when no matching entry with a
        captured body is present.

    Raises:
        json.JSONDecodeError: If the captured body is not valid JSON.
        KeyError, TypeError: If the payload has no ``data.content`` field.
    """
    for entry in entries:
        request = entry['request']
        if request['url'] != ARTICLE_API_URL or request['method'] != 'POST':
            continue
        body = entry.get('response', {}).get('content', {}).get('text')
        if not body:
            # Matching request, but no response body recorded for it.
            continue
        # json.loads instead of eval(): the body comes from the network, and
        # eval() both executes arbitrary code and chokes on true/false, while
        # the old replace('null', 'None') also rewrote the article text.
        return json.loads(body)['data']['content']
    return None


def wait_for_article_html(proxy, timeout=15.0, poll_interval=0.5):
    """Poll the proxy's HAR until the article response shows up.

    Args:
        proxy: Object exposing the current HAR document as ``proxy.har``.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        The article HTML string.

    Raises:
        TimeoutError: If no matching response was captured within *timeout*.
    """
    deadline = time.monotonic() + timeout
    while True:
        article_html = extract_article_html(proxy.har['log']['entries'])
        if article_html is not None:
            return article_html
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'no POST response from {} captured within {} s'.format(
                    ARTICLE_API_URL, timeout))
        time.sleep(poll_interval)


def show_text(article_text):
    """Display *article_text* in a scrollable Tk window; blocks until closed."""
    top = tkinter.Tk()
    top.title("韭菜公社")
    top.geometry("700x800+10+20")
    text = ScrolledText(top, width=100, height=80, padx=15,
                        spacing1=5, spacing2=5, spacing3=5, font='宋体 12')
    text.insert('insert', article_text)
    text.pack(side=tkinter.LEFT)
    top.mainloop()


def main():
    """Capture the article API response through the proxy and display its text."""
    server = Server(BMP_LAUNCHER)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        options = webdriver.ChromeOptions()
        options.add_argument('--proxy-server={}'.format(proxy.proxy))
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--headless')
        # NOTE(review): passing the driver path positionally is the older
        # Selenium calling convention; newer releases expect a Service
        # object -- confirm against the installed Selenium version.
        driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)

        proxy.new_har("yjy", options={'captureHeaders': True,
                                      'captureContent': True,
                                      'captureBinaryContent': True})
        driver.get(ARTICLE_PAGE_URL)

        article_html = wait_for_article_html(proxy)
        article_text = BeautifulSoup(article_html, features='html.parser').text

        # Display the text.
        show_text(article_text)

        input('按任意键继续')
    finally:
        # Always release the browser and the proxy process, even on failure;
        # the browser goes first because it is the proxy's client.
        if driver is not None:
            driver.quit()
        server.stop()


if __name__ == "__main__":
    main()
# 原文地址: https://www.cnblogs.com/yjybupt/p/13864405.html