05数据爬取

代码：

import requests

import pymysql

from bs4 import BeautifulSoup

# Crawl the CVPR 2019 open-access listing, visit every paper page linked from
# it, and collect per-paper metadata (title, author, abstract, link address)
# while holding a MySQL connection open for storing the results.

BASE_URL = 'http://openaccess.thecvf.com/'

# Seconds to wait for each HTTP response. Without a timeout a single stalled
# response would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30

connect = pymysql.Connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='root',
    db='python',
    charset='utf8',
)

cursor = None
try:
    cursor = connect.cursor()

    # Listing page: every <dt class="ptitle"> holds the link to one paper.
    r = requests.get(BASE_URL + "CVPR2019.py", timeout=REQUEST_TIMEOUT)
    r.encoding = "utf8"
    bs = BeautifulSoup(r.text, 'html.parser')
    data = bs.find_all('dt', 'ptitle')

    # paper_id is the 1-based position of the paper in the listing.
    for paper_id, entry in enumerate(data, start=1):
        link = BASE_URL + entry.find('a')['href']

        r2 = requests.get(link, timeout=REQUEST_TIMEOUT)
        r2.encoding = "utf8"
        bs2 = BeautifulSoup(r2.text, 'html.parser')

        # Reset per paper so that a page lacking one of the elements does not
        # silently inherit the value scraped from the previous paper.
        title = None
        author = None
        abstract = None
        address = None

        # For each field the last matching element on the page wins.
        for node in bs2.find_all(id="papertitle"):
            title = node.text

        # escape_string is no longer exported from the top-level pymysql
        # package in PyMySQL >= 1.0; pymysql.converters provides it in both
        # old and new releases. The escaped form is only appropriate when the
        # value is interpolated into a SQL string; if the statement is run as
        # cursor.execute(sql, params), pass the raw text instead.
        for node in bs2.find_all('i'):
            author = pymysql.converters.escape_string(node.text)

        for node in bs2.find_all(id="abstract"):
            abstract = pymysql.converters.escape_string(node.text)

        for node in bs2.find_all('dd'):
            anchor = node.find('a')
            if anchor is None:
                # A <dd> without a link would make the subscript below fail.
                continue
            address = BASE_URL + anchor['href']

        # NOTE(review): no INSERT statement is visible in this script, so the
        # values gathered above are never written and the commit below has
        # nothing to commit. The target table and columns are not shown here;
        # add the cursor.execute(...) call at this point - TODO confirm schema.

    connect.commit()
finally:
    # Release database resources even when a request or parse step fails.
    if cursor is not None:
        cursor.close()
    connect.close()

print('成功')

运行结果：