05数据爬取

代码:

import requests

import pymysql

from bs4 import BeautifulSoup

# Settings for the MySQL server this script talks to: the 'python' database
# on localhost:3306, using a utf8 connection charset.
# NOTE(review): credentials are hard-coded as root/root -- confirm this is
# only ever run against a local development database.
mysql_settings = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'python',
    'charset': 'utf8',
}

# Module-level connection shared by the rest of the script.
connect = pymysql.Connect(**mysql_settings)

# Crawl the CVPR 2019 open-access index, then visit every paper page it links
# to and pull the title, author line, abstract and a link out of each page.
base_url = 'http://openaccess.thecvf.com/'
# Without a timeout, requests waits indefinitely on a stalled server.
request_timeout = 30  # seconds

r = requests.get(base_url + "CVPR2019.py", timeout=request_timeout)
r.encoding = "utf8"
result = r.text
bs = BeautifulSoup(result, 'html.parser')
# Every <dt class="ptitle"> on the index page is treated as one paper entry.
data = bs.find_all('dt', 'ptitle')

cursor = connect.cursor()
try:
    for paper_id, entry in enumerate(data, start=1):
        entry_anchor = entry.find('a')
        if entry_anchor is None or not entry_anchor.get('href'):
            # Entry without a usable link: there is no paper page to fetch.
            continue
        link = base_url + entry_anchor['href']

        r2 = requests.get(link, timeout=request_timeout)
        r2.encoding = "utf8"
        bs2 = BeautifulSoup(r2.text, 'html.parser')

        # For each field the last matching element wins. Fields are reset to
        # None when nothing matches so a value from the previous paper can
        # never leak into this one.
        title_nodes = bs2.find_all(id="papertitle")
        title = title_nodes[-1].text if title_nodes else None

        author_nodes = bs2.find_all('i')
        author = author_nodes[-1].text if author_nodes else None

        abstract_nodes = bs2.find_all(id="abstract")
        abstract = abstract_nodes[-1].text if abstract_nodes else None

        address = None
        for dd in bs2.find_all('dd'):
            dd_anchor = dd.find('a')
            # Skip <dd> elements that carry no link instead of crashing.
            if dd_anchor is not None and dd_anchor.get('href'):
                address = base_url + dd_anchor['href']

        # NOTE(review): no INSERT is executed anywhere in this script, so the
        # commit below currently persists nothing. When the statement is added,
        # bind paper_id, title, author, abstract, link and address through
        # cursor.execute(sql, params) rather than escaping strings by hand
        # (pymysql.escape_string is not available at the top level of
        # PyMySQL >= 1.0).
        connect.commit()
finally:
    # Release the cursor and connection exactly once, after all papers have
    # been processed or when an error aborts the crawl.
    cursor.close()
    connect.close()

print('成功')

运行结果:

原文地址:https://www.cnblogs.com/Lhxxx/p/13237765.html