4/23 Study Summary

What I learned today: web scraping. Yesterday I could only pull out a single row and column; today I can grab the content from every page of the paginated list.

Difficulty I ran into: I wanted to build an image carousel today but haven't figured it out yet.

import bs4
import pymysql
import requests
from bs4 import BeautifulSoup
import pandas as pd

def download_all_htmls():
    """Fetch the first two result pages (20 schools per page)."""
    htmls = []
    for idx in range(2):
        url = f"https://yz.chsi.com.cn/sch/?start={idx*20}"
        print("craw html", url)
        r = requests.get(url)
        if r.status_code != 200:
            raise Exception(f"request failed: {url}")
        htmls.append(r.text)
    return htmls

htmls = download_all_htmls()
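
range(2) hard-codes two pages. If I wanted every page, one way (just a sketch; it assumes pages past the end come back with an empty or missing <tbody>, which I haven't verified) would be to keep fetching until a page has no rows:

def download_until_empty(max_pages=50):
    # Keep fetching pages of 20 until one has no table rows.
    # Assumption: past the last page the <tbody> is missing or empty;
    # max_pages is just a safety cap against looping forever.
    htmls = []
    for idx in range(max_pages):
        url = f"https://yz.chsi.com.cn/sch/?start={idx*20}"
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        tbody = BeautifulSoup(r.text, "html.parser").find("tbody")
        if tbody is None or tbody.find("tr") is None:
            break  # no more rows: assume we ran off the last page
        htmls.append(r.text)
    return htmls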

def parse_single_html(html):
    """Pull the school names out of one result page."""
    names = []
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    # Guard against pages that have no result table at all.
    if isinstance(tbody, bs4.element.Tag):
        for tr in tbody.find_all("tr"):
            a = tr.find("td").find("a")
            names.append(a.getText().replace(' ', ''))  # strip spaces from the name
    return names
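pandas is imported at the top but never used. It could actually do the parsing by itself: pd.read_html pulls every <table> on a page into DataFrames. A sketch, assuming the school list is the first table on the page and the names sit in its first column (pd.read_html also needs lxml or html5lib installed):

import io

def parse_with_pandas(html):
    # pd.read_html returns one DataFrame per <table> on the page.
    tables = pd.read_html(io.StringIO(html))
    if not tables:
        return []
    # Assumption: first table = school list, first column = school names.
    return [str(v).replace(' ', '') for v in tables[0].iloc[:, 0]]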

conn = pymysql.connect(
    host='localhost',   # my database host
    port=3306,          # an int, so no quotes needed
    user='root',
    password='******',
    db='yanhu',
    charset='utf8'
)
cursor = conn.cursor()  # get a cursor
sql = 'insert into school (name,fenshu,bili) values (%s,%s,%s);'
for idx in range(2):
    names = parse_single_html(htmls[idx])
    for name in names:
        print(name)
        fenshu = 420  # placeholder score for now
        bili = 0.2    # placeholder ratio for now
        cursor.execute(sql, [name, fenshu, bili])

conn.commit()

cursor.close()
conn.close()
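
For reference, the one-row-at-a-time loop above could be tightened into a single executemany call. The sketch below also has to guess at the school table's schema, since it never appears in this post; the column types are assumptions that merely match the INSERT:

CREATE_SQL = """
CREATE TABLE IF NOT EXISTS school (
    id     INT AUTO_INCREMENT PRIMARY KEY,
    name   VARCHAR(100),  -- school name (assumed type)
    fenshu INT,           -- score, hard-coded to 420 above (assumed type)
    bili   FLOAT          -- ratio, hard-coded to 0.2 above (assumed type)
)
"""

def save_names(names, conn):
    # Batch-insert with the same placeholder fenshu/bili as above.
    rows = [(n, 420, 0.2) for n in names]
    with conn.cursor() as cursor:
        cursor.execute(CREATE_SQL)
        cursor.executemany(
            'insert into school (name,fenshu,bili) values (%s,%s,%s)',
            rows,
        )
    conn.commit()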
Original post: https://www.cnblogs.com/wangzhaojun1670/p/12762758.html