Python抓取目前全国大学名单

#!/usr/bin/env python
# -*- coding:utf-8 -*-
#@Time  : 2020/4/23 8:57
#@Author: ltl
#@File  : daxueDownload.py


import urllib.request
import threading
from time import ctime
from bs4 import BeautifulSoup


def downloadDxmd(site='http://www.chinadegrees.cn/xwyyjsjyxx/xwsytjxx/qgptgxmd/qgptgxmd.html'):
    """Download an HTML page and print every five-cell table row it contains.

    The page at ``site`` is parsed with BeautifulSoup. Every ``<tr>`` in the
    document except the first one is inspected; rows that hold exactly five
    ``<td>`` cells are printed as one tab-separated line, each cell centred in
    a 20-character field. Rows with any other cell count are ignored.

    Args:
        site: URL of the page to fetch. Defaults to the URL the script was
            originally written for.

    Returns:
        None. The rows are written to standard output.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the page
            cannot be fetched.
    """
    # Use the response as a context manager so the connection is closed even
    # if parsing fails; BeautifulSoup reads the whole body in its constructor.
    with urllib.request.urlopen(site) as response:
        soup = BeautifulSoup(response, 'html.parser')

    # Loop-invariant pieces, built once. Field {5} is the fill character.
    tplt = "{0:{5}^20}	{1:{5}^20}	{2:{5}^20}	{3:{5}^20}	{4:{5}^20}"
    # U+3000 IDEOGRAPHIC SPACE: padding with a full-width space keeps columns
    # of CJK text visually aligned.
    fill = chr(12288)

    # The first <tr> is skipped (presumably the header row -- confirm against
    # the live page).
    for row in soup.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) != 5:
            continue
        # get_text() copes with empty cells and with cells whose first child
        # is a nested tag, both of which broke the former `.contents[0]`
        # lookup; strip=True drops stray whitespace that would skew centring.
        texts = [cell.get_text(strip=True) for cell in cells]
        print(tplt.format(*texts, fill))


# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if "__main__" == __name__:
    downloadDxmd()

运行结果为:

愿我们漂泊半生, 归来仍少年!
原文地址:https://www.cnblogs.com/Lonnn/p/12758690.html