# 爬取校园新闻首页的新闻 (scrape the news items listed on the campus news front page)

import requests
from bs4 import BeautifulSoup
import string
import time
import datetime


# HTTP request headers sent with every request; only a desktop-Chrome
# user-agent string is set.
head = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/55.0.2883.87 Safari/537.36'
    ),
}

def extract_field(info, label):
    """Return the value that immediately follows *label* in *info*.

    The value is the remainder of the whitespace-delimited token that
    starts with *label* (e.g. ``'作者:张三 审核:李四'`` with label
    ``'作者:'`` gives ``'张三'``).

    Returns an empty string when *label* does not occur in *info* or when
    nothing follows it before the next whitespace.
    """
    start = info.find(label)
    # str.find returns -1 when absent; index 0 is a valid match.
    if start == -1:
        return ''
    tokens = info[start:].split()
    if not tokens:
        return ''
    # Slice the prefix off by length. str.lstrip(label) would strip a
    # *character set* and could eat leading characters of the value.
    return tokens[0][len(label):]


def parse_publish_time(info, label='发布时间:'):
    """Parse the publish timestamp that follows *label* in *info*.

    The 19 characters after the label are expected to look like
    ``YYYY-MM-DD HH:MM:SS``. If the label is absent, the first 19
    non-whitespace-leading characters of *info* are parsed instead.

    Raises:
        ValueError: if the extracted text does not match the format.
    """
    start = info.find(label)
    offset = 0 if start == -1 else start + len(label)
    stamp = info[offset:].lstrip()[:19]
    return datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')


def main():
    """Print every news item on the listing page, then its detail page.

    For each ``li`` that carries a ``.news-list-title`` element, prints the
    title, time, source and link from the listing, fetches the linked
    article, and prints its info line, parsed publish time, the
    author / reviewer / source / photographer fields and the article body.
    """
    listing = requests.get("http://news.gzcc.cn/html/xiaoyuanxinwen/",
                           headers=head, timeout=10)
    listing.encoding = 'utf-8'
    listing_soup = BeautifulSoup(listing.text, 'html.parser')

    for item in listing_soup.select('li'):
        titles = item.select(".news-list-title")
        if not titles:
            continue

        title = titles[0].text
        info_node = item.select(".news-list-info")[0]
        posted = info_node.contents[0].text
        source = info_node.contents[1].text
        link = item.select("a")[0].attrs['href']

        print('\n'.join(["标题:" + title,
                         "时间:" + posted,
                         "来源:" + source,
                         "链接:" + link]) + '\n\n')
        print()
        print()
        print()

        # Fetch the article page; a separate name keeps the listing soup intact.
        article = requests.get(link, headers=head, timeout=10)
        article.encoding = 'utf-8'
        article_soup = BeautifulSoup(article.text, 'html.parser')
        info = article_soup.select(".show-info")[0].text

        # Echo the info line token by token, each followed by a space.
        # The last token is dropped, as in the original script
        # (presumably the click counter -- TODO confirm).
        print(''.join(token + ' ' for token in info.split()[:-1]))

        print()
        # Convert the publish time to a datetime object.
        print("datetime类型时间:", parse_publish_time(info))

        # Author, reviewer, source, photographer -- in that order.
        for label in ('作者:', '审核:', '来源:', '摄影:'):
            print()
            print(label, extract_field(info, label))

        print()
        # Print the article body.
        print(article_soup.select("#content")[0].text)
        print()
        print()
        print()


if __name__ == "__main__":
    main()

# 原文地址 (original post): https://www.cnblogs.com/wban48/p/8691850.html