用urllib.request库和BeautifulSoup4库爬取新闻列表

import urllib.request as urllib2
from bs4 import BeautifulSoup
# Fetch the campus-news listing page and print one summary line per article.
url = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'
request = urllib2.Request(url)
# Open the response as a context manager so the underlying HTTP connection
# is closed once the body has been read, rather than left open.
with urllib2.urlopen(request) as response:
    bsObj = BeautifulSoup(response.read(), "html.parser")

for item in bsObj.select('li'):
    # Only <li> elements that contain a .news-list-title node are processed;
    # every other <li> is skipped.
    if not item.select('.news-list-title'):
        continue
    # Look the info node up once; its first two children are read as the
    # publication time and the source, in that order.
    info = item.select('.news-list-info')[0]
    published = info.contents[0].text
    source = info.contents[1].text
    title = item.select('.news-list-title')[0].text
    describe = item.select('.news-list-description')[0].text
    # Use a separate name for the article link so the listing-page `url`
    # above is not overwritten on every iteration.
    link = item.select('a')[0]['href']
    print('时间:'+published,'来源:'+source,'标题:'+title,'简要描述:'+describe,'url:'+ link)

  

原文地址:https://www.cnblogs.com/amzinghui/p/7601574.html