# Scrape headline titles and their URLs from Sina News (爬取新浪网的标题和网址)

import requests
from bs4 import BeautifulSoup

# Crawl the Sina China-news landing page and print the text and target URL
# of every link whose text is longer than five characters.
url = 'https://news.sina.com.cn/china'  # news page to crawl

# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of scraping an error page.
res.raise_for_status()
# Decode the body as UTF-8 explicitly rather than relying on the encoding
# requests infers from the response headers.
res.encoding = "UTF-8"
# Parse the page with the built-in html.parser backend.
soup = BeautifulSoup(res.text, 'html.parser')

print('开始爬取')

for news in soup.select("a"):
    title = news.text
    # Only links with more than five characters of text are reported
    # (presumably to drop short navigation labels).
    if len(title) <= 5:
        continue
    # Not every <a> tag carries an href (e.g. named anchors); indexing with
    # news['href'] would raise KeyError, so look it up safely and skip those.
    href = news.get('href')
    if href is None:
        continue
    print(title, href)
# Original article: https://www.cnblogs.com/1gaoyu/p/12522084.html