# Case 2: Sina News crawler (案例2 新浪新闻爬虫)

# 引入模块
import os

import requests

def get_news_data(kw):
    """Fetch Sina news search results for the keyword *kw*.

    Downloads result pages 1-5 from search.sina.com.cn and stores each
    raw HTML response under ``data/`` as ``page=<n>.html``.

    :param kw: search keyword (interpolated into the query string).
    """
    # The original open() failed if the output directory was missing.
    os.makedirs("data", exist_ok=True)
    for i in range(1, 6):
        # Two adjacent f-strings must be wrapped in parentheses for
        # implicit concatenation — the original split was a SyntaxError.
        url = (
            f"https://search.sina.com.cn/?q={kw}&c=news&from=&col=&range=all&source=&country=&"
            f"size=10&stime=&etime=&time=&dpc=0&a=&ps=0&pf=0&page={i}"
        )
        # 采集目标数据 — timeout so a stalled connection cannot hang forever.
        resp = requests.get(url, timeout=10)
        # 存储采集到的数据 — f"page={i}" equals the original url[-6:] for pages 1-9.
        _save_data(f"data/page={i}.html", resp.content)


def _save_data(name, content):
    """Persist *content* to a file at path *name*.

    :param name: destination file path.
    :param content: raw bytes to write (binary mode, no decoding).
    """
    with open(name, "wb") as handle:
        handle.write(content)


# Guard the entry point so importing this module does not prompt the
# user or start a crawl — only direct execution does.
if __name__ == "__main__":
    kw = input("请输入要搜索的关键词:")
    get_news_data(kw)

# 原文地址 (original source): https://www.cnblogs.com/duxiangjie/p/13924796.html