A fairly simple web crawler

import requests

class QiushiSpider:
    """
    事件初始化
    """
    def __init__(self,qiushi_name_crawl):
        self.qiushi_name =qiushi_name_crawl
        self.url_base = 'https://www.qiushibaike.com/8hr/page/{}'
        self.headers = {'User-Agent': 'WSF'}
    def make_url_lists(self):
        # Build the list of page URLs to crawl (pages 1 through 10).
        return [self.url_base.format(i) for i in range(1, 11)]
    def download_url(self, url_str):
        # Fetch the page with our request headers and return the raw response body.
        result = requests.get(url_str, headers=self.headers)
        return result.content
    def save_result(self, result, page_num):
        # Save the downloaded content to an HTML file named after the spider and the page number.
        file_path = '{}-page-{}.html'.format(self.qiushi_name, page_num)
        with open(file_path, 'wb') as f:
            f.write(result)
    def run(self):
        # Crawl every URL in the list and save each page under its page number.
        url_lists = self.make_url_lists()
        for page_num, url_str in enumerate(url_lists, start=1):
            result = self.download_url(url_str)
            self.save_result(result, page_num)

if __name__ == '__main__':
    qiushi_spider = QiushiSpider('p_num')
    qiushi_spider.run()
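
As a side note, a slightly more defensive download step could add a request timeout and a status check, so a slow server or an error page does not hang the run or get written out as if it were valid content. A minimal sketch, assuming requests is available; the 10-second timeout and the raise_for_status() call are additions of mine, not part of the original post:

import requests

def download_url_safe(url_str, headers, timeout=10):
    # Fetch a page, failing fast on timeouts and non-2xx responses.
    result = requests.get(url_str, headers=headers, timeout=timeout)
    result.raise_for_status()  # raises requests.HTTPError on 4xx/5xx responses
    return result.content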
Original post: https://www.cnblogs.com/liangliangzz/p/10137755.html