Downloading the images of a Weibo post

wb_id is the id of the Weibo post; in the page HTML it is the value of the mid attribute on the post's div element.


# Image area, multiple images
self.multi_media_xpath = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li/img/@src"
# Image area, single image
self.single_media_xpath = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li//img/@src"
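
Before looking at the extraction method itself, here is a minimal sketch of how these templates are meant to be used, assuming the page HTML has already been fetched (for example with requests) and is parsed with lxml; page_source and wb_id below are placeholder values, not taken from the original post.

from lxml import etree

page_source = "<html>...</html>"    # placeholder: HTML of the Weibo page, fetched elsewhere
wb_id = "1234567890"                # hypothetical mid value of one post

root = etree.HTML(page_source)
# The post's mid is substituted into the template, then the XPath is run
# against the parsed tree to collect the img src values of that post.
xpath_tpl = "//div[@mid='{}' and not(@minfo)]//div[@class='WB_detail']//div[@class='media_box']/ul/li/img/@src"
src_list = root.xpath(xpath_tpl.format(wb_id))   # list of src strings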

    @decorator
    def get_img_list(self, root, wb_id):
        # Check whether the post carries a single image or multiple images
        imgurllist = []
        single_img_node_list = root.xpath(self.single_media_xpath.format(wb_id))
        # Excludes 360 long images
        multi_img_node_list = root.xpath(self.multi_media_xpath.format(wb_id))
        if len(multi_img_node_list) > 1:
            imgurllist = ["http:" + i.replace("thumb150", "mw690") for i in multi_img_node_list]
            return imgurllist
        elif single_img_node_list:
            # URL form of a single image
            imgurllist = ["http:" + i.replace("orj360", "mw690") for i in single_img_node_list]
        else:
            print("This post has no images")
        return imgurllist
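
To make the URL rewriting concrete: the src values in the page are protocol-relative and point at thumbnails (thumb150 for multi-image posts, orj360 for single-image posts), so the method prepends "http:" and swaps the size segment for mw690 to get a larger copy. A tiny illustration with a hypothetical src value:

thumb_src = "//wx1.sinaimg.cn/thumb150/abc123.jpg"   # hypothetical src value taken from the page
full_src = "http:" + thumb_src.replace("thumb150", "mw690")
print(full_src)   # http://wx1.sinaimg.cn/mw690/abc123.jpg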

    def save_imge(self, url, id_path, retry=1):
        if retry > 3:
            print("Retried more than three times; giving up on this image")
            return None
        filepath = id_path
        urlname = url.split('/')[-1]
        filename = os.path.join(filepath, urlname)
        if not os.path.exists(filepath):
            os.makedirs(filepath)
        if not os.path.exists(filename):
            try:
                ir = requests.get(url, timeout=10)
                print("Downloading url", url, "id", id_path)
                with open(filename, "wb") as fs:
                    fs.write(ir.content)
            except requests.RequestException:
                # On a timeout or connection error, wait a moment and retry recursively
                time.sleep(3)
                print("Image download timed out, retrying, attempt", retry)
                self.save_imge(url, id_path, retry + 1)
        else:
            print("Image already exists")

  

Original post: https://www.cnblogs.com/c-x-a/p/9146192.html