2、Python requests、BeautifulSoup(download mm_pic)

import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
class DownLoadImg(object):
    """Download the images found on the newest pages of a paginated board.

    The board's front page (``self.url``) is read to find the current page
    number; pages are then walked backwards from that number and every
    ``<img>`` found on each page is saved into ``download_dir``.
    """

    # Directory (relative to the current working directory) receiving files.
    download_dir = 'download_mm'
    # Seconds to wait on a request; without it a stalled server would hang
    # the script forever.
    timeout = 30

    def __init__(self):
        self.url = 'http://jandan.net/ooxx/'
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36'}

    def get_Page(self):
        """Return the current page number shown on the front page, as a string.

        Raises:
            IndexError: if the page has no ``span.current-comment-page``
                element (e.g. the site layout changed or the request was
                blocked).
        """
        response = self.open_url(self.url)
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.text, 'html.parser')
        matches = soup.select('span.current-comment-page')
        if not matches:
            raise IndexError(
                'no span.current-comment-page element found at ' + self.url)
        # The original code dropped the first and last character of the span
        # text, i.e. it expects something like "[123]"; strip the brackets.
        return matches[0].get_text(strip=True).strip('[]')

    def open_url(self, url):
        """GET *url* with the configured User-Agent and return the response."""
        # ``headers=`` must be a keyword: the second positional parameter of
        # requests.get is ``params`` (query string), not the HTTP headers.
        return requests.get(url, headers=self.header, timeout=self.timeout)

    @staticmethod
    def _resolve_src(page_url, src):
        """Turn an ``<img src>`` value into an absolute URL relative to *page_url*."""
        # urljoin handles protocol-relative ("//host/x.jpg"), absolute and
        # path-relative values alike.
        return urljoin(page_url, src)

    @staticmethod
    def _image_name(src):
        """Return the file name part of *src* (no query string); may be empty."""
        return os.path.basename(urlsplit(src).path)

    def get_and_save_Img(self):
        """Ask how many pages to fetch, then download every image on them.

        Pages are visited from the newest one downwards. Images that cannot
        be fetched are reported and skipped so one bad link does not abort
        the whole run.

        Raises:
            ValueError: if the entered page count, or the page number scraped
                from the site, is not an integer.
        """
        # Write through explicit paths rather than os.chdir(): changing the
        # process-wide working directory would make a second call nest
        # download_mm/download_mm.
        os.makedirs(self.download_dir, exist_ok=True)

        times = input('Please enter the number of pages to download:')
        page_count = int(times)
        newest = int(self.get_Page())

        for page in range(newest, newest - page_count, -1):
            if page < 1:
                # Ran out of pages before reaching the requested count.
                break
            url = self.url + 'page-' + str(page)
            print(url)
            response = self.open_url(url)
            bs = BeautifulSoup(response.text, 'html.parser')

            for img in bs.select('img'):
                src = img.attrs.get('src')
                if not src:
                    continue
                src = self._resolve_src(url, src)
                img_name = self._image_name(src)
                if not img_name:
                    continue
                try:
                    get_img = self.open_url(src)
                except requests.RequestException as err:
                    print('skip %s: %s' % (src, err))
                    continue
                if not get_img.ok:
                    # Do not store an HTTP error page under an image name.
                    print('skip %s: HTTP %s' % (src, get_img.status_code))
                    continue

                target = os.path.join(self.download_dir, img_name)
                with open(target, 'wb') as out:
                    out.write(get_img.content)

if __name__ == "__main__":
    # Script entry point: build a downloader and start the interactive
    # download (it prompts for the number of pages).
    downloader = DownLoadImg()
    downloader.get_and_save_Img()

  

原文地址:https://www.cnblogs.com/royfans/p/7417928.html