[Crawler] Huaban Pin Downloader

A friend who does UI work said Huaban can no longer be accessed, but your personal pins can still be retrieved.
Better download them and back them up while you can.

Huaban pin downloader for Windows

Extraction code: muy1
By the way, the API may change; verified working on January 29, 2019.

Python 3 + requests
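
The only third-party dependency is requests (pip install requests). Before the full script, here is a minimal sketch of the core call it revolves around: fetching one page of a user's pins from the JSON endpoint, where each pin yields the image key that is later appended to img.hb.aicdn.com/. The urlname, cookie, and starting pin_id values below are placeholders for illustration; the full script obtains them automatically by logging in.

import requests

# Placeholders for illustration only -- the full script below obtains these by logging in.
URLNAME = "your_urlname"              # the "urlname" field returned after login
COOKIES = {"sid": "your_session_id"}  # session cookie captured after login
MAX_PIN_ID = 123456789                # pin_id to page backwards from

headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json",
    "X-Requested-With": "XMLHttpRequest",
    "X-Request": "JSON",
}

# "max" pages backwards from a pin_id, "limit" caps the page size at 100
url = "http://login.meiwu.co/%s/pins/?max=%s&limit=100&wfl=1" % (URLNAME, MAX_PIN_ID)
r = requests.get(url, headers=headers, cookies=COOKIES, timeout=30)
for pin in r.json()["user"]["pins"]:
    print(pin["pin_id"], pin["file"]["key"])  # image URL = "http://img.hb.aicdn.com/" + key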

# -*- encoding:utf-8 -*-
'''
   author:thewindkee
'''
import requests
import urllib
import json
import re
import time
import random
# import queue
import os
import sys

INDEX_URL='http://login.meiwu.co/xx'
USER_PIN_URL_FORMAT='http://login.meiwu.co/%s/pins/'
DOWNLOAD_URL='http://img.hb.aicdn.com/'

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
    "Accept": "application/json"
}
# all pins collected for the user
PIN_MAP = {}
# pins that failed to download
FAILED_TO_DOWN = []
FAILED_TXT = "fail.txt"
# cookies captured after login
LOGIN_COOKIES = {}
# character pool for the random query fragment
seed = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# pictures already present in the download directory
EXISTED_PIC = []
PIC_POSTFIX = '.jpg'
DOWANLOAD_DIR = 'huaban'


def login(email,password):
    LOGIN_URL = "http://login.meiwu.co/auth/"
    r=requests.post(LOGIN_URL, headers=HEADERS,data = {'email':email,'password':password,'_ref':'loginPage'},timeout=10)
    if r.status_code == requests.codes.ok:
        # print("cookies:")
        for key,value in r.cookies.items():
            # print('%s,%s'%(key,value))
            LOGIN_COOKIES[key]=value
        urlname=get_urlname(r.text)
        if urlname:
            return USER_PIN_URL_FORMAT%urlname
    print("登陆失败")
    sys.exit(0)


def build_url_for_test():
    LOGIN_COOKIES['sid']='xx'
    urlname='xx'
    return USER_PIN_URL_FORMAT%urlname

def get_pin_max(content):
    # matches e.g. "pin_id": 3466
    r = re.search(r'"pin_id": *(\d+)', content)
    # print(r)
    if r:  # only return a value when a pin_id is present
        return r.group(1)

def get_urlname(content):
    r = re.search(r'"urlname": *"(\S+)"', content)
    # print(r)
    if r:
        return r.group(1)

def randomStr(length):
    # build a random string of the given length from the seed pool
    return ''.join(random.sample(seed, length))


def build_headers_for_pin():
    HEADERS['X-Requested-With']='XMLHttpRequest'
    HEADERS['X-Request']='JSON'
    return HEADERS


def get_page_pins(user_pin_url, max_pin):
    try:
        # append a random query fragment plus the max/limit pagination parameters
        user_pin_url = '%s?%s&max=%s&limit=100&wfl=1' % (user_pin_url, randomStr(8), max_pin)
        print('Collecting pin info, url: %s' % user_pin_url)
        r = requests.get(user_pin_url, headers=HEADERS, cookies=LOGIN_COOKIES, timeout=30)
        r.encoding = 'UTF-8'
        last_pin_id = max_pin
        if r.status_code == requests.codes.ok:
            d = json.loads(r.text)
            pins = d['user']['pins']
            if pins:
                for pinItem in pins:
                    last_pin_id = pinItem['pin_id']
                    PIN_MAP[str(last_pin_id)] = pinItem['file']['key']
                return last_pin_id
            else:
                return None
    except Exception as e:
        print(str(e))

def save_pin_map(data):
    with open("all.txt",'w') as f:
        f.write(str(data))

def download(url, name):
    try:
        r = requests.get(url, headers=HEADERS, timeout=30)
        with open(name, 'wb') as f:
            f.write(r.content)
    except Exception as e:
        print(str(e))
        FAILED_TO_DOWN.append(url)

def download_all(total):
    i=1
    for pin_id,key in PIN_MAP.items():
        url=DOWNLOAD_URL+key
        print('%d/%d downloading %s' % (i, total, url))
        i+=1
        if(not downloaded(pin_id)):
            download(url,pin_id+PIC_POSTFIX)
            time.sleep(0.5)


def downloaded(pin_id):
    # path=os.getcwd()+os.sep+pin_id+PIC_POSTFIX
    file_name=pin_id+PIC_POSTFIX
    if file_name in EXISTED_PIC:
        print('\t%s already exists' % file_name)
        return True
    else:
        return False

def save_failed_to_down_url():
    with open(FAILED_TXT,'w') as f:
        f.write(",".join(FAILED_TO_DOWN) )


def prepare_pic_dir(DOWANLOAD_PATH):
    if not os.path.exists(DOWANLOAD_PATH): 
        print("创建下载目录:%s"%DOWANLOAD_PATH)
        os.makedirs(DOWANLOAD_PATH)
    os.chdir(DOWANLOAD_PATH)

def get_first_max(user_pin_url_index):
    r=requests.get(user_pin_url_index, headers=HEADERS,cookies=LOGIN_COOKIES,timeout=30)
    # +1 so that the pin with id == max is itself included in the first page
    return int(get_pin_max(r.text))+1


def main():
    try:
        # urlname=build_url_for_test()
        DOWANLOAD_PATH=(os.getcwd()+os.sep+DOWANLOAD_DIR).strip()
        print("下载花瓣采集到文件夹:%s "%DOWANLOAD_PATH)
        EMAIL=input('请输入账号
')
        PASSWORD=input('请输入密码
')
        USER_PIN_URL=login(EMAIL,PASSWORD)
        MAX=get_first_max(USER_PIN_URL)
        print(MAX)
        build_headers_for_pin()
        while(True):
             MAX=get_page_pins(USER_PIN_URL,MAX)
             time.sleep(0.5)
             if not MAX:
                break
        prepare_pic_dir(DOWANLOAD_PATH)
        build_existed_pic(DOWANLOAD_PATH)
        save_pin_map(PIN_MAP)
        total=len(PIN_MAP)
        print('Total: %d images, starting download!' % total)
        download_all(total)
        if FAILED_TO_DOWN:
            print("%s images failed to download, see the %s file" % (len(FAILED_TO_DOWN), FAILED_TXT))
        else:
            print("Download finished!")
        print('Image download directory: %s' % DOWANLOAD_PATH)
        save_failed_to_down_url()
    except Exception as e:
        print(str(e))

def build_existed_pic(DOWANLOAD_PATH):
    # record pictures already on disk so they are not downloaded again
    for file in os.listdir(DOWANLOAD_PATH):
        if os.path.isfile(os.path.join(DOWANLOAD_PATH, file)) and file.endswith(PIC_POSTFIX):
            EXISTED_PIC.append(file)


if __name__=='__main__':
    try:
        main()
    finally:
        input("任意键退出")

The result: [screenshot]

Original post: https://www.cnblogs.com/thewindkee/p/12873155.html