# 创意抓取及导出

# _*_ coding=utf-8 _*_
import requests
import time
import math
import os
import pandas as pd

# Session cookie typed in by the operator when the script starts.
cookies = input('请输入Cookie:')

# HTTP headers passed to every requests.get() call in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Cookie': cookies,
    'Host': 'xgop.in.zhihu.com',
    'Referer': '***',
}

# Lookup tables that turn numeric ids from the API into display labels.
# Template id -> creative style.
tempmap = {
    6: '大图',
    7: '文字链',
    8: '小图',
    10: '多图',
    11: '视频',
}
# Zone id -> placement name.
zonemap = {
    8: '知乎 APP 信息流',
    152: '知乎极速版首页',
    153: '知乎极速版回答页',
    20: '推荐阅读',
    33: '搜索',
    30: 'App问题页信息流',
}
# Position id -> page name.
positionmap = {
    1: '首页',
    3: '回答页',
    2: '问题页',
}
# OS id -> platform name.
osmap = {
    1: '安卓',
    2: '苹果',
}
# Network id -> connection type.
networkmap = {
    1: 'wifi',
    2: '2G',
    3: '3G',
    4: '4G',
}
# Price-tier id -> device price range.
equipmentPricemap = {
    1: '1500元以下',
    2: '1500-2500元',
    3: '2500-3500元',
    4: '3500元-4500元',
    5: '4500元以上',
}
# Operator id -> mobile carrier.
mobileOperatormap = {
    0: '中国移动',
    1: '中国联通',
    2: '中国电信',
}
# Gender id -> label (both labels are empty strings here).
gendermap = {
    0: '',
    1: '',
}

# Accumulates one dict per parsed creative across all fetched pages.
all_data = []


# Seconds to wait for the server before giving up on a page request.
_REQUEST_TIMEOUT = 30


def _join_labels(mapping, ids):
    """Translate every id in *ids* through *mapping* and join the labels.

    Ids missing from *mapping* are rendered as '不限'. An empty *ids* (or a
    result that joins to an empty string) also yields '不限'.
    """
    return ', '.join(mapping.get(d, '不限') for d in ids) or '不限'


def _optional_labels(mapping, targeting, key):
    """Like _join_labels for targeting[key], but return '' if it is unusable."""
    try:
        return _join_labels(mapping, targeting[key])
    except (KeyError, TypeError):
        return ''


def _optional_value(container, *path):
    """Follow *path* through nested containers; return '' if any step fails."""
    try:
        for key in path:
            container = container[key]
    except (KeyError, IndexError, TypeError):
        return ''
    return container


def _interest_label(targeting):
    """Return the interest targeting, '不限' when empty, '' when unavailable."""
    try:
        interest = targeting['interest']
        return interest if len(interest) > 0 else '不限'
    except (KeyError, TypeError):
        return ''


def _fill_record(item, record):
    """Copy the fields of one API creative *item* into *record* in place.

    Fields are written in a fixed order so that, if a required key is missing
    and an exception propagates, *record* still holds everything parsed up to
    that point.
    """
    record['创意id'] = item['id']
    record['账户id'] = item['userId']
    record['目标类型'] = item['targetType']

    asset = item['asset']
    record['标题'] = asset['title']['value']
    record['描述'] = asset['desc']['value']
    record['图片url'] = _optional_value(asset, 'main', 'url')
    record['cta'] = asset['cta']['value']
    record['状态'] = item['status']
    record['创意名称'] = item['name']

    counter = item['counter']
    record['曝光'] = counter['impression']
    record['点击'] = counter['click']
    record['点击率'] = counter['clickRatio']
    # Monetary values are divided by 100 before being exported.
    record['点击价格'] = counter['clickPrice'] / 100
    record['花费'] = counter['cost'] / 100

    ad = item['ad']
    record['样式'] = tempmap.get(ad['templateId'])
    record['推广开始日期'] = ad['dateBegin']
    record['产品id'] = item['productId']
    record['出价'] = ad['price'] / 100

    targeting = ad['targeting']
    record['投放平台'] = _join_labels(osmap, targeting['os'])
    record['app行为'] = _optional_value(targeting, 'appCategory')
    record['自定义人群'] = _optional_value(targeting, 'crowd')
    record['性别'] = _join_labels(gendermap, targeting['gender'])
    record['兴趣'] = _interest_label(targeting)
    record['网络'] = _join_labels(networkmap, targeting['network'])
    record['运营商'] = _optional_labels(mobileOperatormap, targeting, 'mobileOperator')
    record['设备价格'] = _optional_labels(equipmentPricemap, targeting, 'equipmentPrice')
    record['关键词'] = ', '.join(targeting['keyword'])
    record['创意展现方式'] = ad['strategy']['creative']
    record['编辑页面地址'] = '****'.format(record['账户id'], record['创意id'])
    # Zone ids are looked up in zonemap first; positionmap is kept as a
    # fallback so ids it already covered still resolve to the same label.
    record['展现位置'] = ', '.join(
        zonemap.get(d, positionmap.get(d, '未知')) for d in ad['zoneIds']
    )


def get_single_data(url):
    """Fetch one result page and append every creative on it to ``all_data``.

    Args:
        url: Fully formatted page URL to request with the module ``headers``.

    Returns:
        None. Parsed rows are appended to the module-level ``all_data`` list
        and the running row count is printed after each append.

    Request failures, non-JSON bodies and per-record parse errors are printed
    and skipped; they never propagate to the caller.
    """
    try:
        res = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except Exception as e:
        # Boundary with the network: report and move on to the next page.
        print('异常请求链接--->' + url + str(e))
        return

    try:
        payload = res.json()
    except ValueError as e:
        # Body was not JSON (e.g. an HTML login page after cookie expiry).
        print('异常解析链接--->' + url + str(e))
        return

    data = payload.get('result', 0) if isinstance(payload, dict) else None
    if not data:
        return

    for item in data:
        record = {}
        try:
            _fill_record(item, record)
        except Exception as e:
            # Best effort: keep whatever was parsed before the failure.
            print('异常解析链接--->' + url + str(e))
        # Rows without a parsed cost count as 0 and are therefore kept too.
        if float(record.get('花费', 0)) >= 0:
            all_data.append(record)
            print(len(all_data))


def get_all_urls(userid, start_time, end_time):
    """Build the list of page URLs covering every creative of one account.

    The first page is requested to read ``totalCount``; the number of pages
    is derived from it assuming 10 records per page.

    Args:
        userid: Account id; must be convertible with ``int()``.
        start_time: Start of the reporting window, formatted with ``str()``.
        end_time: End of the reporting window, formatted with ``str()``.

    Returns:
        A list of URLs for pages 1..total_page inclusive. An empty list is
        returned (after printing the reason) when the user id is invalid, the
        request fails, or the response lacks a usable ``totalCount``.
    """
    page_size = 10
    base_url = '******'

    def page_url(page):
        return base_url.format(
            page=int(page),
            userid=int(userid),
            start_time=str(start_time),
            end_time=str(end_time),
        )

    try:
        first_page_url = page_url(1)
    except (TypeError, ValueError) as e:
        # userid was not numeric (e.g. an empty token from a trailing comma).
        print('异常all链接--->' + str(userid) + str(e))
        return []

    try:
        res = requests.get(first_page_url, headers=headers, timeout=30)
        total_page = math.ceil(res.json()['totalCount'] / page_size)
    except Exception as e:
        # Network error, non-JSON body, or missing/invalid totalCount.
        print('异常all链接--->' + first_page_url + str(e))
        return []

    # range() excludes its upper bound, so +1 is needed to reach the last page.
    return [page_url(page) for page in range(1, total_page + 1)]



def main():
    """Prompt for accounts and a date range, fetch all creatives, export to Excel.

    The workbook is written to a directory named after the raw uid input,
    inside the current working directory. Nothing is fetched or written when
    no usable uid was entered.
    """
    uids = input('请输入uids(格式:111,222,333):')
    start_time = input('请输入开始时间(格式:2018-01-01):')
    end_time = input('请输入结束时间(格式:2018-07-03):')

    # Drop empty tokens produced by blank input or stray/trailing commas.
    user_ids = [uid.strip() for uid in uids.split(',') if uid.strip()]
    if not user_ids:
        print('未输入有效的uid')
        return

    for userid in user_ids:
        # `or []` guards against a None result when URL discovery fails.
        for url in get_all_urls(userid, start_time, end_time) or []:
            # Pause before every page request.
            time.sleep(3)
            get_single_data(url)

    df1 = pd.DataFrame(all_data)

    # Create the output directory if needed, without an exists/mkdir race.
    os.makedirs(uids, exist_ok=True)

    filename = str(uids) + "-" + time.strftime("%Y%m%d%H%M") + '有消费创意' + '.xlsx'
    df1.to_excel(os.path.join(uids, filename), index=False)
    print('done')


if __name__ == '__main__':
    main()
# 原文地址:https://www.cnblogs.com/Erick-L/p/9390223.html