京东

# -*- coding:utf-8 -*-
import requests
import urllib.request
import json
import xlwt
import time
import datetime
# list_url=[]
# list_title=[]
# list_img=[]
# wb = xlwt.Workbook()
# ws = wb.add_sheet('电脑数码')
# url='http://f.3.cn/index-floor?argv=basic_3'
# url_data = urllib.request.urlopen(url).read().decode("utf-8")
# # url_data=requests.get(url)
# # url_data=url_data.text
# url_data=url_data.replace('jsonCallBackbasic_6(','')
# url_data=url_data.replace(')','')
# jsDict = json.loads(url_data)
# jsdata=jsDict['data']
def one_floor(jsdata, wb, ws):
    """Export one floor of the JD home page to a worksheet and fetch its images.

    The first column of ``jsdata['cols']`` is flattened into three parallel
    lists (link URL, image URL, position title).  One spreadsheet row is
    written per entry, the workbook is saved, and every entry that has an
    image is downloaded to disk under its position title.

    Args:
        jsdata: Mapping with a ``'cols'`` list whose first element provides
            ``'brand'``, ``'content'`` (at least two items, each with
            ``'cover'`` and ``'bottom'``) and ``'tag'``.
        wb: Workbook object; only ``wb.save(path)`` is called.
        ws: Worksheet object; only ``ws.write(row, col, value)`` is called.

    Raises:
        KeyError: If an expected key is missing from ``jsdata``.
        IndexError: If ``'content'`` has fewer than two items, or the second
            cover list is shorter than the first.
    """
    # Backslashes are escaped explicitly: a literal such as 'D:\jd_pc_pic\'
    # would escape its own closing quote and fail to compile.
    workbook_path = 'D:\\jd\\jd_pc.xls'
    picture_prefix = 'D:\\jd_pc_pic\\'

    list_url = []
    list_title = []
    list_img = []

    def add(url, img, title):
        # Keep the three parallel lists in step with each other.
        list_url.append(url)
        list_img.append(img)
        list_title.append(title)

    jscols = jsdata['cols'][0]
    jscols_brand = jscols['brand']
    jscols_content = jscols['content']
    jscols_tag = jscols['tag']
    print(jscols_tag)
    print(jscols_content)
    print(len(jscols_content))

    left, right = jscols_content[0], jscols_content[1]
    print(left)
    print(right)
    left_cover, right_cover = left['cover'], right['cover']
    left_bottom, right_bottom = left['bottom'], right['bottom']
    print(left_cover)
    print(right_cover)
    print(left_bottom)
    print(right_bottom)

    # Cover entries are interleaved left/right; the left list drives the loop,
    # so the right list must be at least as long.
    for idx, left_item in enumerate(left_cover):
        right_item = right_cover[idx]
        add(left_item['url'], 'http:' + left_item['imgUrl'],
            'SL21' + left_item['title'])
        add(right_item['url'], 'http:' + right_item['imgUrl'],
            'SR21' + right_item['title'])

    for pos, brand in enumerate(jscols_brand, start=1):
        add(brand['href'], 'http:' + brand['src'], 'S4' + str(pos))

    # Titles restart at 1 inside every group, so different groups can share a
    # title (and therefore an image file name).
    for prefix, groups in (('SL3', left_bottom), ('SR3', right_bottom)):
        for group in groups:
            for pos, item in enumerate(group, start=1):
                add(item['url'], 'http:' + item['imgUrl'], prefix + str(pos))

    # Tags carry no picture; 'no img' marks them so the download step skips them.
    for pos, tag in enumerate(jscols_tag, start=1):
        add(tag['url'], 'no img', 'S1' + str(pos) + tag['title'])

    headers = ('地点', '位置', 'URL', '照片', '主题', '时间')
    for col, header in enumerate(headers):
        ws.write(0, col, header)

    rows = zip(list_url, list_img, list_title)
    for row, (url, img, title) in enumerate(rows, start=1):
        add_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        ws.write(row, 0, '广州')
        ws.write(row, 1, '京东pc楼层')
        ws.write(row, 2, url)
        ws.write(row, 3, img)
        ws.write(row, 4, title)
        ws.write(row, 5, add_time)
    wb.save(workbook_path)

    print(list_title)
    print(list_img)
    print(list_url)

    for img, title in zip(list_img, list_title):
        print(img)
        if img != 'no img':
            urllib.request.urlretrieve(img, picture_prefix + title + '.jpg')

# print(jscols_brand)
# # print(len(jscols))
# print(jscols)
原文地址:https://www.cnblogs.com/caicaihong/p/6030861.html