爬虫面试案例系列01

### 需求:爬取荣耀线下门店页面( https://m.vmall.com/help/hnrstoreaddr.htm )中的门店详情信息。页面显示如下:

- 首页显示

- 详情页显示

 ### 基于抓包工具分析如下:

 ### 查看定位到数据包的请求头信息:

- 请求的url和请求方式如下:

- 请求携带的请求参数如下:

   - 注意:请求参数是一个JSON格式的字典,而不是常规的表单键值对,所以在代码实现中需要使用json.dumps将字典转成json串后再作为请求参数提交

### 代码实现:爬取到首页对应的门店信息

import requests
import json
# Endpoint queried for one page of the offline-shop list.
url = 'https://openapi.vmall.com/mcp/offlineshop/getShopList'
# Request parameters taken from the captured request; every value is a string.
data = {
    "portal": "2",
    "lang": "zh-CN",
    "country": "CN",
    "brand": "1",
    "province": "河北省",
    "city": "邯郸市",
    "pageNo": "2",
    "pageSize": "20",
}

# json.dumps is required here: the parameters are submitted as a JSON string,
# not as ordinary form key/value pairs.
# The timeout keeps the script from blocking forever if the server never answers.
response = requests.post(url, data=json.dumps(data), timeout=10)
# Fail loudly on an HTTP error status instead of trying to parse an error page.
response.raise_for_status()
json_data = response.json()

 - 请求到的数据为:

{
	"code": "0",
	"shopInfos": [{
		"address": "河北省邯郸市磁县建设西路与京深线交叉口美食林一楼底商",
		"brand": 1,
		"city": "邯郸市",
		"dist": "磁县",
		"distance": 0.0,
		"id": 111616,
		"isStar": 1,
		"latitude": "36.364752",
		"longitude": "114.390096",
		"name": "荣耀授权体验店(建设西路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "9:00-18:00",
		"shopCode": "RYRA03100602",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市邱县育新街100号移动公司斜对过",
		"brand": 1,
		"city": "邯郸市",
		"dist": "邱县",
		"distance": 0.0,
		"id": 111897,
		"isStar": 1,
		"latitude": "36.815125",
		"longitude": "115.173926",
		"name": "荣耀授权体验店(育新街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100954",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市磁县友谊北大街199号",
		"brand": 1,
		"city": "邯郸市",
		"dist": "磁县",
		"distance": 0.0,
		"id": 108067,
		"isStar": 1,
		"latitude": "36.387417",
		"longitude": "114.379099",
		"name": "荣耀授权体验店(友谊北大街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03104185",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市肥乡县建安路中段",
		"brand": 1,
		"city": "邯郸市",
		"dist": "肥乡县",
		"distance": 0.0,
		"id": 111932,
		"isStar": 1,
		"latitude": "36.559004",
		"longitude": "114.813224",
		"name": "荣耀授权体验店(建安路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100613",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市临漳县邺都南大街移动公司旁",
		"brand": 1,
		"city": "邯郸市",
		"dist": "临漳县",
		"distance": 0.0,
		"id": 111678,
		"isStar": 1,
		"latitude": "36.33081",
		"longitude": "114.613037",
		"name": "荣耀授权体验店(邺都大街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100610",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市成安县西关口西行200路南",
		"brand": 1,
		"city": "邯郸市",
		"dist": "成安县",
		"distance": 0.0,
		"id": 107586,
		"isStar": 1,
		"latitude": "36.447977",
		"longitude": "114.692388",
		"name": "荣耀授权体验店(政府街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03103736",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市馆陶县新华街国土资源局斜对过",
		"brand": 1,
		"city": "邯郸市",
		"dist": "馆陶县",
		"distance": 0.0,
		"id": 111751,
		"isStar": 1,
		"latitude": "36.548521",
		"longitude": "115.294026",
		"name": "荣耀授权体验店(新华街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100615",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市武安市中兴路1666号",
		"brand": 1,
		"city": "邯郸市",
		"dist": "武安市",
		"distance": 0.0,
		"id": 106905,
		"isStar": 1,
		"latitude": "36.692459",
		"longitude": "114.191067",
		"name": "荣耀授权体验店(中兴路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100255",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市永年区洺兴路电子一条街口",
		"brand": 1,
		"city": "邯郸市",
		"dist": "永年区",
		"distance": 0.0,
		"id": 111771,
		"isStar": 1,
		"latitude": "36.782304",
		"longitude": "114.494515",
		"name": "荣耀授权体验店(洺兴路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100611",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市鸡泽县鸡泽镇会盟南大街89",
		"brand": 1,
		"city": "邯郸市",
		"dist": "鸡泽县",
		"distance": 0.0,
		"id": 111786,
		"isStar": 1,
		"latitude": "36.919418",
		"longitude": "114.882834",
		"name": "荣耀授权体验店(会盟南大街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:00-18:30",
		"shopCode": "RYRA03100614",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市曲周县振兴路移动手机大卖场",
		"brand": 1,
		"city": "邯郸市",
		"dist": "曲周县",
		"distance": 0.0,
		"id": 107216,
		"isStar": 1,
		"latitude": "36.780474",
		"longitude": "114.964419",
		"name": "荣耀授权体验店(振兴路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "9:00-18:30",
		"shopCode": "RYRA03101174",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市武安市新华大街145号",
		"brand": 1,
		"city": "邯郸市",
		"dist": "武安县",
		"distance": 0.0,
		"id": 107729,
		"isStar": 1,
		"latitude": "36.695474",
		"longitude": "114.184811",
		"name": "荣耀授权体验店(新华大街店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03103735",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市大名县大名府路东段路南(妇幼保健院对过)",
		"brand": 1,
		"city": "邯郸市",
		"dist": "大名县",
		"distance": 0.0,
		"id": 111832,
		"isStar": 1,
		"latitude": "36.288208",
		"longitude": "115.171058",
		"name": "荣耀授权体验店(大名府路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:00-18:30",
		"shopCode": "RYRA03100612",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市广平县人民路166号",
		"brand": 1,
		"city": "邯郸市",
		"dist": "广平县",
		"distance": 0.0,
		"id": 108001,
		"isStar": 1,
		"latitude": "36.488035",
		"longitude": "114.957763",
		"name": "荣耀授权体验店(人民路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03103980",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市邯山区陵园路114号金茂大厦一楼",
		"brand": 1,
		"city": "邯郸市",
		"dist": "邯山区",
		"distance": 0.0,
		"id": 126945,
		"isStar": 1,
		"latitude": "36.59796329",
		"longitude": "114.4948675",
		"name": "荣耀授权体验店(陵园路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "8:30-18:30",
		"shopCode": "RYRA03100100",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "河北省邯郸市涉县振兴路与龙山大街交叉口东南角",
		"brand": 1,
		"city": "邯郸市",
		"dist": "涉县",
		"distance": 0.0,
		"id": 111607,
		"isStar": 1,
		"latitude": "36.567255",
		"longitude": "113.680533",
		"name": "荣耀授权体验店(振兴路店)",
		"pictures": [],
		"province": "河北省",
		"score": 5,
		"serviceTime": "9:00-18:00",
		"shopCode": "RYRA03100599",
		"starShopPic": "https://res.vmallres.com/pimages//offlineshop/FOMuBZyeqTWYYJgMLPP2.jpg",
		"type": 1
	}, {
		"address": "中长街老检查院对面",
		"brand": 1,
		"city": "邯郸市",
		"dist": "鸡泽县",
		"distance": 0.0,
		"id": 137612,
		"isStar": 0,
		"latitude": "36.921823",
		"longitude": "114.882514",
		"name": "邯郸鸡泽航天中长街旗舰店",
		"pictures": [],
		"province": "河北省",
		"score": 4,
		"serviceTime": "10:00-18:30",
		"shopCode": "RY0000000001",
		"starShopPic": "",
		"type": 0
	}, {
		"address": "磁州路130号",
		"brand": 1,
		"city": "邯郸市",
		"dist": "磁县",
		"distance": 0.0,
		"id": 137613,
		"isStar": 0,
		"latitude": "36.379835",
		"longitude": "114.395066",
		"name": "邯郸磁县盛尧磁州路电信店",
		"pictures": [],
		"province": "河北省",
		"score": 4,
		"serviceTime": "10:00-18:30",
		"shopCode": "RY0000000001",
		"starShopPic": "",
		"type": 0
	}, {
		"address": "滏阳北大街新世纪商业广场1楼",
		"brand": 1,
		"city": "邯郸市",
		"dist": "峰峰矿区",
		"distance": 0.0,
		"id": 137614,
		"isStar": 0,
		"latitude": "36.428341",
		"longitude": "114.217691",
		"name": "邯郸峰峰太行新世纪店",
		"pictures": [],
		"province": "河北省",
		"score": 4,
		"serviceTime": "10:00-18:30",
		"shopCode": "RY0000000001",
		"starShopPic": "",
		"type": 0
	}, {
		"address": "建设街移动公司路南西侧泰山通讯",
		"brand": 1,
		"city": "邯郸市",
		"dist": "肥乡区",
		"distance": 0.0,
		"id": 137625,
		"isStar": 0,
		"latitude": "36.559058",
		"longitude": "114.811038",
		"name": "邯郸肥乡泰山建设街店",
		"pictures": [],
		"province": "河北省",
		"score": 4,
		"serviceTime": "10:00-18:30",
		"shopCode": "RY0000000001",
		"starShopPic": "",
		"type": 0
	}],
	"success": true,
	"totalRows": 60
}

- 请求到的数据分析:

  - 数据为门店相关数据,其中每一个门店有其对应的一个id值,我们需要将id值解析出来,在后面请求详情页会使用到

import requests
import json
# Endpoint queried for one page of the offline-shop list.
url = 'https://openapi.vmall.com/mcp/offlineshop/getShopList'
# Request parameters taken from the captured request; every value is a string.
data = {
    "portal": "2",
    "lang": "zh-CN",
    "country": "CN",
    "brand": "1",
    "province": "河北省",
    "city": "邯郸市",
    "pageNo": "2",
    "pageSize": "20",
}

# The parameters are submitted as a JSON string, hence json.dumps.
# The timeout keeps the script from blocking forever if the server never answers.
response = requests.post(url, data=json.dumps(data), timeout=10)
# Fail loudly on an HTTP error status instead of trying to parse an error page.
response.raise_for_status()
json_data = response.json()

# Parse the id out of every shop entry and keep all of them: the ids are what
# the detail-page requests need later on.
shop_ids = []
for dic in json_data['shopInfos']:
    _id = dic['id']  # id of the current shop
    shop_ids.append(_id)

### 请求每一个门店详情页的数据

- 请求的url和请求方式:

 - 请求参数:

 发现只有shopId是动态变化的请求参数,其他参数都是固定不变的。该shopId就是上一步我们解析出来的门店id,因此将门店id作为请求详情页的请求参数即可。

import requests
import json
# Endpoint queried for one page of the offline-shop list.
url = 'https://openapi.vmall.com/mcp/offlineshop/getShopList'
# Request parameters taken from the captured request; every value is a string.
data = {
    "portal": "2",
    "lang": "zh-CN",
    "country": "CN",
    "brand": "1",
    "province": "河北省",
    "city": "邯郸市",
    "pageNo": "2",
    "pageSize": "20",
}
# Detail-endpoint URL template; shopId is the only part that varies per shop.
detail_url_template = 'https://openapi.vmall.com/mcp/offlineshop/getShopById?portal=2&version=10&country=CN&shopId={}&lang=zh-CN'

# One Session is shared by the list request and every detail request so the
# underlying connection to the host is reused instead of reopened each time.
with requests.Session() as session:
    # The parameters are submitted as a JSON string, hence json.dumps.
    # The timeout keeps the script from blocking forever on a silent server.
    response = session.post(url, data=json.dumps(data), timeout=10)
    # Fail loudly on an HTTP error status instead of parsing an error page.
    response.raise_for_status()
    json_data = response.json()

    for dic in json_data['shopInfos']:
        # Parse the shop id out of the list entry.
        _id = dic['id']
        # Build the detail URL for this shop.
        detail_url = detail_url_template.format(_id)
        detail_response = session.get(url=detail_url, timeout=10)
        detail_response.raise_for_status()
        finally_data = detail_response.json()
        print(finally_data)  # detail data returned for this shop
    

  

原文地址:https://www.cnblogs.com/bobo-zhang/p/12320656.html