爬虫监控

数据监控:

import time
import urllib
import urllib.parse
import urllib.request

import pymongo
import requests

# These handles must live at module level; otherwise other scripts cannot
# import them from this file.
client = pymongo.MongoClient(host='localhost', port=27017)
book_qunar = client['qunar']
sheet_qunar_zyx = book_qunar['qunar_zyx']


# Fetch and store the listing pages for one departure/destination pair
def get_list(dep, item):
    """Fetch every listing page for a route and store it in MongoDB.

    The first page is requested to read ``data.limit.routeCount``. The
    listing is then walked in offsets of 20, and each raw JSON response is
    inserted into ``sheet_qunar_zyx`` together with the current date, the
    route and the offset.

    Args:
        dep: Departure city; URL-quoted before being placed in the query.
        item: Destination query; URL-quoted before being placed in the query.

    Returns:
        None. Returns early, storing nothing, when the first response does
        not contain a ``routeCount`` that can be converted to ``int``.
    """
    url_template = (
        'https://touch.dujia.qunar.com/list?modules=list,bookingInfo'
        '&dep={}&query={}&mtype=all&ddt=false'
        '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
        '&cfrom=zyx&it=FreetripTouchin&et=FreetripTouch&date=&configDepNew='
        '&needNoResult=true&originalquery={}&limit={},20'
        '&includeAD=true&qsact=search'
    )
    quoted_dep = urllib.parse.quote(dep)
    quoted_item = urllib.parse.quote(item)

    def page_url(offset):
        # Build the listing URL for the page starting at `offset`.
        return url_template.format(quoted_dep, quoted_item, quoted_item, offset)

    first_page = get_json(page_url(0))
    try:
        route_count = int(first_page['data']['limit']['routeCount'])
    except (KeyError, TypeError, ValueError):
        # Missing or malformed count: nothing to crawl for this route.
        # Narrow exceptions so Ctrl-C and real bugs are not swallowed.
        return

    for limit in range(0, route_count, 20):
        # The probe above already fetched offset 0 with the identical URL,
        # so reuse that response instead of requesting it a second time.
        page = first_page if limit == 0 else get_json(page_url(limit))
        result = {
            'date': time.strftime('%Y-%m-%d'),
            'dep': dep,
            'arrive': item,
            'limit': limit,
            'result': page,
        }
        sheet_qunar_zyx.insert_one(result)

def connect_mongo():
    """Return the ``qunar_zyx`` collection of the ``qunar`` database.

    A new ``MongoClient`` for ``localhost:27017`` is created on every call.
    """
    return pymongo.MongoClient('localhost', 27017)['qunar']['qunar_zyx']


def get_json(url, timeout=30):
    """GET ``url`` and return the response body decoded as JSON.

    Sleeps for one second after every request (presumably to throttle the
    crawl) before decoding the body.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Without it a stalled connection would block the
            whole crawl indefinitely.

    Returns:
        The parsed JSON payload.
    """
    response = requests.get(url, timeout=timeout)
    time.sleep(1)
    return response.json()
# Script entry point: for every departure city, look up its recommended
# destinations and crawl the listing for each (departure, destination) pair.
if __name__ == "__main__":

    url = 'https://touch.dujia.qunar.com/depCities.qunar'
    dep_dict = get_json(url)
    # dep_dict['data'] maps a group key to a collection of departure cities;
    # only the cities are needed, so iterate the values directly.
    for dep_group in dep_dict['data'].values():
        for dep in dep_group:
            url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(urllib.parse.quote(dep))
            arrive_dict = get_json(url)

            # Collect destination queries in first-seen order without
            # duplicates; the set makes each membership test O(1).
            destinations = []
            seen = set()
            for module in arrive_dict['data']:
                for sub_module in module['subModules']:
                    for entry in sub_module['items']:
                        query = entry['query']
                        if query not in seen:
                            seen.add(query)
                            destinations.append(query)

            for item in destinations:
                get_list(dep, item)

监控(单独的脚本;上面的爬虫代码需保存为 test.py,下面才能从 test 导入):

from test import sheet_qunar_zyx
import time

# Monitor the collection: print its document count every 10 seconds.
# Collection.count_documents({}) is used because Cursor.count() was
# deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
while True:
    print(sheet_qunar_zyx.count_documents({}))
    time.sleep(10)
原文地址:https://www.cnblogs.com/star-py-blog/p/13740148.html