# Qunar (去哪儿网) store update

import re
import json
from odps import ODPS
from threading import Thread
import threading
from urllib import parse
import datetime
from lxml import etree

import random 
import requests
import time

from models import *

# Store URLs that are excluded from crawling: a URL popped from the queue is
# only processed when it is NOT a member of this set.
store_urls_data = {
    "https://fh.dujia.qunar.com/",
    "https://aaag9.package.qunar.com",
}

def write_txt(html_data):
    """Append ``html_data`` followed by a single space to ``a.txt``.

    The file is opened in append mode relative to the current working
    directory and is created when it does not exist yet.

    Args:
        html_data: Text to append to the file.
    """
    # The context manager guarantees the handle is closed even if a write
    # raises, which the manual open()/close() pair did not.
    with open("a.txt", "a+") as out_file:
        out_file.write(html_data)
        out_file.write(" ")

def _find_rank_marker(html):
    """Return ``(rankline, attrib)`` for the rank-trend icon of a store page.

    The icon is looked up by class in the order "up", "down", "equal"; the
    first match wins. ``(2, None)`` is returned when no icon is present.
    """
    probes = (
        (1, '//div[@class="rankline"]//i[contains(@class,"up")]'),
        (-1, '//div[@class="rankline"]//i[contains(@class,"down")]'),
        (0, '//div[@class="rankline"]//i[contains(@class,"equal")]'),
    )
    for rankline, xpath in probes:
        icons = html.xpath(xpath)
        if icons:
            return rankline, icons[0].attrib
    return 2, None


def _rank_percent(rankline, attrib):
    """Return the size of the rank change as a fraction (0.0 when unchanged/unknown)."""
    if rankline not in (1, -1):
        return 0.0
    # The first run of digits in the icon's attributes is taken as the
    # percentage. The pattern must be \d+ (digits); a plain 'd+' would match
    # the letter "d" and make int() fail.
    # NOTE(review): which attribute carries the number is not visible here.
    digits = re.search(r"\d+", str(attrib))
    if digits is None:
        return 0.0
    return int(digits.group(0)) / 100


def process_nodes_list(url):
    """Fetch a Qunar store page and save the merchant information found on it.

    The page is downloaded and parsed, then the score, rank trend, store name
    and the entries of the "business" list are extracted. A ``qunar_Store``
    row is saved only when the business list has more than nine text entries;
    otherwise nothing is stored.

    ``store_rankline`` is 1 (rank went up), -1 (down), 0 (unchanged) or
    2 (no rank icon on the page). ``store_percent`` is the number found in
    the icon's attributes divided by 100, or 0.0 when there is none.

    Args:
        url: Address of the store page to fetch.

    Raises:
        Whatever ``requests.get`` raises for network failures or timeouts;
        these are not handled here.
    """
    # A timeout keeps a stalled connection from blocking the worker forever.
    response = requests.get(url, timeout=30)
    html = etree.HTML(response.text)
    if html is None:
        # NOTE(review): guard for an empty/unparseable document - confirm
        # whether the installed lxml returns None or raises in that case.
        print(url)
        return

    # Store score; falls back to 0.0 when the page shows none.
    score_nodes = html.xpath("//var[@class='score']/text()")
    score = score_nodes[0] if score_nodes else 0.0

    rankline, up_down = _find_rank_marker(html)
    if up_down is None:
        up_down = "NULL"  # placeholder for a missing rank icon
        print(url)
    print(up_down)

    name_nodes = html.xpath("//div[@class='shop-rank']/strong/text()")
    if name_nodes:
        store_name = name_nodes[0]
    else:
        store_name = "NULL"
        print(url)

    res_li = html.xpath("//div[@class='business']//li/text()")
    if len(res_li) <= 9:
        return

    store = qunar_Store()
    store.store_name = store_name  # store name
    store.store_score = score  # store score
    store.store_rankline = rankline  # 2 means the rank data is missing
    store.store_percent = _rank_percent(rankline, up_down)
    # Only the odd positions are read - presumably the even ones hold the
    # field labels; verify against the page markup.
    store.company_name = res_li[1]  # company name
    store.company_legal_person = res_li[3]  # legal representative
    store.licence_num = res_li[5]
    store.trading_certificate = res_li[7]
    store.business_scope = res_li[9]
    store.create_time = datetime.datetime.now()

    store.save(force_insert=True)

def get_nodes_json():
    """Pop one store URL from the shared queue and process it.

    The URL is passed to ``process_nodes_list`` unless it is listed in
    ``store_urls_data``.

    Returns:
        bool: ``True`` when a URL was popped (whether it was processed or
        skipped), ``False`` when nothing usable was popped, e.g. because the
        queue is empty. Earlier versions returned ``None`` in every case, so
        callers that ignore the result are unaffected.
    """
    url = r.lpop('test.com:store_url')
    if not url:
        return False
    # NOTE(review): `r` is not defined in this file. If it is a Redis client
    # created without response decoding, lpop yields bytes, which would never
    # compare equal to the str entries of store_urls_data - decode to be safe.
    if isinstance(url, bytes):
        url = url.decode("utf-8")
    if url not in store_urls_data:
        process_nodes_list(url)
    return True


class parse_qunar_url_Thread(Thread):
    """Worker thread that keeps pulling store URLs from the queue.

    Each processed URL is fetched and its data saved by ``get_nodes_json``.
    An exception raised while processing is not caught here, so it ends the
    thread.
    """

    # Seconds to wait before polling again after the queue was found empty.
    IDLE_WAIT_SECONDS = 1.0

    def run(self):
        """Process queued URLs forever, pausing while the queue is empty."""
        while True:
            # Without the pause an empty queue makes every worker spin in a
            # tight loop, hammering the queue backend and the CPU.
            if not get_nodes_json():
                time.sleep(self.IDLE_WAIT_SECONDS)


if __name__ == "__main__":
    # Start 50 crawler threads; each one runs until the process is stopped.
    for _ in range(50):
        parse_qunar_url_Thread().start()
    
# Original article: https://www.cnblogs.com/dog-and-cat/p/13615469.html