宜出行人口热力图

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : qq登陆 , 滑动验证暂没处理


import os
import time
from selenium import webdriver
from yichuxing.settings import qq_list
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class Login(object):
    """Log in to QQ through a browser driver and return the session cookies.

    Per the original author's note, the slider captcha is NOT handled; a
    login that lands on the unified mobile-login page returns ``None``.
    """
    # Heat-map entry page; loading it redirects to the QQ login form.
    LoginURL = "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"

    def __init__(self, **kwargs):
        self.qq_num = kwargs.get("qq_num")
        self.qq_passwd = kwargs.get("qq_passwd")

    def after_smoothly_login(self, driver):
        """Collect the driver's cookies into a plain ``{name: value}`` dict."""
        cookie = {}
        for elem in driver.get_cookies():
            cookie[elem["name"]] = elem["value"]
        return cookie

    def get_cookie_by_Chrome(self):
        """Log in with local Chrome; return the cookie dict or ``None``."""
        try:
            # BUG FIX: the original path had every backslash stripped
            # ("C:Program Files (x86)Google...") and could never resolve;
            # restored as a raw string.
            chromedriver = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
            # BUG FIX: typo "chrme" -> "chrome" in the env key.
            os.environ["webdriver.chrome.driver"] = chromedriver
            driver = webdriver.Chrome(chromedriver)
            driver.set_page_load_timeout(10)
            driver.get(self.LoginURL)
            driver.find_element_by_id("u").send_keys(self.qq_num)
            driver.find_element_by_id("p").send_keys(self.qq_passwd)
            driver.maximize_window()
            driver.find_element_by_id("go").click()
            time.sleep(6)  # allow the post-login redirect to settle

            if "宜出行" in driver.title:
                return self.after_smoothly_login(driver)
            elif "手机统一登录" in driver.title:
                # Bounced to the unified mobile-login page: login failed.
                return None
        except Exception:
            # Best-effort: any driver/selector failure means "no cookie".
            return None

    def get_cookie_by_PhantomJS(self):
        """Log in with headless PhantomJS; return the cookie dict or ``None``."""
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
            )
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.set_page_load_timeout(10)
            driver.get(self.LoginURL)
            driver.find_element_by_id("u").clear()
            driver.find_element_by_id("u").send_keys(self.qq_num)
            driver.find_element_by_id("p").clear()
            driver.find_element_by_id("p").send_keys(self.qq_passwd)
            driver.find_element_by_id("go").click()
            time.sleep(6)  # allow the post-login redirect to settle

            if "宜出行" in driver.title:
                return self.after_smoothly_login(driver)
            elif "手机统一登录" in driver.title:
                return None
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit /
            # KeyboardInterrupt are no longer swallowed.
            return None

class CookieException(Exception):
    """Raised when the current QQ cookie has expired or been rejected.

    The original explicit ``__init__`` only forwarded to
    ``Exception.__init__`` with no arguments; it was redundant (and
    prevented passing a message), so it has been removed.  ``raise
    CookieException`` call sites are unaffected.
    """

"""
if __name__ == "__main__":
    #L = Login(qq_num="xxxx", qq_passwd="xxxx")
    #L.get_cookie_by_Chrome()
"""

  

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : 宜出行热力图


import hashlib
import socket
import os
import json
import time
import random
import datetime
import requests
from yichuxing.settings import qq_list, s_fre, proxyMeta, is_proxy
from requests.exceptions import RequestException
#from utils.user_angents import agents
from data_utils.ali_oss import OSS2
from data_utils.time_convert import get_time_stamp
from yichuxing.yichuxing_utils.qqlogin import CookieException, Login
from data_utils.conmongodb import mongo_con_keepalive
from yichuxing.yichuxing_utils.create_grid import create_grid_by_center, get_gd_data


class Crawl():
    """Crawler for the "宜出行" population heat-map API.

    Rotates through a pool of QQ accounts tracked in MongoDB, requests
    heat-map tiles for every grid cell of each target city, and uploads
    the raw JSON responses to Ali OSS.
    """
    db = mongo_con_keepalive()
    header = {
              "Host": "c.easygo.qq.com",
              "Connection": "keep-alive",
              "Accept": "application/json",
              "Accept-Encoding": "gzip, deflate",
              "Accept-Language": "zh-CN,zh;q=0.9",
              "X-Requested-With": "XMLHttpRequest",
              "Referer": "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw",
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
            }
    start_url = "http://c.easygo.qq.com/api/egc/heatmapdata"
    cookie_data = None  # cookies of the QQ session currently in use
    # Shorter inter-request delays are only safe when going through the proxy.
    if is_proxy:
        wait_time = [0.16, 0.17]
    else:
        wait_time = [3, 3.1, 3.2, 3.3, 3.4]

    time_stamp = get_time_stamp()
    time_local = time.localtime(int(time_stamp))
    date = time.strftime("%Y-%m-%d", time_local)
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    fre_data = {"qq": None, "pwd": None}  # account record currently in use
    fre = 0  # number of requests issued with the current account
    pid = os.getpid()
    oss = OSS2()
    path_dir = None  # OSS prefix, set per-run in __init__
    website = "population_yichuxing"
    qq_status = "yichuxing_qq_status"  # Mongo collection tracking account usage

    def __init__(self):
        self.path_dir = "population/yichuxing/{0}/".format(self.time_stamp)
        self.db.get_collection('pathdir_dict').insert_one(
            {'pathdir': self.path_dir, 'website': self.website, 'flag': False}
        )
        # No record for today's date means a new day: reset the whole
        # per-account usage table so every QQ account is available again.
        if self.db.get_collection(self.qq_status).find_one({"date": self.date}) is None:
            self.db.get_collection(self.qq_status).remove({})
            print("新的一天,新的开始 初始化所有账号")
            self.db.get_collection(self.qq_status).insert_many(
                [{"qq": i["qq"], "pwd": i["pwd"], "n": 0,
                  'status': False, "date": self.date} for i in qq_list]
                )
        super(Crawl, self).__init__()

    def kill(self):
        """Terminate this process (used when no usable accounts remain)."""
        try:
            os.system("kill {0}".format(self.pid))
        except OSError as e:
            print("kill pid error: ", e)

    def _retire_current_account(self):
        """Mark the current account exhausted and log in with a fresh one.

        Extracted helper: this update-then-relogin sequence previously
        appeared verbatim in ``spyder``, ``get`` and ``start``.
        """
        qq = self.fre_data.get("qq")
        self.db.get_collection(self.qq_status).update_one(
            {"qq": qq}, {"$set": {"status": True}}
        )
        self.get_cookie()

    def get_cookie(self):
        """Pick a random unused account, log in, and store its cookies.

        When the pool is empty, kill the process (nothing left to do).
        """
        all_qq = self.db.get_collection(self.qq_status).find(
            {"status": False}, {"_id": 0}
        )
        available = list(all_qq)
        if available:
            self.fre = 0
            self.fre_data = random.choice(available)
            login = Login(qq_num=self.fre_data.get("qq"),
                          qq_passwd=self.fre_data.get("pwd"))
            cookie_data = login.get_cookie_by_PhantomJS()
            #cookie_data = login.get_cookie_by_Chrome()
            if cookie_data:
                self.cookie_data = cookie_data
        else:
            print("没有账号了, 杀死自己")
            self.kill()

    def spyder_params(self, item):
        """Build the heat-map request parameters for one grid cell."""
        return {"lng_min": item.get("lng_min"),
                "lat_max": item.get("lat_max"),
                "lng_max": item.get("lng_max"),
                "lat_min": item.get("lat_min"),
                "level": 16,
                "city": "",
                "lat": "undefined",
                "lng": "undefined",
                "_token": ""
                }

    def spyder(self, params):
        """Fetch one tile; raise ``CookieException`` on a rejected cookie.

        Returns the decoded JSON dict, or ``None`` if the network keeps
        failing (each ``RequestException`` triggers a recursive retry).
        """
        time.sleep(random.choice(self.wait_time))
        try:
            if self.fre >= s_fre:
                print("账号: {0}, 抓取次数达到上限, 更换qq账号".format(self.fre_data.get("qq")))
                self._retire_current_account()
            if is_proxy:
                r = requests.get(self.start_url, headers=self.header,
                                 cookies=self.cookie_data, params=params,
                                 proxies=self.proxies)
            else:
                r = requests.get(self.start_url, headers=self.header,
                                 cookies=self.cookie_data, params=params)
            if r.status_code == 200:
                self.fre = self.fre + 1
                try:
                    return r.json()
                except ValueError:  # narrowed from a bare except
                    # A 200 with a non-JSON body means we were bounced to a
                    # login page: the cookie is dead.
                    raise CookieException
            else:
                raise CookieException
        except RequestException:
            # BUG FIX: the original recursed WITHOUT `return`, so the value
            # from a successful retry was discarded and None was returned.
            return self.spyder(params)

    def get(self, params):
        """Fetch a tile, transparently rotating accounts on cookie expiry."""
        try:
            return self.spyder(params)
        except CookieException:
            print("账号: {0}, cookie 失效,获取新账号登陆, 并抓取".format(
                  self.fre_data.get("qq")))
            self._retire_current_account()
            return self.spyder(params)

    def create_filename(self, url):
        """Derive a unique upload filename from host, URL hash and time."""
        fname = '%s_%s_%s_%s.json' % (socket.gethostname(),
                                      url.split('//')[-1].split('/')[0].replace('.', '-'),
                                      hashlib.md5(url.encode()).hexdigest(),
                                      str(time.time()).split('.')[0])
        return fname

    def start(self):
        """Crawl every grid cell of every target city and upload results."""
        self.get_cookie()
        for city in get_gd_data():
            print("begin: ", city)
            latlng_dict = create_grid_by_center(city)
            print("将要抓取的次数: ", len(latlng_dict))
            for cell in latlng_dict:
                print("抓取范围: ", cell)
                params = self.spyder_params(cell)
                data_json = self.get(params)
                file_ = "{0}{1}".format(
                    self.path_dir,
                    self.create_filename("{0}{1}".format(self.start_url, params)))
                if data_json is None:
                    # ROBUSTNESS FIX: repeated network failures yield None;
                    # skip this cell instead of crashing on None.get(...).
                    continue
                if data_json.get("code") != 0:
                    print("code: {0}, 获取新的账号,再一次抓取".format(data_json.get("code")))
                    self._retire_current_account()
                    data_json = self.get(params)

                if (data_json and data_json.get("code") == 0
                        and len(data_json.get("data", [])) > 0):
                    data_json["cityname"] = cell["cityname"]
                    self.oss.uploadfiledata(file_, json.dumps(data_json))
        co = self.db.get_collection(self.qq_status).find({"status": False}).count()
        print("剩余可用qq count: ", co)



if __name__ == "__main__":
    # Entry point: run one complete crawl pass.
    crawler = Crawl()
    crawler.start()

  

# Max number of requests per QQ account before it is rotated out.
s_fre = 70
# Side length of each crawl grid cell in degrees (0.04 ≈ 4 km); also the pan step.
lat_offset = 0.04
lng_offset = 0.04
# Whether to route requests through the proxy. True: enabled, False: disabled.
is_proxy = True
grade = {0:6, 1: 6, 2: 5, 3: 4, 4: 4, 5: 4}  # city class -> number of grid rings to crawl
# Proxy endpoint (credentials redacted in this published copy).
proxyMeta = "http://xxx:xxx@proxy.abuyun.com:9020" 
# QQ account pool (placeholders).
qq_list = [
{"qq": "xxx", "pwd": "xxx"},
]

  

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 16:28:43
# Desc   :

import json
import numpy as np
from yichuxing.settings import lat_offset, lng_offset, grade
from data_utils.conmongodb import mongo_con_keepalive
from data_utils.location_convert import bd09togcj02


db = mongo_con_keepalive()

def get_gd_data():
    """Return the class-3 cities outside Guangdong, with their Baidu
    (BD-09) centers converted to Tencent/GCJ-02 ``lng``/``lat`` fields."""
    cursor = db.get_collection("params_citys").find(
        {"exists_city": True}, {"_id": 0}).sort("class")
    selected = []
    for city in cursor:
        # Skip Guangdong province and anything that is not city class 3.
        if city.get("province") == "广东省" or city.get("class") != 3:
            continue
        bd_lng = city.pop("center_lng")
        bd_lat = city.pop("center_lat")
        city["lng"], city["lat"] = bd09togcj02(bd_lng, bd_lat)  # to Tencent coords
        selected.append(city)
    return selected

def create_grid_by_center(location, n=None):
    """Build a square grid of bounding boxes centered on a city.

    The grid extends ``n`` cells in every direction from the center, i.e.
    a (2n x 2n)-cell square; each cell is ``lat_offset`` x ``lng_offset``
    degrees (~4 km x 4 km with the default settings).

    Args:
        location: dict with "lng", "lat", "class" and "cityname" keys
            (as produced by ``get_gd_data``).
        n: number of rings; defaults to the city's class-based ``grade``.

    Returns:
        List of dicts with "lng_min", "lat_min", "lng_max", "lat_max"
        and "cityname" — one per grid cell.
    """
    lng, lat = location["lng"], location["lat"]
    city_class, cityname = location["class"], location["cityname"]
    if n is None:
        n = grade.get(city_class)
    n = float(n)
    bottom_lat, top_lat = lat - lat_offset * n, lat + lat_offset * n
    left_lng, right_lng = lng - lng_offset * n, lng + lng_offset * n
    lat_range = np.arange(bottom_lat, top_lat, lat_offset)
    # PERF: hoisted out of the loop — the longitude range is identical for
    # every latitude row, but the original rebuilt it per row.
    lng_range = np.arange(left_lng, right_lng, lng_offset)

    return [{"lng_min": lng_,
             "lat_max": lat_ + lat_offset,
             "lng_max": lng_ + lng_offset,
             "lat_min": lat_,
             "cityname": cityname}
            for lat_ in lat_range
            for lng_ in lng_range]

  

{
	"cityname" : "北京市",
	"province" : "北京市",
	"citycode" : "131",
	"center_lat" : 39.904211,   # Baidu (BD-09) coordinates
	"center_lng" : 116.407394,
	"class" : 0,
	"ftx_code" : "bj",
	"meituan_code" : "beijing",
	"meituan_id" : 1,
	"dianping_id" : 2,
	"dianping_code" : "beijing",
	"gd_adcode" : "110000",
	"gd_citycode" : "010",
	"shunqi_code" : "beijing",
	"xiecheng_code" : "BJS",
	"xiecheng_status" : true,
	"zhilian_code" : "beijing",
	"baidu_id" : 131,
	"exists_city" : true
}
{
    "scale" : "20,50,100,200",
    "lng_a" : 116.550125,
    "lat_a" : 39.843624999999996,
    "lng_b" : 116.55662935278988,
    "lat_b" : 39.84962393215385,
    "lng_g" : 116.54429316621265,
    "lat_g" : 39.842540318493164,
    "gps_s" : "a",
    "count" : 800,
    "grid_y" : 159374,
    "grid_x" : 466200,
    "max_data" : 32000,
    "crawl_time" : "2018-05-29 10:03:37",
    "city" : "北京市"
}

经纬度解密代码

http://c.easygo.qq.com/eg_toc/js/map-d76c21c16d.bundle.js 

            lng = 1e-6 * (250.0 * d['grid_x'] + 125.0)

            lat = 1e-6 * (250.0 * d['grid_y'] + 125.0)

  

 教程仅供技术研究学习使用,若有侵权,联系本人删除

原文地址:https://www.cnblogs.com/dockers/p/9238535.html