Python从MongoDB中按天读取数据并格式化日志

#$cat SpeechMongoHandle.py 
from pymongo import Connection
import time
import datetime

# Alternative separators that are currently disabled.
# NOTE(review): these look like they were meant to be the control characters
# '\x01' / '\x02' (backslashes presumably lost) - confirm before re-enabling.
# CTRL_A='x01'
# CTRL_B='x02'

# Separator placed between the fields of every printed record.
CTRL_A='--'

def getEveryDay(begin_date, end_date):
    """Split the half-open range [begin_date, end_date) into one-day windows.

    Both arguments are strings in "%Y-%m-%d" form. The result is a list of
    (window_start, window_end) tuples in chronological order, where each
    element is ``str()`` of a ``datetime`` (e.g. "2017-01-01 00:00:00") and
    window_end is exactly one day after window_start. The list is empty when
    begin_date is not earlier than end_date.

    Raises ValueError (from ``strptime``) if either string is malformed.
    """
    day_format = "%Y-%m-%d"
    first_day = datetime.datetime.strptime(begin_date, day_format)
    last_day = datetime.datetime.strptime(end_date, day_format)
    one_day = datetime.timedelta(days=1)

    # Both bounds are parsed without a time-of-day part, so they sit at
    # midnight and their difference is a whole number of days; a zero or
    # negative count produces an empty range.
    day_count = (last_day - first_day).days
    return [
        (str(first_day + offset * one_day),
         str(first_day + (offset + 1) * one_day))
        for offset in range(day_count)
    ]
# One-day windows to export, as (start, end) string pairs.
days = getEveryDay('2017-01-01', '2017-01-02')


# MongoDB handles: host 'syslog-1', database 'service', collection 'speech'.
# NOTE(review): pymongo.Connection was removed in PyMongo 3 in favour of
# MongoClient - this script needs an older PyMongo to run as written.
db_name = 'service'
client = Connection('syslog-1', 27017)
db = client[db_name]
collection = db['speech']

def _format_record(res):
    """Render one MongoDB document as a single CTRL_A-separated line.

    The line holds, in order: the formatted local time of ``res["time"]``,
    then the "id", "asr", "nlp", "domain" and "intent" values. Any field that
    is absent from the document is emitted as an empty string, so a missing
    key can neither raise NameError nor reuse the value left over from the
    previously printed document.
    """
    if "time" in res:
        # "time" is stored in epoch milliseconds (the query bounds below are
        # built the same way); localtime() expects seconds.
        seconds = int(res["time"]) // 1000
        thedate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))
    else:
        thedate = ""

    fields = [thedate]
    for key in ("id", "asr", "nlp", "domain", "intent"):
        fields.append(res.get(key, ""))
    return CTRL_A.join(fields)


# Export the documents one day at a time. Single-argument print(...) calls
# are used because they behave the same as a Python 2 print statement and as
# the Python 3 print function.
for day in days:
    # day is a (start, end) pair of "%Y-%m-%d %H:%M:%S" strings; convert both
    # bounds to epoch milliseconds in the local timezone.
    tmpArray0 = time.strptime(str(day[0]), "%Y-%m-%d %H:%M:%S")
    tmpArray1 = time.strptime(str(day[1]), "%Y-%m-%d %H:%M:%S")
    timestamp0 = int(time.mktime(tmpArray0) * 1000)
    timestamp1 = int(time.mktime(tmpArray1) * 1000)
    print(day)
    print((timestamp0, timestamp1))

    # Half-open window: start inclusive, end exclusive, so no document is
    # exported twice across consecutive days.
    results = collection.find({"time": {'$gte': timestamp0, '$lt': timestamp1}})

    for res in results:
        print(_format_record(res))

应对场景为:MongoDB中的历史数据非常多,已经累积了一年多,需要想办法将数据读出、格式化并上传到集群上,于是想到了按天读取数据。

或许还有更优的解法……

原文地址:https://www.cnblogs.com/zhzhang/p/6843615.html