去游标

去游标

mongo

游标机制:

在遍历全表、集合的情况下,当表、集合本身在增量时,游

刚开始,去重后的 id 数量与遍历计数是一致的;后续去重数量不再增长,有效增量为 0:

1094295 / 1300000 ---- {'_id': ObjectId('5b03c2a99341f521755dd7c1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:47

1094295 / 1305000 ---- {'_id': ObjectId('5b03cc479341f521755deb49')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:49

1094295 / 1310000 ---- {'_id': ObjectId('5b03d61a9341f521755dfed1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:50

1094295 / 1315000 ---- {'_id': ObjectId('5b03d6249341f521755e1259')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:52

1094295 / 1320000 ---- {'_id': ObjectId('5b03e79d9341f521755e25e1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:54

1094295 / 1325000 ---- {'_id': ObjectId('5b05107d9341f521755e3969')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:55

from ProjectUtil.usingModuleTOMODIFY import getNow, mysql_write, mysql_fetch, time, randomSleep, return_logging
import os, random
from pymongo import MongoClient

# Logging setup: the log file is named <YYYYMMDD><script file name>.log.
# NOTE: despite its name, this_file_abspath holds the script's directory.
this_file_abspath = os.path.dirname(os.path.abspath(__file__))
this_file_name = os.path.basename(os.path.abspath(__file__))
f_log = '{}{}{}'.format(time.strftime('%Y%m%d', time.localtime(time.time())),
                        this_file_name, '.log')
logging = return_logging(f_log)

# MongoDB connection settings.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file instead.
host, username, password = '10.14.14.52', 'ain', 'adm'
uri = "mongodb://%s:%s@%s" % (username, password, host,)

# Output file that receives one document _id per line.
mongo_ask_id_f = 'mongo_ask_id.txt'
# Number of documents read from the cursor so far.
c = 0
# Start timestamp, printed with every progress report.
start_ = getNow()

# Start from an empty output file. On a first run the file does not exist
# yet, which must not abort the script.
try:
    os.remove(mongo_ask_id_f)
except FileNotFoundError:
    pass

# Not used by the visible code; kept so the module-level name stays available.
id_l = []
# Walk the whole `ask` collection and record document ids in
# `mongo_ask_id_f`, one per line. If a pass fails with an exception the scan
# is retried from the beginning; after one clean pass the loop stops instead
# of rescanning the collection forever.
#
# String form of every _id written so far. Keeping it in memory replaces
# re-reading the whole output file on every progress report and prevents a
# retry from appending duplicate lines.
seen_ids = set()
while True:
    mongo_client = None
    scan_completed = False
    try:
        mongo_client = MongoClient(uri)
        db = mongo_client.superpub
        c_ask = db.ask
        # Only the _id field is projected; nothing else is needed here.
        cursor = c_ask.find({}, {'_id': 1})
        # Open the output file once per pass rather than once per document.
        with open(mongo_ask_id_f, 'a', encoding='utf-8') as fa:
            for doc in cursor:
                c += 1
                # Author's guess about the cursor mechanism: every 100th
                # document is not recorded, so that the cursor advances no
                # slower than the (insert-only) collection grows.
                if c % 100 != 0:
                    id_ = '{}'.format(doc['_id'])
                    if id_ not in seen_ids:
                        seen_ids.add(id_)
                        fa.write('{}\n'.format(id_))
                # Progress report. This must not sit behind the "every 100th"
                # skip above: every multiple of 1000 is also a multiple of
                # 100, so it would never run.
                if c % 1000 == 0:
                    print('----------------------------', c)
                    print(len(seen_ids), '/', c, '----', doc)
                    s = '{}{}{}{}\n'.format('start:', start_, 'now:', getNow())
                    print(s)
        scan_completed = True
    except Exception as e:
        # Best effort: report the error and retry the scan.
        print(e)
    finally:
        # The client may not exist if MongoClient() itself failed.
        if mongo_client is not None:
            try:
                mongo_client.close()
            except Exception as e:
                print(e)
    if scan_completed:
        break

  

原文地址:https://www.cnblogs.com/rsapaper/p/10008616.html