Mongo 高级查询

Aggregate

使用Mongodb内置的原生操作，聚合效率非常高类似于SQL Group By 操作。
每个阶段管道限制为100MB的内存

SQL	Mongodb
where	$match
group by	$group
having	$match
selet	$project
order by	$sort
limit	$limit
sum()	$sum
count()	$sum
join	$lookup

db.collection.aggregate(pipeline<array>, options<document>)

查询记录条数

db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: 1}
	}
})
// Sql
select count(*) as count from table;

查询某个字段之和

db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: "$age"}
	}
})
// Sql
select sum(age) as count from table;

以某个字段为键，求和

db.collection.aggregate({
	$group : {
		_id: "$sex",
		count: {$sum: "$age"}
	}
})
// Sql
select sex, sum(age) as count from table group by sex;

多个字段为键，进行求和

db.collection.aggregate({
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
})
// sql
select crawl_name, get_date, count(*) from table group by crawl_name, get_date;

对聚合的字段进行过滤

db.collection.aggregate([{
    $match: {
        get_date: {
            $gte: 20200701
        }
    }
}, {
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
}, {
    $match: {
        count: {
            $gte: 10
        }
    }
}])
// sql
select crawl_name, get_date, count(*) 
from table 
where get_date >= 20200701
group by crawl_name, get_date
having count(*) > 10;

MapReduce

var map = function() {
        emit(this.crawl_name, { count: 1 });
}

var reduce = function(key, emits) {
    total = 0
    for (var i in emits) {
        total += emits[i].count;
    }
    return {
        "count": total
    };
}

mr = db.runCommand({
    "mapreduce": "collection",
    "map": map,
    "reduce": reduce,
	"out": {inline: 1}
})