Mongo 高级查询

Aggregate

  • 使用Mongodb内置的原生操作,聚合效率非常高类似于SQL Group By 操作。
  • 每个阶段管道限制为100MB的内存
SQL Mongodb
where $match
group by $group
having $match
selet $project
order by $sort
limit $limit
sum() $sum
count() $sum
join $lookup
db.collection.aggregate(pipeline<array>, options<document>)
  1. 查询记录条数
db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: 1}
	}
})
// Sql
select count(*) as count from table;
  1. 查询某个字段之和
db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: "$age"}
	}
})
// Sql
select sum(age) as count from table;
  1. 以某个字段为键,求和
db.collection.aggregate({
	$group : {
		_id: "$sex",
		count: {$sum: "$age"}
	}
})
// Sql
select sex, sum(age) as count from table group by sex;
  1. 多个字段为键,进行求和
db.collection.aggregate({
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
})
// sql
select crawl_name, get_date, count(*) from table group by crawl_name, get_date;
  1. 对聚合的字段进行过滤
db.collection.aggregate([{
    $match: {
        get_date: {
            $gte: 20200701
        }
    }
}, {
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
}, {
    $match: {
        count: {
            $gte: 10
        }
    }
}])
// sql
select crawl_name, get_date, count(*) 
from table 
where get_date >= 20200701
group by crawl_name, get_date
having count(*) > 10;

MapReduce

var map = function() {
        emit(this.crawl_name, { count: 1 });
}

var reduce = function(key, emits) {
    total = 0
    for (var i in emits) {
        total += emits[i].count;
    }
    return {
        "count": total
    };
}

mr = db.runCommand({
    "mapreduce": "collection",
    "map": map,
    "reduce": reduce,
	"out": {inline: 1}
})

原文地址:https://www.cnblogs.com/iFanLiwei/p/14087326.html