Mongo 基础命令

插入

db.collection.insert({"name": "iFan"})
# 批量插入
db.collection.insertMany([
    {"name": "iFan2"}, {"name": "iFan1"}
])

mongodb能接受的最大消息长度为48M，如果当前的数据长度超过了48M，多数驱动程序会将该批量插入拆分为多个48M的请求。

如果在批量插入时，一个文档插入失败，则之前的文档是会插入成功的，但是之后的文档不会再进行插入了，可以使用continueOnError忽略错误（insertMany 中对应的选项为 `{ordered: false}`）。

删除

db.collection.drop() // 删除整个集合（包括其中的所有文档和索引）
db.collection.remove()
db.collection.deleteOne() 
db.collection.deleteMany()

更新

db.collection.update({查询条件}, {需要替换的数据}) // 对于符合条件的文档，整个替换
db.collection.update({查询条件}, {"$set": {需要更新的字段}}) // 对于符合条件的文档，原始文档中字段存在的则更新，不存在则添加
db.collection.update({查询条件}, {"$unset": {需要删除的字段}}) // 删除字段
db.collection.update({查询条件}, {"$inc": {"cnt": 1}}) // 对cnt字段进行+1, 如果不存在则创建
db.collection.update({查询条件}, {"$push": {"key": "value"}}) // 对key数组进行添加
db.collection.update({"key": {"$ne": "value"}}, {"$push": {"key": "value"}}) // 如果key数组中不存在value，则添加（$ne 必须写在查询条件中，不能写在 $push 里）
db.collection.update({查询条件}, {"$push": {"key": {"$each": [添加多个元素], "$slice": -10}}}) // $each 可以一次性添加多个元素，$slice只保留最后添加的10个元素。
db.collection.update({查询条件}, {"$addToSet": {"key": "value"}}) // 保证添加的数据不会重复
db.collection.update({查询条件}, {"$pop": {"key": -1}}) // 从头部删除
db.collection.update({查询条件}, {"$pull": {"key": "条件"}}) // 根据条件删除
db.collection.update({查询条件}, {修改信息}, true) // upsert：没有找到符合条件的文档时，以查询条件和更新文档为基础创建一个新文档
db.collection.updateMany()

当执行更新操作增加的文档大小超过了该文档分配的空间，更新操作会在磁盘上重定位该文档。

Mongodb不得不移动一个文档时，它会修改集合中的填充因子，填充因子是Mongodb为每个新文档预留的增长空间，使用db.coll.stats()查询。

查询

db.collection.find({查询条件}, {需要显示的字段})
db.collection.find({"name": {"$in": ['a', 'b']}}) // 一个键对应多个值
db.collection.find({"name": {"$nin": ['a', 'b']}}) // 值不在给定列表中
db.collection.find({"name": {"$ne": "a"}}) // 不等于
db.collection.find({"$or": [{"name": "a"}, {"age": 18}]}) // OR
db.collection.find({"name.friend": {"$in": [null], "$exists": true}}) // 判断 name 的 friend 键是否存在并且值为 null
db.collection.find().count()
db.collection.count() // 查询集合的长度
db.collection.find({"age": {"$lt": 10}})   // age < 10
db.collection.find({"age": {"$lte": 10}})  // age <= 10
db.collection.find({"age": {"$gt": 10}})   // age > 10
db.collection.find({"age": {"$gte": 10}})  // age >= 10
db.collection.find({"age": {"$ne": 10}})   // age != 10

查询数组

db.collection.insert({"friend": ["a", "b", "c"]}) // 插入数组
db.collection.find({"friend": "a"}) // 存在a
db.collection.find({"friend": ["a", "b"]}) // 精确匹配。顺序，内容全部一致。
db.collection.find({"friend": {"$all": ["a", "b"]}}) // 既有a，也有b
db.collection.find({"friend": {"$size": 3}}) // 指定长度的数组。
db.collection.find({}, {"friend": {"$slice": -10}}) // 返回后10个
db.collection.find({}, {"friend": {"$slice": [10, 20]}}) // 跳过前10个元素，返回其后的20个元素（即第11到第30个）；除非特别声明，否则使用 $slice 时将返回文档中的所有键。
db.collection.find({"friend": "a"}, {"friend.$": 1}) // 返回 friend 数组中第一个与查询条件匹配的元素（查询条件中必须包含对该数组的条件）

查询内嵌文档

db.collection.find({"name.first": "i", "name.last": "Fan"}) // 查询内嵌文档。
db.collection.find({"name": {"$elemMatch": {"first": "i", "last": "Fan"}}}) // 在查询条件中部分指定匹配数组中的单个内嵌文档。

使用正则查询

db.collection.find({"name": /iFan/i}) // 使用正则表达式判断
// Mongodb使用Perl兼容的正则表达式库来匹配正则表达式
// mongodb可以在前缀型正则表达式（如 `/^iFan/`）的查询中使用索引，所以这类搜索会比较高效。

限制显示字段

db.collection_name.find({}, {key: 1, key2: 0}) // 查询字段：1->显示；0->不显示（注意：除 _id 外，同一次查询的投影中不能混用 1 和 0）

分页和排序

db.collection.find().limit(number) // 限制显示条数
db.collection.find().limit(number).skip(number) //跳过文档
db.collection.find().sort({key1: 1, key2: -1}) // 根据key1升序，key2降序（1->升序；-1->降序）

Aggregate

使用Mongodb内置的原生操作，聚合效率非常高，类似于SQL的Group By操作。
管道中的每个阶段最多只能使用100MB的内存。

SQL	Mongodb
where	$match
group by	$group
having	$match
select	$project
order by	$sort
limit	$limit
sum()	$sum
count()	$sum
join	$lookup

db.collection.aggregate(pipeline<array>, options<document>)

查询记录条数

db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: 1}
	}
})
// Sql
select count(*) as count from table;

查询某个字段之和

db.collection.aggregate({
	$group : {
		_id: null,
		count: {$sum: "$age"}
	}
})
// Sql
select sum(age) as count from table;

以某个字段为键，求和

db.collection.aggregate({
	$group : {
		_id: "$sex",
		count: {$sum: "$age"}
	}
})
// Sql
select sex, sum(age) as count from table group by sex;

多个字段为键，进行求和

db.collection.aggregate({
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
})
// sql
select crawl_name, get_date, count(*) from table group by crawl_name, get_date;

对聚合的字段进行过滤

db.collection.aggregate([{
    $match: {
        get_date: {
            $gte: 20200701
        }
    }
}, {
    $group: {
        _id: {
            crawl_name: "$crawl_name",
            get_date: "$get_date"
        },
        count: {
            $sum: 1
        }
    }
}, {
    $match: {
        count: {
            $gte: 10
        }
    }
}])
// sql
select crawl_name, get_date, count(*) 
from table 
where get_date >= 20200701
group by crawl_name, get_date
having count(*) >= 10;

MapReduce

// Map step for the map-reduce job. In MongoDB map-reduce, `this` is the
// document being processed, so this must stay a regular function (an arrow
// function would not receive the document as `this`).
// Emits the document's crawl_name as the key with a partial count of 1.
var map = function () {
    var key = this.crawl_name;
    var partial = { count: 1 };
    emit(key, partial);
};

/**
 * Reduce step: sums the partial counts emitted for a single key.
 *
 * The returned object has the same shape as the emitted values
 * ({ count: n }), so the output of one reduce call can be passed back in
 * as an element of `emits` in a later call.
 *
 * @param {*} key - The key the values were emitted under (not used here).
 * @param {Array<{count: number}>} emits - Partial counts for that key.
 * @returns {{count: number}} The combined count for the key.
 */
var reduce = function(key, emits) {
    // Declared locally: assigning to an undeclared `total` would create an
    // implicit global (and throws a ReferenceError in strict mode).
    var total = 0;
    // Plain index loop instead of for...in, which iterates property names
    // (including any enumerable non-index properties) rather than elements.
    for (var i = 0; i < emits.length; i++) {
        total += emits[i].count;
    }
    return {
        "count": total
    };
};

// Run the map-reduce over the collection named "collection" using the `map`
// and `reduce` variables. `out: {inline: 1}` asks for the results to be
// returned in the command reply instead of being written to a collection.
// `mr` is declared explicitly rather than assigned as an implicit global.
var mr = db.runCommand({
    "mapreduce": "collection",
    "map": map,
    "reduce": reduce,
    "out": {inline: 1}
});