python连接MongoDB

一、MongoDB介绍

MongoDB 是一个基于分布式文件存储的数据库。由 C++ 语言编写。旨在为 WEB 应用提供可扩展的高性能数据存储解决方案。MongoDB 是一个介于关系数据库和非关系数据库之间的产品,是非关系数据库当中功能最丰富,最像关系数据库的。

在做爬虫的时候,抓取到的数据通常是字典形式的结构化信息。存储时不需要预先定义表结构,可以直接把字典动态地插入到MongoDB中。MongoDB是面向文档的数据库,每个文档由一组键值对(key-value)组成。

二、MongoDB安装(Windows下安装)

下载链接:https://www.mongodb.com/

网盘下载:https://pan.baidu.com/s/19HfmwU0ibx2qMsF8E-8DWw   提取码:m03v

MongoDB可视化工具:Robo 3T,下载链接:https://robomongo.org/download

三、Python安装pymongo(MongoDB的Python驱动)

pip install pymongo -i http://pypi.douban.com/simple --trusted-host pypi.douban.com

四、python操作MongoDB

4.1 连接MongoDB

 1 import pymongo
 2 
 3 mongo_host = "localhost"
 4 mongo_db = "mydb"
 5 mongo_table = "people_info"
 6 
 7 # 连接MongoDB
 8 client = pymongo.MongoClient(mongo_host)
 9 db = client[mongo_db] #使用mydb数据库,或者client.mydb,没有则创建
10 table = db[mongo_table] #创建表(集合),或db.people_info

4.2 往MongoDB插入一条数据

info = {
    "name": "耗子尾汁",
    "age": 20,
    "sex": "girl",
    "address": "北京海淀",
}

# insert_one() replaces Collection.insert(), which was deprecated in
# PyMongo 3 and removed in PyMongo 4.
table.insert_one(info)

4.3 往MongoDB插入多条数据

 1 results = {
 2     "chinese": "优秀",
 3     "English": "良好",
 4     "math":"不及格",
 5     "history":"优秀",
 6     "political":"优秀",
 7     "geographic":"极差"
 8 }
 9 
10 table.insert([info,results])

4.4 查询数据

# Fetch a single document: the first one whose sex is "girl"
res = table.find_one({'sex':'girl'})
print(res)  # {'_id': ObjectId('5fdc76e1404d12a17a0c438a'), 'name': '耗子尾汁', 'age': 20, 'sex': 'girl', 'address': '北京海淀'}

# Fetch everything: find() without a filter returns a cursor, not a list
results = table.find()
print(type(results))  # <class 'pymongo.cursor.Cursor'>
# Iterating a cursor yields the documents one by one
for document in table.find():
    print(document)

4.5 更新数据

# Change sex from "girl" to "boy": the first dict is the filter, the second
# is the update document. update_one() replaces Collection.update(), which
# was removed in PyMongo 4; like the legacy call with its default options,
# it modifies only the first matching document.
table.update_one({"sex":"girl"},{"$set":{"sex":"boy"}})

4.6 删除数据

# Delete every document in the collection (the collection itself remains).
# delete_many() replaces Collection.remove(), which was removed in PyMongo 4.
table.delete_many({})

# Delete all documents whose sex is "girl"
table.delete_many({"sex":"girl"})

# Drop the collection itself, not just its documents -- use with care
# table.drop()

五、封装python操作MongoDB

  1 import pymongo
  2 import sys
  3 
  4 class ConnectMongo(object):
  5 
  6     def __init__(self,host="localhost",db='mydb',):
  7         self.__host = host
  8         self.__db = db
  9         try:
 10             client = pymongo.MongoClient(self.__host)
 11             self.db = client[self.__db]
 12         except Exception as e:
 13             print(e)
 14 
 15     def use_collection(self,collection):
 16         try:
 17             collect_table = self.db[collection]
 18         except Exception as e:
 19             print(e)
 20         else:
 21             return collect_table
 22 
 23     def insert_one_data(self,data,collection):
 24         """
 25         :param data: 插入的数据
 26         :param collection: 插入集合
 27         :return:
 28         """
 29         try:
 30             self.use_collection(collection).insert_one(data)
 31         except Exception as e:
 32             print(e)
 33 
 34     def insert_many_data(self,documents,collection):
 35         """
 36         :param args: 插入多条数据
 37         :param collection:
 38         :return:
 39         """
 40         if not isinstance(documents,list):
 41             raise TypeError("参数必须是一个非空的列表")
 42         for item in documents:
 43            try:
 44                 self.use_collection(collection).insert_many([item])
 45            except Exception as e:
 46                 print(e)
 47                 return None
 48 
 49     def query_one_data(self,query_parame,collection):
 50         "查询一条数据"
 51         if not isinstance(query_parame,dict):
 52             raise TypeError("查询参数必须为dict类型")
 53         try:
 54            res = self.use_collection(collection=collection).find_one(query_parame)
 55            return res
 56         except Exception as e:
 57             print(e)
 58 
 59     def query_all_data(self,collection,query_parame=None,limit_num = sys.maxsize):
 60         "查询多条数据"
 61         table = self.use_collection(collection)
 62         if query_parame is not None:
 63             if not isinstance(query_parame,dict):
 64                 raise TypeError("查询参数必须为dict类型")
 65         try:
 66             query_results = table.find(query_parame).limit(limit_num)# limit限制结果集查询数量
 67             res_list = [res for res in query_results]
 68             return res_list
 69         except Exception:
 70             return None
 71 
 72     def update_collection(self,query_conditions,after_change,collection):
 73         """
 74         :param query_conditions: 目标参数
 75         :param after_change: 需要更改的数据
 76         """
 77         if not isinstance(query_conditions,dict) or not isinstance(after_change,dict):
 78             raise TypeError("参数必须为dict类型")
 79         res = self.query_one_data(query_conditions,collection)
 80         if res is not None:
 81             try:
 82                 self.use_collection(collection).update_one(query_conditions,{"$set":after_change})
 83             except Exception as e:
 84                 print(e)
 85                 return None
 86         else:
 87             print("查询条件不存在")
 88 
 89     def delete_collection(self,search,collection):
 90         "删除一条数据"
 91         if not isinstance(search,dict):
 92             raise TypeError("参数必须为dict类型")
 93         try:
 94             self.use_collection(collection).delete_one(search)
 95         except Exception as e:
 96             print(e)
 97 
 98     def delete_many_collection(self,search,collecton):
 99         try:
100             self.use_collection(collecton).delete_many(search)
101         except Exception as e:
102             return None
103 
104     def drop_collection(self,collection):
105         "删除集合"
106         try:
107             self.use_collection(collection).drop()
108             print("delete success")
109         except Exception:
110             return None
原文地址:https://www.cnblogs.com/yzmPython/p/14155873.html