excel 数据入库

import logging
import pymongo
import os
import pandas as pd
import numpy as np
from tqdm import tqdm


def excel_mongo(file_path, db_name, table_name, sheet_num=True, ip_address='127.0.0.1', port=27017):
"""
file_path excel 文件的路径
db_name 数据库名称
table_name 数据库表的名称
sheet_num 是否多个sheet导入
ip_address 导入数据库的ip地址
"""
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
logging.basicConfig(filename='mongo_insert.log', level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
excel_data_list = []
logging.info("------- " + str(ip_address) + ' 数据库导入ip地址----')
if os.path.isfile(file_path):
file = os.path.splitext(file_path)
_, type = file
if type !='.xls' and type != '.xlsx':
logging.info("------- " + str(file_path) + ' 非excel文件类型----')
return
logging.info("------- "+str(file_path) + ' 路径下的excel数据开始读取----')
if sheet_num:
"""单个sheet 导入mongo"""
df = pd.read_excel(file_path, keep_default_na=False)
excel_header = list(df.columns)
for data in df.values:
row_data = dict(zip(excel_header, data))
excel_data_list.append(row_data)
else:
"""多 sheet 导入"""
df = pd.read_excel(file_path, keep_default_na=False, sheet_name=None)
for data_obj in df.values():
excel_header = list(data_obj.columns)
data_array = np.array(data_obj)
for data in data_array:
row_data = dict(zip(excel_header, data))
excel_data_list.append(row_data)
client = pymongo.MongoClient(ip_address, port=port)
db_insert = client[db_name][table_name]
try:
db_insert.insert_many(excel_data_list)
logging.info(f'{table_name}---数据入库成功')
except Exception as e:
logging.error(f'{table_name}入库失败--数据表中有部分没有表头,请查看! 错误类型:{e}')
print(f'{table_name}入库失败--数据表中有部分没有表头,请查看!')
dn_count = db_insert.find().count()
excel_count = df.shape[0]
if dn_count == excel_count:
logging.info(f'{table_name}---数据没有遗失')
else:
logging.error(f'{table_name}---数据遗失,删除数据库')
db_insert.drop()
else:
logging.warning("------- " + str(file_path) + ' 非文件类型----')


def to_mongo(path, ip_address, db='BiaoQian_3'):
for file in tqdm(os.listdir(path)):
file_path = os.path.join(path, file)
if os.path.isfile(file_path):
cate = path.split('\')[-1]
file_name, _ = os.path.splitext(file)
table_name = cate+'/'+file_name
excel_mongo(file_path, db, table_name, ip_address=ip_address)
else:
continue
print(f'{path}--excel 表格全部入库')


if __name__ == '__main__':
path = r'E:标签组数据第五次文旅'
ip_address = '192.168.0.147'
# db, 可指定数据库名称: 默认BiaoQian_3
db = 'BiaoQian_5'
to_mongo(path, ip_address,db)
原文地址:https://www.cnblogs.com/lqn404/p/13612201.html