getTable

import pymysql

import xlrd

import re

import linecache

import docx

import sys

import docx

from docx import Document #导入库

import prettytable as pt

import xlwt

#连接数据库

try:

db = pymysql.connect(host="localhost",user="root",

passwd="123456",

db="zch",

charset='utf8')

except:

print("could not connect to mysql server")

#创建excel存储

def open_excel():

try:

book = xlrd.open_workbook("test.xlsx") #文件名，把文件与py文件放在同一目录下

except:

print("open excel file failed!")

try:

sheet = book.sheet_by_name("test") #execl里面的worksheet1

return sheet

except:

print("locate worksheet in excel failed!")

#向数据库插入数据

def insert_deta(x,y):

sheet = open_excel()

cursor = db.cursor()

for b in range(1, sheet.nrows): #第一行是标题名，对应表中的字段名所以应该从第二行开始，计算机以0开始计数，所以值是1

row_data = sheet.row_values(b)

value = (row_data[0],row_data[1],row_data[2],row_data[3],row_data[4],x,y)

sql = "INSERT INTO enclosure(sheet_nrow,sheet_content1,sheet_content2,sheet_content3,sheet_content4,file_docx_id,file_txt_id)VALUES(%s,%s,%s,%s,%s,%s,%s)"

cursor.execute(sql,value) #执行sql语句

db.commit()

cursor.close() #关闭连接

cursor = db.cursor()

sql1=cursor.execute("select file_docx_id from file_docx")

info=cursor.fetchmany(sql1)#获取查询结果

#遍历id获得id对应的文件地址，取出并在本机搜索读取

for i in info:

　　i = int(i[0])

　　print(i)

　　sql = cursor.execute('select file_txt_id,file_docx_name from file_docx where file_docx_id=%d'%i)

　　info = cursor.fetchmany(sql)

　　info1 = int(info[0][0])

　　category = cursor.execute('select file_category_id from file_txt where file_txt_id=%d'%info1)

　　file_category=cursor.fetchmany(category)

　　address = cursor.execute('select file_docx_address from file_category where file_category_id=%d'%file_category[0][0])

　　file_docx_address=cursor.fetchmany(address)

　　if file_docx_address[0][0] is None :

　　　　print("当前文件已经全部遍历完")

　　　　break

　　else:

　　　　path=file_docx_address[0][0] +"\"+ info[0][1]

　　　　document = Document(path)

　　ables = document.tables #获取文件中的表格集

　　tb = pt.PrettyTable()

　　book = xlwt.Workbook(encoding = 'utf-8')

　　test1 = book.add_sheet(u'test',cell_overwrite_ok = True)

　　for j in range(0,2):

# table = tables[j]#获取文件中的第i-1个表格

　　　　try:

　　　　　　print('找到表格')

　　　　　　table = tables[j]

　　　　　　result = []

　　　　　　for q in range(0,len(table.rows)):#从表格第一行开始循环读取表格数据

　　　　　　　　for a in range(0,len(table.columns)):#从第一列开始循环读取表格数据

　　　　　　　　　　result = table.cell(q,a).text

　　　　　　　　　　test1.write(q,a,result)

　　　　　　book.save('test.xlsx')

　　　　　　print('运行成功')

　　　　except :

　　　　　　print('没有表格了')

　　　　　　break

　　insert_deta(i,info1)

db.close()#关闭数据

print ("ok ")