getTable

import pymysql
import xlrd
import re
import linecache
import docx
import sys
import docx
from docx import Document #导入库
import prettytable as pt
import xlwt

#连接数据库
try:
db = pymysql.connect(host="localhost",user="root",
passwd="123456",
db="zch",
charset='utf8')
except:
print("could not connect to mysql server")

#创建excel存储
def open_excel():
try:
book = xlrd.open_workbook("test.xlsx") #文件名,把文件与py文件放在同一目录下
except:
print("open excel file failed!")
try:
sheet = book.sheet_by_name("test") #execl里面的worksheet1
return sheet
except:
print("locate worksheet in excel failed!")


#向数据库插入数据
def insert_deta(x,y):
sheet = open_excel()
cursor = db.cursor()
for b in range(1, sheet.nrows): #第一行是标题名,对应表中的字段名所以应该从第二行开始,计算机以0开始计数,所以值是1
row_data = sheet.row_values(b)
value = (row_data[0],row_data[1],row_data[2],row_data[3],row_data[4],x,y)
sql = "INSERT INTO enclosure(sheet_nrow,sheet_content1,sheet_content2,sheet_content3,sheet_content4,file_docx_id,file_txt_id)VALUES(%s,%s,%s,%s,%s,%s,%s)"
cursor.execute(sql,value) #执行sql语句
db.commit()
cursor.close() #关闭连接


cursor = db.cursor()
sql1=cursor.execute("select file_docx_id from file_docx")
info=cursor.fetchmany(sql1)#获取查询结果
#遍历id获得id对应的文件地址,取出并在本机搜索读取
for i in info:
  i = int(i[0])
  print(i)
  sql = cursor.execute('select file_txt_id,file_docx_name from file_docx where file_docx_id=%d'%i)
  info = cursor.fetchmany(sql)
  info1 = int(info[0][0])
  category = cursor.execute('select file_category_id from file_txt where file_txt_id=%d'%info1)
  file_category=cursor.fetchmany(category)
  address = cursor.execute('select file_docx_address from file_category where file_category_id=%d'%file_category[0][0])
  file_docx_address=cursor.fetchmany(address)
  if file_docx_address[0][0] is None :
    print("当前文件已经全部遍历完")
    break
  else:
    path=file_docx_address[0][0] +"\"+ info[0][1]
    document = Document(path)
  ables = document.tables #获取文件中的表格集
  tb = pt.PrettyTable()
  book = xlwt.Workbook(encoding = 'utf-8')
  test1 = book.add_sheet(u'test',cell_overwrite_ok = True)
  for j in range(0,2):
# table = tables[j]#获取文件中的第i-1个表格
    try:
      print('找到表格')
      table = tables[j]
      result = []
      for q in range(0,len(table.rows)):#从表格第一行开始循环读取表格数据
        for a in range(0,len(table.columns)):#从第一列开始循环读取表格数据
          result = table.cell(q,a).text
          test1.write(q,a,result)
      book.save('test.xlsx')
      print('运行成功')
    except :
      print('没有表格了')
      break
  insert_deta(i,info1)
db.close()#关闭数据
print ("ok ")
原文地址:https://www.cnblogs.com/zyl-kx/p/11375177.html