Python-docx模块

Python-docx

python-docx 包可以用来创建 docx 文档,并对现有文档进行更改;段落、分页符、表格、图片、标题、样式等 Word 文档中几乎所有常用的功能都包含在内。

只能解析docx文件,解析不了doc文件

官方文档:

https://python-docx.readthedocs.io/en/latest/user/text.html

https://python-docx.readthedocs.io/en/latest/index.html

安装使用

pip3 install python-docx

案例一

from docx import Document  # entry point: creates (or opens) a .docx document
from docx.shared import Inches  # length value expressed in inches
from docx.shared import Pt  # length value expressed in points (font size, spacing)
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn  # builds namespace-qualified XML names such as 'w:eastAsia'
from docx.shared import RGBColor

# Create a new, empty document (Document() without a path starts from the
# default template instead of opening an existing file).
document = Document()

# Append a paragraph.
paragraph = document.add_paragraph('This is a demo.')

# Insert a new paragraph directly before the existing one.
prior_paragraph = paragraph.insert_paragraph_before('welcome!')

# Run properties such as bold / italic / underline are tri-state:
# True switches the property on, False switches it off, and None (the
# default) inherits the effective value from the style hierarchy.
paragraph = document.add_paragraph()
paragraph.add_run('Lorem ipsum')
run = paragraph.add_run(' dolor')
run.bold = True
run.font.name = u'宋体'
# font.name alone does not cover East Asian text, so the w:eastAsia font is
# set on the underlying XML element as well. font.name must be assigned
# first: that is what creates the rPr/rFonts elements used below.
r = run._element
r.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
paragraph.add_run(' hello').underline = True
paragraph.add_run(u'斜体、').italic = True
paragraph.add_run(u'设置中文字体,')
paragraph.add_run(u'设置字号').font.size = Pt(24)

# Add a paragraph of text.
p = document.add_paragraph('test')

# Center the paragraph. WD_ALIGN_PARAGRAPH holds the available alignments,
# e.g. WD_ALIGN_PARAGRAPH.LEFT and WD_ALIGN_PARAGRAPH.RIGHT.
p.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Indentation and spacing are properties of paragraph_format, not of the
# Paragraph object itself; assigning them on the paragraph would only create
# an unused Python attribute and leave the document unchanged.
fmt = p.paragraph_format
fmt.left_indent = Inches(0.3)        # left indent
fmt.first_line_indent = Inches(0.3)  # first-line indent
fmt.space_before = Pt(18)            # spacing above the paragraph
fmt.space_after = Pt(12)             # spacing below the paragraph

# Add headings (level defaults to 1).
document.add_heading('The REAL meaning of the universe')
document.add_heading('The role of dolphins', level=2)

# Add a quotation using the built-in "Intense Quote" paragraph style.
document.add_paragraph('Intese quote', style="Intense Quote")

# Add a page break.
document.add_page_break()

# Add a 2x2 table and fill its cells, addressing them both by
# (row, column) index and through a row object.
table = document.add_table(rows=2, cols=2)
cell = table.cell(0, 0)
cell.text = 'cell_00'
table.cell(0, 1).text = 'cell_01'
row = table.rows[1]
row.cells[0].text = 'cell_10'
row.cells[1].text = 'cell_11'

# Count rows and columns.
row_count = len(table.rows)
col_count = len(table.columns)

# Add a picture; '1.png' must exist in the working directory.
document.add_picture('1.png', width=Inches(1.25))

# Apply a character style to a run.
paragraph = document.add_paragraph('Normal text, ')
paragraph.add_run('text with emphasis', 'Emphasis')

# Add a numbered (ordered) list.
document.add_paragraph(
    u'有序列表元素1', style='List Number'
)
document.add_paragraph(
    u'有序列表元素2', style='List Number'
)

# Add a bulleted (unordered) list.
document.add_paragraph(
    u'无序列表元素1', style='List Bullet'
)
document.add_paragraph(
    u'无序列表元素2', style='List Bullet'
)
# Alternatively, assign the style after creating the paragraph:
#   paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
#   paragraph.style = 'List Bullet'

document.save('test.docx')

案例二

from docx import Document
from docx.shared import Inches

document = Document()

# Add the document title. Heading levels range from 0 to 9 (default 1);
# level 0 produces a "Title"-styled paragraph.
document.add_heading('Document Title', 0)

# Add a paragraph; the text may contain tab (\t), newline (\n) or
# carriage-return (\r) characters.
p = document.add_paragraph('A plain paragraph having some ')
# Append runs to the end of the paragraph; each run can carry its own formatting.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

# Add bulleted list items (a small dot in front of each item).
document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph('second item in unordered list', style='List Bullet')

# Add numbered list items (a number in front of each item).
document.add_paragraph('first item in ordered list', style='List Number')
document.add_paragraph('second item in ordered list', style='List Number')

# Add a picture; 'monty-truth.png' must exist in the working directory.
document.add_picture('monty-truth.png', width=Inches(1.25))

records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam')
)

# Add a table with one row and three columns.
# Available table styles include:
#   Normal Table
#   Table Grid
#   Light Shading, Light Shading Accent 1 .. Light Shading Accent 6
#   Light List, Light List Accent 1 .. Light List Accent 6
#   Light Grid, Light Grid Accent 1 .. Light Grid Accent 6
#   ... and many more, omitted here.
table = document.add_table(rows=1, cols=3, style='Light Shading Accent 2')
# Cells of the first (header) row.
hdr_cells = table.rows[0].cells
# Fill in the three header cells.
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# `item_id` rather than `id`, so the builtin id() is not shadowed.
for qty, item_id, desc in records:
    # add_row() appends a row; .cells gives that row's cells.
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()

# Save the .docx document.
document.save('demo.docx')

读取word文档

from docx import Document

doc = Document('demo.docx')

# Print the text of every paragraph.
for para in doc.paragraphs:
    print(para.text)

# Print each paragraph's index together with its text. enumerate() reads
# doc.paragraphs once instead of re-evaluating the property on every index
# lookup.
for i, para in enumerate(doc.paragraphs):
    print(str(i), para.text)

# Walk every table: rows first, then the cells of each row.
tbs = doc.tables
for tb in tbs:
    for row in tb.rows:
        for cell in row.cells:
            print(cell.text)
            # Alternative: build the text by hand from the cell's paragraphs
            # (this variant puts no separator between paragraphs):
            #   text = ''.join(p.text for p in cell.paragraphs)
            #   print(text)
原文地址:https://www.cnblogs.com/chenwenyin/p/13557353.html