Java 合并多个 Word 2007（.docx）文档（基于 docx4j）

参考文章:http://dh.swzhinan.com/post/185.html

引入的 jar 包（Maven 依赖）：

 1 <dependency>
 2             <groupId>org.docx4j</groupId>
 3             <artifactId>docx4j</artifactId>
 4             <version>6.0.1</version>
 5         </dependency>
 6         <dependency>
 7             <groupId>org.apache.commons</groupId>
 8             <artifactId>commons-compress</artifactId>
 9             <version>1.8.1</version>
10         </dependency>
11         <dependency>
12             <groupId>com.alibaba</groupId>
13             <artifactId>fastjson</artifactId>
14             <version>1.2.58</version>
15         </dependency>
16 
17         <dependency>
18             <groupId>org.apache.poi</groupId>
19             <artifactId>poi</artifactId>
20             <version>3.10-FINAL</version>
21         </dependency>
22         <dependency>
23             <groupId>org.apache.poi</groupId>
24             <artifactId>poi-ooxml</artifactId>
25             <version>3.10-FINAL</version>
26         </dependency>
27         <dependency>
28             <groupId>org.apache.xmlbeans</groupId>
29             <artifactId>xmlbeans</artifactId>
30             <version>2.5.0</version>
31         </dependency>
32         <dependency>
33             <groupId>org.apache.xmlgraphics</groupId>
34             <artifactId>xmlgraphics-commons</artifactId>
35             <version>1.3.1</version>
36         </dependency>

示例代码（WordMergeUtils 工具类）：

  1 package com.htsoft.oa.action.sjrh.tool;
  2 
  3 import java.io.File;
  4 import java.io.FileInputStream;
  5 import java.io.FileOutputStream;
  6 import java.io.IOException;
  7 import java.io.InputStream;
  8 import java.io.OutputStream;
  9 import java.io.RandomAccessFile;
 10 import java.nio.MappedByteBuffer;
 11 import java.nio.channels.FileChannel;
 12 import java.nio.channels.FileChannel.MapMode;
 13 import java.text.SimpleDateFormat;
 14 import java.util.ArrayList;
 15 import java.util.Date;
 16 import java.util.Iterator;
 17 import java.util.List;
 18 
 19 import org.apache.commons.io.IOUtils;
 20 import org.docx4j.dml.wordprocessingDrawing.Inline;
 21 import org.docx4j.jaxb.Context;
 22 import org.docx4j.openpackaging.exceptions.Docx4JException;
 23 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
 24 import org.docx4j.openpackaging.parts.PartName;
 25 import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
 26 import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
 27 import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
 28 import org.docx4j.relationships.Relationship;
 29 import org.docx4j.wml.Br;
 30 import org.docx4j.wml.CTAltChunk;
 31 import org.docx4j.wml.Drawing;
 32 import org.docx4j.wml.ObjectFactory;
 33 import org.docx4j.wml.P;
 34 import org.docx4j.wml.R;
 35 import org.docx4j.wml.STBrType;
 36 
 37 import com.alibaba.fastjson.JSONObject;
 38 import com.htsoft.oa.action.sjrh.pojo.MergeResult;
 39 
 40 public class WordMergeUtils {
 41     private static ObjectFactory factory = new ObjectFactory();
 42 
 43     /**
 44      * 合并docx
 45      * 
 46      * @param streams
 47      *            要合并的word文件的输入流
 48      * @param path
 49      *            合并后的文件的路径
 50      * @return
 51      * @throws Docx4JException
 52      * @throws IOException
 53      */
 54     public static File mergeDocx(final List<InputStream> streams, String path) throws Docx4JException, IOException {
 55 
 56         WordprocessingMLPackage target = null;
 57         final File generated = new File(path);
 58 
 59         int chunkId = 0;
 60         Iterator<InputStream> it = streams.iterator();
 61         while (it.hasNext()) {
 62             InputStream is = it.next();
 63             if (is != null) {
 64                 try {
 65                     if (target == null) {
 66                         // Copy first (master) document
 67                         OutputStream os = new FileOutputStream(generated);
 68                         os.write(IOUtils.toByteArray(is));
 69                         os.close();
 70 
 71                         target = WordprocessingMLPackage.load(generated);
 72                     } else {
 73                         MainDocumentPart documentPart = target.getMainDocumentPart();
 74 
 75 //                        addPageBreak(documentPart); // 另起一页，换页
 76 
 77                         insertDocx(documentPart, IOUtils.toByteArray(is), chunkId++);
 78                     }
 79                 } catch (Exception e) {
 80                     e.printStackTrace();
 81                 } finally {
 82                     is.close();
 83                 }
 84             }
 85         }
 86 
 87         if (target != null) {
 88             target.save(generated);
 89             // Docx4J.save(target, generated, Docx4J.FLAG_NONE);
 90             return generated;
 91         } else {
 92             return null;
 93         }
 94     }
 95 
 96     // 插入文档
 97     private static void insertDocx(MainDocumentPart main, byte[] bytes, int chunkId) {
 98         try {
 99             AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(
100                     new PartName("/part" + chunkId + ".docx"));
101             // afiPart.setContentType(new ContentType(CONTENT_TYPE));
102             afiPart.setBinaryData(bytes);
103             Relationship altChunkRel = main.addTargetPart(afiPart);
104 
105             CTAltChunk chunk = Context.getWmlObjectFactory().createCTAltChunk();
106             chunk.setId(altChunkRel.getId());
107 
108             main.addObject(chunk);
109         } catch (Exception e) {
110             e.printStackTrace();
111         }
112     }
113 
114     /**
115      * wordML转word，原文件不变，返回转换完成的word文件对象。
116      * 
117      * @param file
118      * @return
119      * @throws Docx4JException
120      * @throws IOException
121      */
122     public static File wordMLToWord(File file) throws Docx4JException, IOException {
123         WordprocessingMLPackage target = WordprocessingMLPackage.load(file);
124         File temp = File.createTempFile(file.getName(), ".doc");
125         target.save(temp);
126         return temp;
127     }
128 
129     /**
130      * xml转docx，原文件不变，返回转换完成的word文件对象。
131      * 
132      * @param file
133      * @return
134      * @throws Docx4JException
135      * @throws IOException
136      */
137     public static File xmlToWord(File file) throws Docx4JException, IOException {
138         WordprocessingMLPackage target = WordprocessingMLPackage.load(file);
139         File temp = File.createTempFile(file.getName(), ".doc");
140         target.save(temp);
141         return temp;
142     }
143 
144     /**
145      * 合并wordML文档
146      * 
147      * @param list
148      * @param path
149      * @throws Docx4JException
150      * @throws IOException
151      */
152     public static File mergeWordML(List<File> list, String path) throws Docx4JException, IOException {
153         final List<InputStream> streams = new ArrayList<InputStream>();
154         for (int i = 0; i < list.size(); i++) {
155             File file = list.get(i);
156             // file = WordMLUtil.wordMLToWord(file); // wordML转word
157             streams.add(new FileInputStream(file));
158         }
159         return WordMergeUtils.mergeDocx(streams, path);
160     }
161 
162     /**
163      * 把文件转换成Byte[] Mapped File way MappedByteBuffer 可以在处理大文件时，提升性能
164      * 
165      * @param filename
166      * @return
167      * @throws IOException
168      */
169     public static byte[] fileToByteArray(String filename) throws IOException {
170 
171         RandomAccessFile raf = null;
172         FileChannel fc = null;
173         try {
174             raf = new RandomAccessFile(filename, "r");
175             fc = raf.getChannel();
176             MappedByteBuffer byteBuffer = fc.map(MapMode.READ_ONLY, 0, fc.size()).load();
177             System.out.println(byteBuffer.isLoaded());
178             byte[] result = new byte[(int) fc.size()];
179             if (byteBuffer.remaining() > 0) {
180                 byteBuffer.get(result, 0, byteBuffer.remaining());
181             }
182             return result;
183         } catch (IOException e) {
184             e.printStackTrace();
185             throw e;
186         } finally {
187             try {
188                 fc.close();
189                 raf.close();
190             } catch (IOException e) {
191                 e.printStackTrace();
192             }
193         }
194     }
195 
196     /**
197      * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中,
198      * 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数.
199      * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件.
200      * 
201      * @param word
202      *            需要编辑的文件
203      * @param imageList
204      *            图片对象集合（ 图片对象属性： url 图片文件路径 keyword 文档中的图片占位符 name 图片文件名 ）
205      * @throws Exception
206      *             不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型)
207      */
208     public static void addImageToPackage(File word, List<JSONObject> imageList) throws Exception {
209 
210         WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);
211 
212         for (int i = 0; i < imageList.size(); i++) {
213             JSONObject image = imageList.get(i);
214 
215             byte[] bytes = fileToByteArray(image.getString("url"));
216 
217             BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, bytes);
218 
219             int docPrId = 1;
220             int cNvPrId = 2;
221             Inline inline = imagePart.createImageInline(image.getString("name"), image.getString("keyword"), docPrId,
222                     cNvPrId, false);
223 
224             P paragraph = addInlineImageToParagraph(inline);
225 
226             wordMLPackage.getMainDocumentPart().addObject(paragraph);
227         }
228 
229         wordMLPackage.save(word);
230     }
231 
232     /**
233      * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中,
234      * 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数.
235      * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件.
236      * 
237      * @param wordFilePath
238      *            文件路径
239      * @param imageList
240      *            图片对象集合（ 图片对象属性： url 图片文件路径 keyword 文档中的图片占位符 name 图片文件名 ）
241      * @throws Exception
242      *             不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型)
243      */
244     public static void addImageToPackage(String wordFilePath, List<JSONObject> imageList) throws Exception {
245         addImageToPackage(new File(wordFilePath), imageList);
246     }
247 
248     /**
249      * 创建一个对象工厂并用它创建一个段落和一个可运行块R. 然后将可运行块添加到段落中. 接下来创建一个图画并将其添加到可运行块R中. 最后我们将内联
250      * 对象添加到图画中并返回段落对象.
251      * 
252      * @param inline
253      *            包含图片的内联对象.
254      * @return 包含图片的段落
255      */
256     private static P addInlineImageToParagraph(Inline inline) {
257         // 添加内联对象到一个段落中
258         P paragraph = factory.createP();
259         R run = factory.createR();
260         paragraph.getContent().add(run);
261         Drawing drawing = factory.createDrawing();
262         run.getContent().add(drawing);
263         drawing.getAnchorOrInline().add(inline);
264         return paragraph;
265     }
266 
267     /**
268      * 文档结尾添加一个空白页
269      * 
270      * @throws Docx4JException
271      */
272     public static void addPageBreak(File word) throws Docx4JException {
273 
274         WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);
275 
276         MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
277 
278         Br breakObj = new Br();
279         breakObj.setType(STBrType.PAGE);
280 
281         P paragraph = factory.createP();
282         paragraph.getContent().add(breakObj);
283         documentPart.getJaxbElement().getBody().getContent().add(paragraph);
284         wordMLPackage.save(word);
285     }
286 
287     /**
288      * 文档结尾添加一个空白页
289      * 
290      * @throws Docx4JException
291      */
292     public static void addPageBreak(MainDocumentPart documentPart) {
293         Br breakObj = new Br();
294         breakObj.setType(STBrType.PAGE);
295 
296         P paragraph = factory.createP();
297         paragraph.getContent().add(breakObj);
298         documentPart.getJaxbElement().getBody().getContent().add(paragraph);
299     }
300 
301     /**
302      * 文档结尾添加一个空白页
303      * 
304      * @throws Docx4JException
305      */
306     public static void addPageBreak(String wordFilePath) throws Docx4JException {
307         addPageBreak(new File(wordFilePath));
308     }
309 
310     /**
311      * 合并word文档 接口方法
312      * 
313      * @param sourceFiles待合并文件
314      * @param mergedFileName合并后的文件名称
315      * @throws Exception
316      */
317     public static MergeResult merge(String djxh, List<String> sourceFiles, String mergedFileName) {
318 
319         if (djxh == null || djxh.isEmpty()) {
320             return new MergeResult(-1, null, "登记序号为空!", null);
321         } else if (sourceFiles == null || sourceFiles.size() <= 0) {
322             return new MergeResult(-1, null, "待合并文件路径为空!", null);
323         }
324 
325         try {
326             List<File> files = new ArrayList<File>();
327             for (String filePath : sourceFiles) {
328                 File file = new File(filePath);
329                 files.add(file);
330             }
331 
332             // 保存基础路径
333             String path = "";
334             if ("1".equals(WordStaticFileds.open_Fixed_path)) {
335                 // 创建固定路径
336                 path = WordStaticFileds.create_word_path + "word/fixed/" + djxh;
337             } else {
338                 // 创建不固定路径
339                 path = WordStaticFileds.create_word_path + "word/notFixed/"
340                         + new SimpleDateFormat("yyyyMMdd").format(new Date()) + "/" + djxh;
341             }
342 
343             if (mergedFileName == null || mergedFileName.isEmpty()) {
344                 if (files.size() > 0) {
345                     String oldName = files.get(0).getName();
346                     int lastIndexOf = oldName.lastIndexOf(".");
347                     if (lastIndexOf > 0) {
348                         mergedFileName = oldName.substring(0, lastIndexOf) + "-合并后.docx";
349                     }
350                 }
351             }
352 
353             File mergedfile = new File(path);
354 
355             if (!mergedfile.exists()) {
356                 mergedfile.mkdirs();
357             }
358 
359             String mergedFullPath = path + "/" + mergedFileName;
360             File mergeWordML = WordMergeUtils.mergeWordML(files, mergedFullPath);
361             
362             
363             return new MergeResult(0, mergeWordML, "合并word文件成功!", mergeWordML.getAbsolutePath());
364         } catch (Exception e) {
365             return new MergeResult(-1, null, "合并word文件出错!错误信息:" + e.getMessage(), null);
366         }
367 
368     }
369 }

java合并多个word 2007 文档 基于docx4j

java合并多个word 2007 文档基于docx4j