word2html文件

 1 package com.wesib2b.lw.app.util;
 2 
 3 import java.io.ByteArrayOutputStream;
 4 import java.io.File;
 5 import java.io.FileInputStream;
 6 import java.io.FileNotFoundException;
 7 import java.io.FileOutputStream;
 8 import java.io.InputStream;
 9 import java.util.List;
10 import javax.xml.parsers.DocumentBuilderFactory;
11 import javax.xml.transform.OutputKeys;
12 import javax.xml.transform.Transformer;
13 import javax.xml.transform.TransformerFactory;
14 import javax.xml.transform.dom.DOMSource;
15 import javax.xml.transform.stream.StreamResult;
16 import org.apache.commons.io.FileUtils;
17 import org.apache.poi.hwpf.HWPFDocument;
18 import org.apache.poi.hwpf.converter.PicturesManager;
19 import org.apache.poi.hwpf.converter.WordToHtmlConverter;
20 import org.apache.poi.hwpf.usermodel.Picture;
21 import org.apache.poi.hwpf.usermodel.PictureType;
22 import org.w3c.dom.Document;
23 
24 public class test {
25     public static void main(String[] args) throws Throwable {
26         final String path = "D:\";
27         final String file = "aaa.doc";
28         InputStream input = new FileInputStream(path + file);
29         HWPFDocument wordDocument = new HWPFDocument(input);
30         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
31                 DocumentBuilderFactory.newInstance().newDocumentBuilder()
32                         .newDocument());
33         wordToHtmlConverter.setPicturesManager(new PicturesManager() {
34             public String savePicture(byte[] content, PictureType pictureType,
35                     String suggestedName, float widthInches, float heightInches) {
36                 return suggestedName;
37             }
38         });
39         wordToHtmlConverter.processDocument(wordDocument);
40         List pics = wordDocument.getPicturesTable().getAllPictures();
41         if (pics != null) {
42             for (int i = 0; i < pics.size(); i++) {
43                 Picture pic = (Picture) pics.get(i);
44                 try {
45                     pic.writeImageContent(new FileOutputStream(path
46                             + pic.suggestFullFileName()));
47                 } catch (FileNotFoundException e) {
48                     e.printStackTrace();
49                 }
50             }
51         }
52         Document htmlDocument = wordToHtmlConverter.getDocument();
53         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
54         DOMSource domSource = new DOMSource(htmlDocument);
55         StreamResult streamResult = new StreamResult(outStream);
56         TransformerFactory tf = TransformerFactory.newInstance();
57         Transformer serializer = tf.newTransformer();
58         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
59         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
60         serializer.setOutputProperty(OutputKeys.METHOD, "html");
61         serializer.transform(domSource, streamResult);
62         outStream.close();
63         String content = new String(outStream.toByteArray());
64         FileUtils.write(new File(path, "1.html"), content, "utf-8");
65     }
66 }
原文地址:https://www.cnblogs.com/jason123/p/7039937.html