利用POI抽取word中的图片并保存在文件中

利用POI抽取word中的图片并保存在文件中

参考文档(Apache POI HWPF 快速指南):poi.apache.org/hwpf/quick-guide.html
1.抽取 word doc 文件中的图片

 1 package parse;
 2 
 3 import java.io.*;
 4 import java.util.*;
 5 
 6 
 7 import org.apache.poi.hwpf.HWPFDocument;
 8 import org.apache.poi.hwpf.model.PicturesTable;
 9 import org.apache.poi.hwpf.usermodel.CharacterRun;
10 import org.apache.poi.hwpf.usermodel.Picture;
11 import org.apache.poi.hwpf.usermodel.Range;
12 
13 public class ReadImgDoc { 
14 
15 public static void main(String[] args) throws Exception {   
16   new ReadImgDoc().readPicture("E:\上海项目测试\文档\模板.doc");
17 }
18 
19   private void readPicture(String path)throws Exception{
20      FileInputStream in=new FileInputStream(new File(path)); 
21      HWPFDocument doc=new HWPFDocument(in); 
22      int length=doc.characterLength();
23      PicturesTable pTable=doc.getPicturesTable();
24     // int TitleLength=doc.getSummaryInformation().getTitle().length();
25 
26      //  System.out.println(TitleLength);
27       // System.out.println(length);
28        for (int i=0;i<length;i++){
29          Range range=new Range(i, i+1,doc);
30 
31          CharacterRun cr=range.getCharacterRun(0);
32          if(pTable.hasPicture(cr)){
33             Picture pic=pTable.extractPicture(cr, false);
34           String afileName=pic.suggestFullFileName();
35           OutputStream out=new FileOutputStream(new File("E:\上海项目测试\docImage\"+UUID.randomUUID()+afileName));
36           pic.writeImageContent(out);
37 
38         }
39        }
40 
41   }
42 
43 }

2.抽取 word docx文件中的图片

 1 package parse;
 2 
 3 import java.io.File;
 4 import java.io.FileInputStream;
 5 import java.io.FileOutputStream;
 6 import java.io.IOException;
 7 import java.util.List;
 8 
 9 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
10 import org.apache.poi.xwpf.usermodel.XWPFDocument;
11 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
12 
13 public class GetPicsDocx {
14   public static void main(String[] args) {
15     String path ="E:\上海项目测试\文档\35.docx";
16     File file = new File(path);
17     try {
18       FileInputStream fis = new FileInputStream(file);
19       XWPFDocument document = new XWPFDocument(fis);
20       XWPFWordExtractor xwpfWordExtractor = new XWPFWordExtractor(document);
21       String text = xwpfWordExtractor.getText();
22       System.out.println(text);
23       List<XWPFPictureData> picList = document.getAllPictures();
24       for (XWPFPictureData pic : picList) {
25         System.out.println(pic.getPictureType() + file.separator + pic.suggestFileExtension()
26             +file.separator+pic.getFileName());
27         byte[] bytev = pic.getData();
28         FileOutputStream fos = new FileOutputStream("E:\上海项目测试\docxImage\"+pic.getFileName()); 
29         fos.write(bytev);
30       }
31       fis.close();
32     } catch (IOException e) {
33       e.printStackTrace();
34     }
35   }
36 }
原文地址:https://www.cnblogs.com/Renyi-Fan/p/8148708.html