Java 提取 docx 文件中的图片

使用的 Maven 依赖为:

<dependency>
     <groupId>org.apache.poi</groupId>
     <artifactId>poi-ooxml</artifactId>
     <version>3.17</version>
</dependency>

1. 获取docx文件中的所有图片

//  获取word中的所有图片
  public static void readImageInParagraph() throws IOException {

    XWPFDocument document = new XWPFDocument(new FileInputStream("C:\Users\Administrator\Desktop\JP6-0-2006.docx"));
    // 用XWPFDocument的getAllPictures来获取所有的图片
    List<XWPFPictureData> picList = document.getAllPictures();
    for (XWPFPictureData pic : picList) {
      System.out.println(pic.getPictureType() +  pic.getFileName());
      byte[] bytev = pic.getData();
//                System.out.println(bytev.length);
      // 大于1000bites的图片我们才弄下来,消除word中莫名的小图片的影响
      if (bytev.length > 300) {
        FileOutputStream fos = new FileOutputStream("C:\Users\Administrator\Desktop\cc\" + pic.getFileName());
        fos.write(bytev);
      }
    }
  }

2. 根据docx的内容,获取每段内容对应的图片

 public static List<String> readImageInParagraphTest() throws IOException {  //XWPFParagraph paragraph
InputStream is = new FileInputStream("C:\Users\Administrator\Desktop\JP6-0-2006.docx");
XWPFDocument doc = new XWPFDocument(is);
List<XWPFParagraph> paras = doc.getParagraphs(); //将得到包含段落列表
List<String> picArr = new ArrayList<>();
for (XWPFParagraph paragraph : paras) {
List<XWPFRun> runs = paragraph.getRuns();
for (XWPFRun run : runs) {
List<XWPFPicture> pictures = run.getEmbeddedPictures();
// System.out.println(pictures);
if (!pictures.isEmpty()) {
for (XWPFPicture picture : pictures) {
// 将图片下载到本地,
// XWPFPicture picture = pictures.get(0);
XWPFPictureData pictureData = picture.getPictureData();
// System.out.println(pictureData.getPictureType());
byte[] bytev = pictureData.getData();

// 大于1000bites的图片我们才弄下来,消除word中莫名的小图片的影响
if (bytev.length > 20) {
// String fileName = "../../../../resources/static/picture/" + pictureData.getFileName();
String fileName = "D:\programming\fileex\src\main\resources\static\picture\" + pictureData.getFileName();
FileOutputStream fos = new FileOutputStream(fileName);
fos.write(bytev);
String pic_string = "/picture/" + pictureData.getFileName();
picArr.add(pic_string);
System.out.println(pic_string); //获取指定大小的图片
}
}
}
}
}
return picArr;
}
原文地址:https://www.cnblogs.com/lxz123/p/15136439.html