Java定位PDF中关键字的坐标

使用itextpdf来操作PDF文件,定位PDF文件中的关键字坐标演示

测试结果:

测试的PDF文件如下:

junit测试输出坐标:

maven配置中引入itextpdf:

<!-- 引入pdf -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>itextpdf</artifactId>
      <version>5.5.13</version>
    </dependency>

定位工具类PdfHelper.java

package com.alphajuns.util;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.io.IOException;

/**
 * @ClassName PdfHelper
 * @Description Pdf帮助类
 * @Author AlphaJunS
 * @Date 2020/3/7 17:40
 * @Version 1.0
 */
public class PdfHelper {

    /**
     * @Author AlphaJunS
     * @Date 18:24 2020/3/7
     * @Description 用于供外部类调用获取关键字所在PDF文件坐标
     * @param filepath
     * @param keyWords
     * @return float[]
     */
    public static float[] getKeyWordsByPath(String filepath, String keyWords) {
        float[] coordinate = null;
        try{
            PdfReader pdfReader = new PdfReader(filepath);
            coordinate = getKeyWords(pdfReader, keyWords);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return coordinate;
    }

    /**
     * @Author AlphaJunS
     * @Date 18:26 2020/3/7
     * @Description 获取关键字所在PDF坐标
     * @param pdfReader
     * @param keyWords
     * @return float[]
     */
    private static float[] getKeyWords(PdfReader pdfReader, String keyWords) {
        float[] coordinate = null;
        int page = 0;
        try{
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
            CustomRenderListener renderListener = new CustomRenderListener();
            renderListener.setKeyWord(keyWords);
            for (page = 1; page <= pageNum; page++) {
                renderListener.setPage(page);
                pdfReaderContentParser.processContent(page, renderListener);
                coordinate = renderListener.getPcoordinate();
                if (coordinate != null) break;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return coordinate;
    }

}

pdf帮助类CustomRenderListener.java:

package com.alphajuns.util;

import com.itextpdf.awt.geom.Rectangle2D.Float;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

/**
 * @Author AlphaJunS
 * @Date 12:53 2020/3/7
 * @Description pdf签名帮助类
 */
public class CustomRenderListener implements RenderListener{

    private float[] pcoordinate = null;

    private String keyWord;

    private int page;

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    public float[] getPcoordinate(){
        return pcoordinate;
    }

    public String getKeyWord() {
        return keyWord;
    }

    public void setKeyWord(String keyWord) {
        this.keyWord = keyWord;
    }

    @Override
    public void beginTextBlock() {}

    @Override
    public void endTextBlock() {}

    @Override
    public void renderImage(ImageRenderInfo arg0) {}

    @Override
    public void renderText(TextRenderInfo textRenderInfo) {
        String text = textRenderInfo.getText();
        if (null != text && text.contains(keyWord)) {
            Float boundingRectange = textRenderInfo.getBaseline().getBoundingRectange();
            pcoordinate = new float[3];
            pcoordinate[0] = boundingRectange.x;
            pcoordinate[1] = boundingRectange.y;
            pcoordinate[2] = page;
        }
    }

}
原文地址:https://www.cnblogs.com/alphajuns/p/12436332.html