我正在尝试从 PDF 文件中提取突出显示的文本。我设法编写了这段代码,但它无法提取突出显示的文本。知道是什么问题吗?或者有更好的/方法吗?我可以使用任何其他图书馆吗?
谢谢
public String getHighlightedText() {
log.info("test");
String highlightedText = "";
String pdfFilePath = "test.pdf";
PdfReader reader = new PdfReader(pdfFilePath);
log.info("reader.getNumberOfPages(): {}", reader.getNumberOfPages());
for (int i = 1; i <= reader.getNumberOfPages(); i++) {
String pageText = PdfTextExtractor.getTextFromPage(reader, i);
log.info("page text : {} ", pageText);
PdfDictionary pageDict = reader.getPageN(i);
log.info("pageDict : {}", pageDict);
PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
if (annotsArray == null) {
continue;
}
for (int j = 0; j < annotsArray.size(); j++) {
PdfDictionary annotDict = annotsArray.getAsDict(j);
log.info("annotDict.toString() : {} ", annotDict.getAsString(PdfName.CONTENTS));
if (PdfName.HIGHLIGHT.equals(annotDict.getAsName(PdfName.SUBTYPE))) {
PdfString text = annotDict.getAsString(PdfName.CONTENTS);
log.info("text : {}", text);
}
}
}
reader.close();
return highlightedText;
}