我正在编写一个Java应用程序来充当模板读写器。我在使用文本方面取得了成功,但对图像有一些不足之处......
获取图像非常简单 - 使用扩展PDFStreamEngine的类
package readingPdf;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
public class ImageStripper extends PDFStreamEngine {
ArrayList<Object []> imagesData = null;
public ImageStripper() throws IOException {
// preparing PDFStreamEngine
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
imagesData = new ArrayList<Object[]>();
}
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
// get the PDF object
PDXObject xobject = getResources().getXObject(objectName);
// check if the object is an image object
if (xobject instanceof PDImageXObject) {
Object[] imageData = new Object[3];
PDImageXObject image = (PDImageXObject) xobject;
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
// position of image in the pdf in terms of user space units
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY()
+ " in user space units");
imageData[0] = ctmNew.getTranslateX();// xPos
imageData[1] = ctmNew.getTranslateY();// yPos
imageData[2] = image;//Image
imagesData.add(imageData);
} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
showForm(form);
}
} else {
super.processOperator(operator, operands);
}
}
public ArrayList<Object[]> getImagesList(){
return imagesData;
}
}
接下来是其实现
public class PDFManager{
private PDFParser parser;
private PDDocument pdDoc;
private PDDocument retDoc;
private COSDocument cosDoc;
private PDPage page;
private String filePath;
private File file;
public PDDocument transferImage() throws IOException {
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file, "r"));
parser.parse();
cosDoc = parser.getDocument();
pdDoc = new PDDocument(cosDoc);
//Get Image Data
ImageStripper imageStripper = new ImageStripper();
imageStripper.processPage(pdDoc.getPage(0));
ArrayList<Object []> imageList = imageStripper.getImagesList();
//Close Doc
pdDoc.close();
cosDoc.close();
//Create new PDF Doc
retDoc = new PDDocument();
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
retDoc.addPage(page);
PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true);
for(int pos = 0; pos < imageList.size() ; pos++) {
Object [] imageData = imageList.get(pos);
float xPos = (float)imageData[0];
float yPos = (float)imageData[1];
PDImageXObject image = (PDImageXObject)imageData[2];
cs.drawImage(image, xPos, yPos);
}
cs.close();
return retDoc;
}
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
PDDocument doc =pdfManager.ToText("c:\\test\\test.pdf");
doc.save("c:\\test\\test2.pdf");
doc.close();
}
}
现在问题出现在我正在写的调用cs.drawImage
的地方。除了尝试保存新文件外,所有代码都没有任何问题执行...我得到了异常COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
我怀疑仍然有元数据将图像链接到原始文档,因为调用PDImageXobject.createFromFile("c:\\test\\testImage.png", doc)
会返回一个完美写入的新的PDImageXObject
实例。随着被写入的PDDocument
被传递到PDImageXObject
,我怀疑它以某种方式联系起来。
我无法将图像保存到临时位置,因为这只是测试POC。
任何援助将不胜感激
@Tilman主持人
谢谢你的解决方案
我将原始文档的关闭移动到一个单独的方法,我在写完文件后调用了该方法
public void closeFiles(){
pdDoc.close();
cosDoc.close();
}