在XWPFDocument中插入多个复制的段落

问题描述 投票:0回答:1

我正在尝试使用 Apache POI 复制 XWPFDocument 的段落。由于 POI 无法在任意位置插入预先构造好的段落,我阅读了大量答案,它们建议先用 insertNewParagraph() 插入一个临时段落,然后用 setParagraph() 将该临时段落替换为我实际想要的段落。这变得更加复杂,因为 insertNewParagraph 不能像 XWPFTable.addRow(row, pos) 那样直接接受正文元素列表中的目标索引,而必须向其传递一个 XmlCursor。

TestIn.docx 我创建为一个测试,包含 6 个段落 A、B、C、D、E、F。

import java.io.FileInputStream;
import java.io.FileOutputStream;

import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;

/**
 * Reproduction harness for inserting copied paragraphs into an {@link XWPFDocument}.
 *
 * <p>It inserts a temporary paragraph with {@code insertNewParagraph}, replaces it with a copy of
 * an existing paragraph via {@code setParagraph}, and then repeats the same steps a second time.
 * After every step it prints the contents of both the body-element list and the paragraph list
 * so the two can be compared.
 */
public class ParagraphIssue
{
    /**
     * Appends a short label for {@code elem} to {@code s}, separated from any previous label by
     * a single space.
     *
     * <p>The label is chosen by reference comparison ({@code ==}) against the supplied
     * paragraphs: A-F for the six original paragraphs, T1/T2 for the temporary paragraphs,
     * R1/R2 for the replacement (cloned) paragraphs, and "U" for anything that matches none of
     * them. Arguments that are not yet known may be passed as {@code null}.
     */
    public void debugElement (IBodyElement elem, StringBuilder s, XWPFParagraph a, XWPFParagraph b, XWPFParagraph c, XWPFParagraph d, XWPFParagraph e, XWPFParagraph f,
        XWPFParagraph t1, XWPFParagraph r1, XWPFParagraph t2, XWPFParagraph r2)
    {
        if (s.length () > 0) s.append (" ");
        if (elem == a) s.append ("A");
        else if (elem == b) s.append ("B");
        else if (elem == c) s.append ("C");
        else if (elem == d) s.append ("D");
        else if (elem == e) s.append ("E");
        else if (elem == f) s.append ("F");
        else if (elem == t1) s.append ("T1");
        else if (elem == r1) s.append ("R1");
        else if (elem == t2) s.append ("T2");
        else if (elem == r2) s.append ("R2");
        else s.append ("U");
    }
    
    /**
     * Prints two lines to standard output: the labels of everything returned by
     * {@code doc.getBodyElements ()} ("Elements: ...") followed by the labels of everything
     * returned by {@code doc.getParagraphs ()} ("Paragraphs: ...").
     *
     * <p>Labels are produced by {@link #debugElement}; the paragraph arguments are passed
     * through to it unchanged.
     */
    public void debug (XWPFDocument doc, XWPFParagraph a, XWPFParagraph b, XWPFParagraph c, XWPFParagraph d, XWPFParagraph e, XWPFParagraph f,
        XWPFParagraph t1, XWPFParagraph r1, XWPFParagraph t2, XWPFParagraph r2)
    {
        // First pass: the body-element list
        StringBuilder s = new StringBuilder ();
        for (IBodyElement elem : doc.getBodyElements ())
            debugElement (elem, s, a, b, c, d, e, f, t1, r1, t2, r2);
        System.out.println("Elements: " + s);
        
        // Second pass: the paragraph list
        s = new StringBuilder ();
        for (XWPFParagraph para : doc.getParagraphs ())
            debugElement (para, s, a, b, c, d, e, f, t1, r1, t2, r2);
        System.out.println("Paragraphs: " + s);
    }
    
    /**
     * Runs the two insert-then-replace rounds against {@code doc}, printing the state of both
     * lists after each step.
     *
     * @param doc document to modify; its paragraph list must contain at least six paragraphs
     * @param insertionPoint index into {@code doc.getBodyElements ()} of the element the first
     *        temporary paragraph is inserted at; the second round uses {@code insertionPoint + 1}.
     *        The elements at both indexes, and at indexes 0 and 1, are cast to
     *        {@link XWPFParagraph}
     */
    public void run (XWPFDocument doc, int insertionPoint)
    {
        // Keep references to the first six paragraphs so the debug output can label them
        XWPFParagraph paraA = doc.getParagraphs().get(0);
        XWPFParagraph paraB = doc.getParagraphs().get(1);
        XWPFParagraph paraC = doc.getParagraphs().get(2);
        XWPFParagraph paraD = doc.getParagraphs().get(3);
        XWPFParagraph paraE = doc.getParagraphs().get(4);
        XWPFParagraph paraF = doc.getParagraphs().get(5);
        
        System.out.println ("--- Document initial state ---");
        debug (doc, paraA, paraB, paraC, paraD, paraE, paraF, null, null, null, null);

        // Clone the first body element (cast assumes it is a paragraph, not a table)
        XWPFParagraph cloneThis = (XWPFParagraph) doc.getBodyElements ().get (0);
        XWPFParagraph clonedPara = new XWPFParagraph ((CTP) cloneThis.getCTP ().copy (), doc);
        
        // Insert a temporary paragraph using a cursor on the body element at insertionPoint
        XWPFParagraph insertBeforePara = (XWPFParagraph) doc.getBodyElements ().get (insertionPoint);
        XmlCursor cursor = insertBeforePara.getCTP ().newCursor ();
        
        XWPFParagraph newPara = doc.insertNewParagraph (cursor);
        newPara.insertNewRun (0).setText ("this should get replaced");
        
        System.out.println ("--- Insert 1st temporary para before F ---");
        debug (doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, null, null);
        
        // Work out the temporary paragraph's index among paragraphs only: walk the body
        // elements and count those of the same element type that come before it
        int newParaIndex = 0;
        for (IBodyElement elem : doc.getBodyElements ())
        {
            if (elem == newPara)
                break;
            else if (elem.getElementType () == newPara.getElementType ())
                newParaIndex++;
        }
        
        System.out.println ("1st temporary para is at index " + newParaIndex);      // 5, as expected
        
        // Now replace the added paragraph with the cloned one
        doc.setParagraph (clonedPara, newParaIndex);
        System.out.println ("--- Replace 1st temporary para ---");
        debug (doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, null, null);
        
        // Do exactly the same thing again to clone the second body element
        XWPFParagraph cloneThis2 = (XWPFParagraph) doc.getBodyElements ().get (1);
        XWPFParagraph clonedPara2 = new XWPFParagraph ((CTP) cloneThis2.getCTP ().copy (), doc);
        
        // One element was added in the first round, so the target index is shifted by one
        XWPFParagraph insertBeforePara2 = (XWPFParagraph) doc.getBodyElements ().get (insertionPoint + 1);
        XmlCursor cursor2 = insertBeforePara2.getCTP ().newCursor ();
        
        XWPFParagraph newPara2 = doc.insertNewParagraph (cursor2);
        newPara2.insertNewRun (0).setText ("this should get replaced too");

        System.out.println ("--- Insert 2nd temporary para before F ---");
        debug (doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, newPara2, clonedPara2);
        
        // Same paragraph-only index computation as in the first round
        int newParaIndex2 = 0;
        for (IBodyElement elem : doc.getBodyElements ())
        {
            if (elem == newPara2)
                break;
            else if (elem.getElementType () == newPara2.getElementType ())
                newParaIndex2++;
        }
        
        System.out.println ("2nd temporary para is at index " + newParaIndex2);
        
        doc.setParagraph (clonedPara2, newParaIndex2);      // So then this replaces the wrong paragraph
        System.out.println ("--- Replace 2nd temporary para ---");
        debug (doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, newPara2, clonedPara2);
    }
    
    /**
     * Loads {@code W:\TestIn.docx}, runs the experiment with insertion point 5 and writes the
     * result to {@code W:\TestOut.docx}. Any exception is caught and its stack trace printed.
     */
    public final static void main (final String [] args)
    {
        try (FileInputStream in = new FileInputStream ("W:\\TestIn.docx"))
        {
            XWPFDocument doc = new XWPFDocument (in);
            new ParagraphIssue ().run (doc, 5);
            
            try (FileOutputStream out = new FileOutputStream ("W:\\TestOut.docx"))
            {
                doc.write (out);
            }
        }
        catch (Exception e)
        {
            e.printStackTrace ();
        }
    }
}

很多是调试代码,因此我可以获得准确显示正在发生的情况的输出:

--- 文档初始状态 ---
元素:A B C D E F
段落:A B C D E F
--- 在 F 之前插入第一个临时段落 ---
元素:A B C D E T1 F
段落:A B C D E T1 F
第一个临时段落位于索引 5 - 到目前为止完美
--- 替换第一个临时段落 ---
元素:A B C D E T1 F
段落:A B C D E R1 F - 段落列表有替换段落,但元素列表仍然有临时段落
--- 在 F 之前插入第二个临时段落 ---
元素:A B C D E T1 T2 F
段落:T2 A B C D E R1 F -现在第二个临时段落已经位于列表的前面;它位于元素列表中的正确位置
第二个临时段落位于索引 6
--- 替换第二个临时段落 ---
元素:A B C D E T1 T2 F
段落:T2 A B C D E R2 F - 元素列表仍包含临时段落;段落列表第二段位置错误

令人惊讶的是,保存的 Word 文档实际上看起来是正确的,但我不明白在两个列表看起来都不正确的情况下,这是怎么做到的。

就查找插入位置而言,到目前为止我可以使用

int newParaIndex = doc.getPosOfParagraph (newPara);
。但是,一旦文档中加入了表格,就会出现问题。现在,我编辑了源文档并插入了一个表格,因此元素列表变为 A、B、(表格)、C、D、E、F,并将 insertionPoint 相应更改为 6。

现在您不能再使用 doc.getPosOfParagraph(),因为它返回的是段落在元素列表(包括表格)中的索引,而 setParagraph 需要的是段落在段落列表(不包括表格)中的索引。如果改用 doc.getParagraphPos() 来弥补这一差异,它对第二个插入的临时段落会返回 0,因为正如您在上面的输出中所看到的,该段落在段落列表中实际上就位于这个位置。因此,我改为只统计元素列表中的段落来解决这个问题,正如您在代码中看到的那样。

添加表后再次运行(这是调试输出中的“U”):

--- 文档初始状态 ---
元素:A B U C D E F
段落:A B C D E F
--- 在 F 之前插入第一个临时段落 ---
元素:A B U C D E T1 F
段落:A B C D E T1 F
--- 替换第一个临时段落 ---
元素:A B U C D E T1 F
段落:A B C D E R1 F
--- 在 F 之前插入第二个临时段落 ---
元素:A B U C D E T1 T2 F
段落:T2 A B C D E R1 F
第二个临时段落位于索引 6
--- 替换第二个临时段落 ---
元素:A B U C D E T1 T2 F
段落:T2 A B C D E R2 F

这实际上确实在保存的文档中生成了正确的输出。我的问题是:

  1. 是否有更好的方法来解决临时段落在一个列表中被替换而不是在另一个列表中以及第二个临时段落出现在列表前面的问题?例如,我应该重新使用相同的 XmlCursor 来插入第二个临时段落吗?我是否应该一次性制作所有临时段落,然后一次将其全部替换,而不是一次替换一个?这样的事情会有帮助吗?
  2. 当我在真实的应用程序中尝试这种方法时,Word 提示文档已损坏,并询问是否尝试恢复;如果我单击“是”,文档可以打开,内容和所有复制的段落看起来都是正确的,但这里的奇怪行为导致了文档损坏的警告。
java apache-poi xwpf
1个回答
0
投票

我在尝试使用 Apache POI 复制 Word 文档中的段落时遇到了同样的问题。我发现,你的第一个问题的解决方案确实是先一次性创建所有临时段落(即完成所有对 insertNewParagraph 的调用),然后再把它们全部替换为要复制的内容(即完成所有对 setParagraph 的调用)。

我最终得到了以下工作解决方案:

/** Utilities for managing paragraphs in Word documents. */
public class Paragraphs {
  /**
   * Inserts {@code times} copies of the given paragraph into its document, directly before the
   * original paragraph.
   *
   * @param paragraph the paragraph to copy; it is expected to sit directly in the document body
   * @param times how many copies to insert; zero or a negative value inserts nothing
   * @return the paragraph objects that were written into the document's paragraph list, in
   *     document order; empty if {@code times} is not positive
   * @throws IllegalStateException if the document refuses to insert a placeholder paragraph at
   *     the paragraph's position
   */
  public static List<XWPFParagraph> duplicate(XWPFParagraph paragraph, int times) {
    // **Implementation note:**
    // Due to some weird behaviour with Apache POI's insertNewParagraph and setParagraph as described in
    // https://stackoverflow.com/questions/75289475/insert-multiple-copied-paragraphs-in-xwpfdocument
    // we have to insert all placeholder paragraphs first, then overwrite them with copies of the original.
    // We cannot insert and overwrite in the same loop: once setParagraph has been called, the next
    // insertNewParagraph puts its paragraph at position 0 of document.getParagraphs(), so only the first
    // duplication would succeed and the rest would leave stray empty paragraphs at the start of the list.

    var document = paragraph.getDocument();

    var newParagraphs = new ArrayList<XWPFParagraph>();
    try (var cursor = paragraph.getCTP().newCursor()) {
      for (int i = 0; i < times; i++) {
        var placeholder = document.insertNewParagraph(cursor);
        if (placeholder == null) {
          // Fail here with a clear message instead of carrying a null entry into the second loop,
          // where it would only surface as an obscure index error.
          throw new IllegalStateException(
              "Could not insert a new paragraph at the position of the paragraph to duplicate");
        }
        newParagraphs.add(placeholder);

        // Advance the cursor to the next start token (the original paragraph) so that the next
        // placeholder is inserted there as well. Stop at NONE too, so running out of tokens can
        // never turn this into an endless loop.
        TokenType token;
        do {
          token = cursor.toNextToken();
        } while (token != TokenType.START && token != TokenType.NONE);
      }
    }

    for (int i = 0; i < newParagraphs.size(); i++) {
      // setParagraph expects an index into the paragraph list (tables excluded), whereas
      // getPosOfParagraph reports an index into the body-element list (tables included). Look the
      // placeholder up in the paragraph list so documents containing tables are handled correctly.
      var placeholderIndex = document.getParagraphs().indexOf(newParagraphs.get(i));

      // copy the contents of the original paragraph to a new paragraph and overwrite the placeholder
      var copy = new XWPFParagraph((CTP) paragraph.getCTP().copy(), document);
      document.setParagraph(copy, placeholderIndex);

      // NOTE(review): setParagraph appears to leave the body-element list pointing at the
      // placeholder object - confirm before relying on getBodyElements() after this call.
      newParagraphs.set(i, copy); // hand back the copy instead of the placeholder
    }

    return newParagraphs;
  }
}

据我所知,这种奇怪行为的原因确实与 setParagraph 中的 TODO 注释有关,正如 Axel Richter 在他的评论中指出的那样。为了演示这种行为,我编写了下面这些可以通过的测试:

  /**
   * Demonstrates the problematic call order: insert, replace, insert again, replace again.
   *
   * <p>The assertions pin the behaviour observed with that order: after the first
   * {@code setParagraph}, a second {@code insertNewParagraph} at {@code paragraph1} puts the new
   * paragraph at index 0 of {@code getParagraphs()}, and after the second {@code setParagraph}
   * reading the text of the paragraph at index 0 throws {@code XmlValueDisconnectedException}.
   */
  @Test
  void demoApachePoiBugInvalidBehaviour() {
    // Start with a single paragraph and insert a second one at its position.
    var document = new XWPFDocument();
    var paragraph1 = document.createParagraph();
    paragraph1.createRun().setText("Hello World!");

    var paragraph2 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
    paragraph2.createRun().setText("Hello People!");

    // The inserted paragraph comes first in the paragraph list.
    assertEquals("Hello People!", document.getParagraphs().get(0).getText());
    assertEquals("Hello World!", document.getParagraphs().get(1).getText());

    // Replace the inserted paragraph with a copy of paragraph1 right away.
    var paragraph2Position = document.getPosOfParagraph(paragraph2);
    var newParagraph2 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
    document.setParagraph(newParagraph2, paragraph2Position);

    assertEquals("Hello World!", document.getParagraphs().get(0).getText());
    assertEquals("Hello World!", document.getParagraphs().get(1).getText());

    // so far so good.

    // However, inserting a new paragraph at the position of paragraph1 now results in the new paragraph being inserted
    // at position 0, while it should be inserted at position 1 (one before last).

    var paragraph3 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
    paragraph3.createRun().setText("Hello Opinity!");

    assertEquals("Hello Opinity!", document.getParagraphs().get(0).getText());
    assertEquals("Hello World!", document.getParagraphs().get(1).getText());
    assertEquals("Hello World!", document.getParagraphs().get(2).getText());

    // Replacing the misplaced paragraph leaves index 0 of the paragraph list unreadable.
    var paragraph3Position = document.getPosOfParagraph(paragraph3);
    var newParagraph3 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
    document.setParagraph(newParagraph3, paragraph3Position);

    assertThrows(XmlValueDisconnectedException.class, () -> document.getParagraphs().get(0).getText());
    assertEquals("Hello World!", document.getParagraphs().get(1).getText());
    assertEquals("Hello World!", document.getParagraphs().get(2).getText());
  }

与之相对,按正确顺序调用 insertNewParagraph 和 setParagraph 时的预期行为如下:

  /**
   * Demonstrates the working call order: perform every {@code insertNewParagraph} call first and
   * only then the {@code setParagraph} calls.
   *
   * <p>The assertions show that with this order both inserted paragraphs sit in front of
   * {@code paragraph1} in {@code getParagraphs()}, and that each one is replaced in place by a
   * copy of {@code paragraph1}.
   */
  @Test
  void demoApachePoiBugValidBehaviour() {
    var document = new XWPFDocument();
    var paragraph1 = document.createParagraph();
    paragraph1.createRun().setText("Hello World!");

    // Insert both new paragraphs before doing any replacement.
    var paragraph2 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
    paragraph2.createRun().setText("Hello People!");

    var paragraph3 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
    paragraph3.createRun().setText("Hello Opinity!");

    // Both insertions appear ahead of paragraph1, in insertion order.
    assertEquals("Hello People!", document.getParagraphs().get(0).getText());
    assertEquals("Hello Opinity!", document.getParagraphs().get(1).getText());
    assertEquals("Hello World!", document.getParagraphs().get(2).getText());

    // Replace the first inserted paragraph with a copy of paragraph1.
    var paragraph2Position = document.getPosOfParagraph(paragraph2);
    var newParagraph2 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
    document.setParagraph(newParagraph2, paragraph2Position);

    assertEquals("Hello World!", document.getParagraphs().get(0).getText());
    assertEquals("Hello Opinity!", document.getParagraphs().get(1).getText());
    assertEquals("Hello World!", document.getParagraphs().get(2).getText());

    // Replace the second inserted paragraph as well; all three now read the same.
    var paragraph3Position = document.getPosOfParagraph(paragraph3);
    var newParagraph3 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
    document.setParagraph(newParagraph3, paragraph3Position);

    assertEquals("Hello World!", document.getParagraphs().get(0).getText());
    assertEquals("Hello World!", document.getParagraphs().get(1).getText());
    assertEquals("Hello World!", document.getParagraphs().get(2).getText());
  }
© www.soinside.com 2019 - 2024. All rights reserved.