假设我有一段以字符串形式保存的 reStructuredText 源文本:
# Sample reStructuredText document used as input for the parser below.
# Blank lines separate the title, the paragraph and the directive, and the
# directive content is indented; without them the ``code-block`` line is read
# as a continuation of the preceding paragraph rather than as a directive.
source = """
============
Introduction
============

Hello world.

.. code-block:: bash

    $ echo Greetings.
"""
import sys
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text
def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    Args:
        text: reStructuredText source to parse.

    Returns:
        A new document, created with the source name ``<rst-doc>`` and the
        parser's default settings, populated by parsing ``text``.
    """
    # Import the frontend module explicitly instead of relying on another
    # docutils import having loaded it as a side effect.
    from docutils import frontend

    components = (docutils.parsers.rst.Parser,)
    # Use get_default_settings() when this docutils version offers it; it is
    # the documented replacement for instantiating OptionParser directly.
    # Fall back to OptionParser on versions that predate it.
    get_defaults = getattr(frontend, "get_default_settings", None)
    if get_defaults is not None:
        settings = get_defaults(*components)
    else:
        settings = frontend.OptionParser(components=components).get_default_values()
    document = docutils.utils.new_document('<rst-doc>', settings=settings)
    docutils.parsers.rst.Parser().parse(text, document)
    return document
if __name__ == '__main__':
    # Script entry point: build the document tree for the sample text.
    document = parse_rst(text=source)
我想使用 Python 将其转换为没有剩余标记的纯文本。
我尝试使用 sphinx.builders.text.TextBuilder,但它似乎需要一个 App 对象,而不是字符串。
这段代码可以工作。其中用了一些取巧的做法(hack),比如设置一个假的配置目录;也许有更好的方法。
import sys
import textwrap
import types
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text
import sphinx.util.osutil
def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    Args:
        text: reStructuredText source to parse.

    Returns:
        A new document, created with the source name ``<rst-doc>`` and the
        parser's default settings, populated by parsing ``text``.
    """
    # Import the frontend module explicitly instead of relying on another
    # docutils import having loaded it as a side effect.
    from docutils import frontend

    components = (docutils.parsers.rst.Parser,)
    # Use get_default_settings() when this docutils version offers it; it is
    # the documented replacement for instantiating OptionParser directly.
    # Fall back to OptionParser on versions that predate it.
    get_defaults = getattr(frontend, "get_default_settings", None)
    if get_defaults is not None:
        settings = get_defaults(*components)
    else:
        settings = frontend.OptionParser(
            components=components
        ).get_default_values()
    document = docutils.utils.new_document("<rst-doc>", settings=settings)
    docutils.parsers.rst.Parser().parse(text, document)
    return document
if __name__ == "__main__":
    # Sample input. The blank lines and the indented directive content are
    # significant: without them the ``code-block`` line is read as part of
    # the preceding paragraph. dedent() strips the common leading indent.
    source = textwrap.dedent(
        """\
        ============
        Introduction
        ============

        Hello world.

        .. code-block:: bash

            $ echo Greetings.
        """
    )

    document = parse_rst(source)

    # TextBuilder is given this namespace in place of a real Sphinx
    # application object; it carries only the attributes listed here.
    # NOTE(review): which attributes the builder and translator actually read
    # depends on the installed Sphinx version -- confirm when upgrading.
    app = types.SimpleNamespace(
        srcdir=None,
        confdir=None,
        outdir=None,
        doctreedir="/",
        config=types.SimpleNamespace(
            text_newlines="native",
            text_sectionchars="=",
            text_add_secnumbers=False,
            text_secnumber_suffix=".",
        ),
        tags=set(),
        registry=types.SimpleNamespace(
            # NOTE(review): nothing in this script calls create_translator
            # (the translator is constructed directly below), so this
            # lambda's three-argument signature is untested here -- verify
            # it against how Sphinx calls it before relying on it.
            create_translator=lambda self, something, new_builder: sphinx.writers.text.TextTranslator(
                document, new_builder
            )
        ),
    )

    builder = sphinx.builders.text.TextBuilder(app)
    translator = sphinx.writers.text.TextTranslator(document, builder)
    # Walk the document tree with the text translator, then print the
    # translator's accumulated body.
    document.walkabout(translator)
    print(translator.body)
输出:
Introduction
============
Hello world.
$ echo Greetings.
Sphinx 带有一个 TextBuilder。从命令行:
make text
这是一个似乎在 Linux 下有效的变通方法。它使用 Linux 命令 “more” 来完成这项工作。
import subprocess
from pathlib import Path
from typing import Union
def rst_to_plain_text(rst_file: Union[str, Path], tmpfile="tmp.txt") -> str:
"""use linux command 'more' to read contents of a reStructuredText format file to string
Args:
rst_file (Union[str, Path]): filepath to .rst file
tmpfile (str, optional): temporary file to write as plain text
Returns:
str: plain text version of rst file
"""
subprocess.check_output(f"more {rst_file} > {tmpfile}", shell=True)
with open(tmpfile, "r") as f:
txt = f.read()
return txt
马库斯·特尔维辛