我有一个 md 文件,类似于:
# level_1
## level_11
- ind1
- ind2
## level_12
- ind3
# level_2
## level_21
- ind4
我想将其转换为类似于此的 csv 文件(最好使用 R 或 Python):
level1, level_11, ind1
level1, level_11, ind2
level1, level_12, ind3
level2, level_21, ind4
import csv
import re
def parse_markdown(file_path, encoding="utf-8-sig"):
    """
    Parse a two-level markdown outline and return a list of rows for a CSV file.

    The file is read line by line. Each line is stripped of surrounding
    whitespace and classified by its prefix:

    * ``"# "``  -- level-1 heading; becomes the current first column and
      clears the current level-2 heading.
    * ``"## "`` -- level-2 heading; becomes the current second column.
    * ``"- "``  -- list item; emits one row
      ``[level1, level2, item_text]``.

    A list item that appears under a level-1 heading but before any level-2
    heading gets ``None`` in the second column. A list item that appears
    before any level-1 heading is skipped. All other lines are ignored.

    Args:
        file_path: Path of the markdown file to read.
        encoding: Text encoding used to open the file. The default
            ``"utf-8-sig"`` reads plain UTF-8 and also drops a leading byte
            order mark, which would otherwise hide a ``# `` heading on the
            first line.

    Returns:
        A list of three-element lists ``[level1, level2_or_None, item]``.
    """
    rows = []
    current_level1 = None
    current_level2 = None
    with open(file_path, "r", encoding=encoding) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith("# "):
                # Extra spaces after the marker are not part of the title.
                current_level1 = line[2:].strip()
                # A new top-level section starts with no sub-section yet.
                current_level2 = None
            elif line.startswith("## "):
                current_level2 = line[3:].strip()
            elif line.startswith("- ") and current_level1 is not None:
                # current_level2 is None until a "## " heading is seen, so
                # one append covers both the "with" and "without" cases.
                rows.append([current_level1, current_level2, line[2:].strip()])
    return rows
def write_csv(file_path, rows):
    """
    Write a header line followed by the given rows to a CSV file.

    The file at ``file_path`` is created or overwritten. The first line
    written is the fixed header ``level1,level_11,ind``; each element of
    ``rows`` is then written as one CSV record. ``None`` values are written
    as empty fields by ``csv.writer``.

    Args:
        file_path: Path of the CSV file to write.
        rows: Iterable of row sequences, e.g. the list returned by
            ``parse_markdown``.
    """
    # newline="" lets the csv module control line endings itself; without it
    # Windows output gets an extra blank line between rows. An explicit
    # encoding keeps non-ASCII text independent of the platform locale.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["level1", "level_11", "ind"])
        writer.writerows(rows)
# Example usage. Guarded so that importing this module for its functions does
# not try to read or write the placeholder file paths below; the conversion
# runs only when the file is executed directly as a script.
if __name__ == "__main__":
    rows = parse_markdown("your_markdown_file.md")
    write_csv("output_csv_file.csv", rows)