input_string = """
+------------------------------------+-----------------------------------+
|title | title |
+------------------------------------+-----------------------------------+
| abcdabcdabc | abcdabcd, abcdabcdabcd, abcdabcda |
+------------------------------------+-----------------------------------+
| abcdabcdabcdabcdabcdabcdabcdabcda | abcd abcddrama and abcdabcdabcdabD|
| | to abcd abcd abc |
+------------------------------------+-----------------------------------+
"""
期望的输出是按列提取每个单元格中的短语(跨多行的单元格合并为一个字符串):
[ (u'abcdabcdabc', u'abcdabcd, abcdabcdabcd, abcdabcda'), (u'abcdabcdabcdabcdabcdabcdabcdabcda', u"abcd abcddrama and abcdabcdabcdabD to abcd abcd abc") ]
# Sample ASCII-art table used as the parser's input: "+---+" border lines
# separate the rows, and "|" separates the two columns.  The first row is the
# header ("title"); two data rows follow, and the second data row's cells
# continue onto a second physical line.
input_string = """
+------------------------------------+-----------------------------------+
|title | title |
+------------------------------------+-----------------------------------+
| abcdabcdabc | abcdabcd, abcdabcdabcd, abcdabcda |
+------------------------------------+-----------------------------------+
| abcdabcdabcdabcdabcdabcdabcdabcda | abcd abcddrama and abcdabcdabcdabD|
| | to abcd abcd abc |
+------------------------------------+-----------------------------------+
"""
def reform(s, num_columns=2):
    """Merge the fragments of one table row into one string per column.

    ``s`` is the list obtained by splitting a row's text on "|".  Every
    physical line of the row contributes ``num_columns + 1`` elements: one
    separator fragment (the text before the line's first "|", ignored here)
    followed by one fragment per column.  Fragments belonging to the same
    column are stripped and joined with single spaces, so a cell that wraps
    over several lines becomes one phrase.

    Args:
        s: List of "|"-separated fragments for a single table row.
        num_columns: Number of columns in the table (defaults to 2).

    Returns:
        A tuple with ``num_columns`` strings, one merged cell per column.
        A column whose fragments are all blank yields "".

    Raises:
        ValueError: If ``num_columns`` is smaller than 1.
    """
    if num_columns < 1:
        raise ValueError("num_columns must be at least 1")

    # Column k lives at indices k, k + stride, k + 2 * stride, ...
    stride = num_columns + 1
    merged = []
    for column in range(1, stride):
        fragments = (cell.strip() for cell in s[column::stride])
        # Skip blank continuation cells so they cannot introduce leading or
        # doubled spaces in the merged text.
        merged.append(" ".join(text for text in fragments if text))
    return tuple(merged)
# Every border line ends in "+\n", so splitting on it yields one chunk per
# table row: chunk 0 is the top border, chunk 1 the header row, and the data
# rows follow.  Blank chunks (such as the empty tail after the final border)
# are skipped, so any number of data rows is handled instead of exactly two.
sections = [
    chunk for chunk in input_string.split("+\n")[2:] if chunk.strip()
]
# Remove the border line trailing each chunk by dropping whole lines that
# start with "+".  Deleting every "-" and "+" character instead would also
# strip hyphens and plus signs from the cell text itself.
section_split = [
    "".join(
        line
        for line in x.splitlines(True)
        if not line.lstrip().startswith("+")
    ).split("|")
    for x in sections
]
print([reform(x) for x in section_split])