Python:只有第一个 if 分支执行了 append,其余列表没有被填充

问题描述 投票:0回答:1

这是我的代码:

import re

def split_text(text):
    """Sort the lines of a species description into six per-section lists.

    The text is split on "\n" and each line is inspected on its own.  A line
    is assigned to a section only when the section header sits at the very
    start of that line; the captured value runs from just after "Header:"
    (and any following spaces) up to the next later section name found on
    the same line, or to the end of the line.

    Because only the start of each line is examined, a text that holds all
    sections on one line contributes to the first matching list only.

    Returns a 6-tuple of lists, in this order: forewing length, biostatus,
    distribution and frequency, habitat/seasonality/behaviour, life history,
    notes.
    """
    forewing_length = []
    biostatus = []
    distribution_and_frequency = []
    habitat_seasonality_behaviour = []
    life_history = []
    notes = []

    # The first section is recognised purely by its pattern matching.
    forewing_re = re.compile(r"Forewing length: *(.*?(?=Biostatus|Distribution and frequency|Habitat, seasonality and behaviour|Life history|Notes|$))")

    # Remaining sections: (required line prefix, extraction pattern, target list).
    later_sections = (
        (
            "Biostatus",
            re.compile(r"Biostatus: *(.*?(?=Distribution and frequency|Habitat, seasonality and behaviour|Life history|Notes|$))"),
            biostatus,
        ),
        (
            "Distribution and frequency",
            re.compile(r"Distribution and frequency: *(.*?(?=Habitat, seasonality and behaviour|Life history|Notes|$))"),
            distribution_and_frequency,
        ),
        (
            "Habitat, seasonality and behaviour",
            re.compile(r"Habitat, seasonality and behaviour: *(.*?(?=Life history|Notes|$))"),
            habitat_seasonality_behaviour,
        ),
        (
            "Life history",
            re.compile(r"Life history: *(.*?(?=Notes|$))"),
            life_history,
        ),
        (
            "Notes",
            re.compile(r"Notes: *(.*)"),
            notes,
        ),
    )

    for row in text.split("\n"):
        found = forewing_re.match(row)
        if found:
            forewing_length.append(found.group(1))
            continue

        for prefix, pattern, bucket in later_sections:
            if not row.startswith(prefix):
                continue
            found = pattern.match(row)
            if found:
                bucket.append(found.group(1))
            # Only the first prefix that fits the line is ever tried.
            break

    return (
        forewing_length,
        biostatus,
        distribution_and_frequency,
        habitat_seasonality_behaviour,
        life_history,
        notes,
    )

# Sample record: all six sections sit on one line (the string contains no "\n").
text = "Forewing length: 2.5–3.5 mm. Biostatus: Endemic. Distribution and frequency: Northern North Island, as far south as Hawke’s Bay and Taranaki. Often abundant, but inconspicuous. Habitat, seasonality and behaviour: Native forest, especially podocarp forest. Adults may be found from late November to March. They fly by day in sunshine, settling briefly on ferns and other vegetation, and have been observed swarming around flow- ering nikau palm. Life history: Very poorly known; larvae have been extracted from moss and from rotting wood. Notes: The family Micropterigidae is an extremely ancient lineage, and the adults retain functional mandibles, unlike all other adult moths. These mandibles are used to chew fern spores and pollen. There is a diverse fauna of 19 species in this family in New Zealand; they are found mostly in moist forest. The forewing pattern of shining white markings in Zealandopterix zonodoxa is variable."

# The returned tuple is neither stored nor printed here.
split_text(text)

输出为:

(['2.5–3.5 mm. '], [], [], [], [], [])

期望的输出是:

(['2.5–3.5 mm.'],
 ['Endemic.'],
 ['Northern North Island, as far south as Hawke’s Bay and Taranaki. Often abundant, but inconspicuous.'],
 ['Native forest, especially podocarp forest. Adults may be found from late November to March. They fly by day in sunshine, settling briefly on ferns and other vegetation, and have been observed swarming around flow- ering nikau palm.'],
 ['Very poorly known; larvae have been extracted from moss and from rotting wood.'],
 ['The family Micropterigidae is an extremely ancient lineage, and the adults retain functional mandibles, unlike all other adult moths. These mandibles are used to chew fern spores and pollen. There is a diverse fauna of 19 species in this family in New Zealand; they are found mostly in moist forest. The forewing pattern of shining white markings in Zealandopterix zonodoxa is variable.'])

我是编码新手,第一次使用 re,我知道这可能效率低下且简陋。我还制作了一个没有正则表达式的版本,只有一堆 if 语句,同样的问题。

python regex string split append
1个回答
0
投票

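原因:原始的 text 中没有换行符,text.split("\n") 只会得到一行;这一行以 "Forewing length:" 开头,所以只有第一个 if 分支匹配,后面的 elif 分支都不会执行。

所做的更改:-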

(1)在 text 字符串中的每个段落(字段标题)之前添加换行符 \n

(2)

split_text(text)
变为
print(split_text(text))

代码:-

import re

# Section headers in the order their lists appear in split_text's result.
_SECTION_HEADERS = (
    "Forewing length",
    "Biostatus",
    "Distribution and frequency",
    "Habitat, seasonality and behaviour",
    "Life history",
    "Notes",
)

# Matches any known header followed by a colon, anywhere inside a line.
_SECTION_HEADER_RE = re.compile(
    r"\b(" + "|".join(re.escape(header) for header in _SECTION_HEADERS) + r"):"
)


def split_text(text):
    """Split a species description into its six labelled sections.

    Every occurrence of "<Header>:" is located inside each line of *text*,
    so the sections may be laid out one per line or several on the same
    line.  The value of a section is the text between its header and the
    next header on that line (or the end of the line), with surrounding
    whitespace removed.  Text that precedes the first header of a line is
    ignored, and a header name that is not followed by a colon is treated
    as ordinary text.

    Args:
        text: The description to split; lines are separated by "\n".

    Returns:
        A 6-tuple of lists of strings, in this order: forewing length,
        biostatus, distribution and frequency, habitat/seasonality/
        behaviour, life history, notes.  A list holds one entry per
        occurrence of its header and is empty when the header never occurs.
    """
    sections = {header: [] for header in _SECTION_HEADERS}

    for line in text.split("\n"):
        hits = list(_SECTION_HEADER_RE.finditer(line))
        for index, hit in enumerate(hits):
            # A value ends where the next header begins, or at end of line.
            if index + 1 < len(hits):
                value_end = hits[index + 1].start()
            else:
                value_end = len(line)
            sections[hit.group(1)].append(line[hit.end():value_end].strip())

    return tuple(sections[header] for header in _SECTION_HEADERS)

# Sample record with a "\n" placed before each section header, so that
# text.split("\n") yields one section per line.
text = "Forewing length: 2.5–3.5 mm.\nBiostatus: Endemic.\nDistribution and frequency: Northern North Island, as far south as Hawke's Bay and Taranaki. Often abundant, but inconspicuous.\nHabitat, seasonality and behaviour: Native forest, especially podocarp forest. Adults may be found from late November to March. They fly by day in sunshine, settling briefly on ferns and other vegetation, and have been observed swarming around flow- ering nikau palm.\nLife history: Very poorly known; larvae have been extracted from moss and from rotting wood.\nNotes: The family Micropterigidae is an extremely ancient lineage, and the adults retain functional mandibles, unlike all other adult moths. These mandibles are used to chew fern spores and pollen. There is a diverse fauna of 19 species in this family in New Zealand; they are found mostly in moist forest. The forewing pattern of shining white markings in Zealandopterix zonodoxa is variable."

# Print the returned tuple so the result is visible when run as a script.
print(split_text(text))

输出:-

(['2.5–3.5 mm.'], ['Endemic.'], ["Northern North Island, as far south as Hawke's Bay and Taranaki. Often abundant, but inconspicuous."], ['Native forest, especially podocarp forest. Adults may be found from late November to March. They fly by day in sunshine, settling briefly on ferns and other vegetation, and have been observed swarming around flow- ering nikau palm.'], ['Very poorly known; larvae have been extracted from moss and from rotting wood.'], ['The family Micropterigidae is an extremely ancient lineage, and the adults retain functional mandibles, unlike all other adult moths. These mandibles are used to chew fern spores and pollen. There is a diverse fauna of 19 species in this family in New Zealand; they are found mostly in moist forest. The forewing pattern of shining white markings in Zealandopterix zonodoxa is variable.'])
© www.soinside.com 2019 - 2024. All rights reserved.