我不明白我写的这个正则表达式出了什么问题。
import re
# Define the input strings
inputs = [
    "exmpl-staging-1234-e2e-1707336983872",
    "exmpl-staging-1234-e2e-1706336983875",
    "exmpl-staging-main-e2e-1707336983878",
    "exmpl-demo-e2e-1707336983878",
    "exmpl-production-e2e-1707336983875",
    "exmpl-staging-2345",
    "exmpl-staging-1234",
    "exmpl-staging-1234-my-case-title",
    "exmpl-staging-1234-my-case-title-e2e-1707336983872",
]

# Define the regex pattern: "exmpl-<type>" followed by three optional parts,
# "-<case>", "-<title>" and "-e2e-<timestamp>".
# NOTE: the `title` group uses a greedy `.+`, so for a name such as
# "...-my-case-title-e2e-1707336983872" it consumes the trailing
# "-e2e-<digits>" as well and the `timestamp` group stays unmatched.
pattern = re.compile(r'^exmpl-(?P<type>main|staging|demo|production)(?:-(?P<case>\d+|main))?(?:-(?P<title>(?!e2e-\d+).+))?(?:-e2e-(?P<timestamp>\d+))?$')

# Initialize a list to store the extracted data (one dict per input string)
extracted_data = []

# Loop through the input strings
for input_str in inputs:
    # Match the pattern against the input string
    match = pattern.match(input_str)
    # Extract the required information
    if match:
        extracted_data.append({
            'Input': input_str,
            'Type': match.group('type'),
            # Optional groups are None when they did not take part in the
            # match; those are reported as the string 'none'.
            'Case': match.group('case') or 'none',
            'Title': match.group('title') or 'none',
            'Timestamp': match.group('timestamp') or 'none',
        })
    else:
        extracted_data.append({
            'Input': input_str,
            'Type': 'No match found',
            'Case': 'No match found',
            'Title': 'No match found',
            'Timestamp': 'No match found',
        })

# Print the results as a table, one left-aligned row per input
print("| Input | Type | Case | Title | Timestamp |")
print("|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|")
for data in extracted_data:
    print("| {:<50} | {:<10} | {:<7} | {:<36} | {:<15} |".format(data['Input'], data['Type'], data['Case'], data['Title'], data['Timestamp']))
这是它提供的输出:
| Input | Type | Case | Title | Timestamp |
|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|
| exmpl-staging-1234-e2e-1707336983872 | staging | 1234 | none | 1707336983872 |
| exmpl-staging-1234-e2e-1706336983875 | staging | 1234 | none | 1706336983875 |
| exmpl-staging-main-e2e-1707336983878 | staging | main | none | 1707336983878 |
| exmpl-demo-e2e-1707336983878 | demo | none | none | 1707336983878 |
| exmpl-production-e2e-1707336983875 | production | none | none | 1707336983875 |
| exmpl-staging-2345 | staging | 2345 | none | none |
| exmpl-staging-1234 | staging | 1234 | none | none |
| exmpl-staging-1234-my-case-title | staging | 1234 | my-case-title | none |
| exmpl-staging-1234-my-case-title-e2e-1707336983872 | staging | 1234 | my-case-title-e2e-1707336983872 | none |
除最后一个输入外,其余结果都符合预期:最后一个输入的 `timestamp` 为空,而时间戳被错误地捕获为 `title` 组的一部分。我在这里做错了什么?
将 `title` 组改为非贪婪匹配(把 `.+` 改成 `.+?`),这样它就不会吞掉末尾的 `-e2e-<timestamp>` 部分(可在 Regex101 上验证):
^exmpl-(?P<type>main|staging|demo|production)(?:-(?P<case>\d+|main))?(?:-(?P<title>(?!e2e-\d+).+?))?(?:-e2e-(?P<timestamp>\d+))?$
import re
# Define the input strings
inputs = [
    "exmpl-staging-1234-e2e-1707336983872",
    "exmpl-staging-1234-e2e-1706336983875",
    "exmpl-staging-main-e2e-1707336983878",
    "exmpl-demo-e2e-1707336983878",
    "exmpl-production-e2e-1707336983875",
    "exmpl-staging-2345",
    "exmpl-staging-1234",
    "exmpl-staging-1234-my-case-title",
    "exmpl-staging-1234-my-case-title-e2e-1707336983872",
]

# Define the regex pattern: "exmpl-<type>" followed by three optional parts,
# "-<case>", "-<title>" and "-e2e-<timestamp>".
# The `title` group is non-greedy (`.+?`): it takes as few characters as
# possible, which leaves a trailing "-e2e-<digits>" for the `timestamp` group.
# The `(?!e2e-\d+)` lookahead keeps a title from starting with the timestamp
# marker itself.
pattern = re.compile(
    r"^exmpl-(?P<type>main|staging|demo|production)(?:-(?P<case>\d+|main))?(?:-(?P<title>(?!e2e-\d+).+?))?(?:-e2e-(?P<timestamp>\d+))?$"
)

# Initialize a list to store the extracted data (one dict per input string)
extracted_data = []

# Loop through the input strings
for input_str in inputs:
    # Match the pattern against the input string
    match = pattern.match(input_str)
    # Extract the required information
    if match:
        extracted_data.append(
            {
                "Input": input_str,
                "Type": match.group("type"),
                # Optional groups are None when they did not take part in
                # the match; those are reported as the string "none".
                "Case": match.group("case") or "none",
                "Title": match.group("title") or "none",
                "Timestamp": match.group("timestamp") or "none",
            }
        )
    else:
        extracted_data.append(
            {
                "Input": input_str,
                "Type": "No match found",
                "Case": "No match found",
                "Title": "No match found",
                "Timestamp": "No match found",
            }
        )

# Print the results as a table, one left-aligned row per input
print(
    "| Input | Type | Case | Title | Timestamp |"
)
print(
    "|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|"
)
for data in extracted_data:
    print(
        "| {:<50} | {:<10} | {:<7} | {:<36} | {:<15} |".format(
            data["Input"], data["Type"], data["Case"], data["Title"], data["Timestamp"]
        )
    )
打印:
| Input | Type | Case | Title | Timestamp |
|----------------------------------------------------|------------|---------|--------------------------------------|-----------------|
| exmpl-staging-1234-e2e-1707336983872 | staging | 1234 | none | 1707336983872 |
| exmpl-staging-1234-e2e-1706336983875 | staging | 1234 | none | 1706336983875 |
| exmpl-staging-main-e2e-1707336983878 | staging | main | none | 1707336983878 |
| exmpl-demo-e2e-1707336983878 | demo | none | none | 1707336983878 |
| exmpl-production-e2e-1707336983875 | production | none | none | 1707336983875 |
| exmpl-staging-2345 | staging | 2345 | none | none |
| exmpl-staging-1234 | staging | 1234 | none | none |
| exmpl-staging-1234-my-case-title | staging | 1234 | my-case-title | none |
| exmpl-staging-1234-my-case-title-e2e-1707336983872 | staging | 1234 | my-case-title | 1707336983872 |