我在 Python 中有一个字符串变量,需要从中提取所有以特定字符串开头、并以特定字符串结尾的部分。
起始字符串是“proc sql”,结束字符串是“quit;”,这两者之间的所有行(包括起始行和结束行)都应提取到一个列表中:
# Sample input: one multi-line string holding program text (it looks like SAS).
# It contains two "proc sql; ... quit;" sections with a %sort(...) line
# between them; that %sort line lies outside both sections.
row ='''proc sql;
create table answer_1_stg1 as
select distinct a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc
,sum(b.invoice_qty) as sum_invoice
,avg(b.invoice_qty) as avg_invoice
,max(b.invoice_qty) as max_invoice
,min(b.invoice_qty) as min_invoice
from promo as a
left join invoice as b
on
a.CUSTOMER_HIERARCHY_LVL2_CD=b.CUSTOMER_HIERARCHY_LVL2_CD and
a.basecode=b.basecode and
a.SALESORG_CD=b.SALESORG_CD and
a.LOCATION_CD=b.LOCATION_CD
and b.INVOICE_DT between a.event_start_dt and a.event_end_dt
where year(a.event_start_dt) = 2017 and year(b.INVOICE_DT) = 2017
group by a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc;
quit;
%sort(answer_1_stg1,descending sum_invoice);
proc sql;
create table answer_1_stg2 as
select distinct promo_mechanic_desc
,sum(sum_invoice) as total
from answer_1_stg1;
quit;'''
我只需要提取两个字符串“proc sql”和“quit”之间的部分。最终的列表应该是这样的:
# Desired result: a list with one string per "proc sql; ... quit;" section,
# each string running from its "proc sql;" line through its "quit;" line.
# The %sort(...) line that sits between the two sections is not included.
lt = ['''proc sql;
create table answer_1_stg1 as
select distinct a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc
,sum(b.invoice_qty) as sum_invoice
,avg(b.invoice_qty) as avg_invoice
,max(b.invoice_qty) as max_invoice
,min(b.invoice_qty) as min_invoice
from promo as a
left join invoice as b
on
a.CUSTOMER_HIERARCHY_LVL2_CD=b.CUSTOMER_HIERARCHY_LVL2_CD and
a.basecode=b.basecode and
a.SALESORG_CD=b.SALESORG_CD and
a.LOCATION_CD=b.LOCATION_CD
and b.INVOICE_DT between a.event_start_dt and a.event_end_dt
where year(a.event_start_dt) = 2017 and year(b.INVOICE_DT) = 2017
group by a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc;
quit;''','''proc sql;
create table answer_1_stg2 as
select distinct promo_mechanic_desc
,sum(sum_invoice) as total
from answer_1_stg1;
quit;''']
我尝试运行以下代码,但它只将第一行打印到列表中:
# Asker's attempt, reported above as collecting only the first line of a block.
# NOTE(review): indentation was lost in this paste. Presumably the nesting is
# for > if > print()/append; under that reading only lines that themselves
# contain "proc sql" are ever appended to fm, and nothing tracks the lines
# that follow up to "quit" -- confirm against the original post.
fm = []
for i in row.split('\n'):
if "proc sql" in i:
print()
fm.append(i.strip())
您需要一个额外的变量,用来记录当前是否处于 `proc sql` 和 `quit` 之间。
在循环开始之前,先设置 `found = False`。
在循环中,对每一行依次执行:
1. 如果该行包含 `proc sql`,则设置 `found = True`(并把 `fm` 重置为空列表);
2. 如果 `found` 为真,则执行 `fm.append(i)`;
3. 如果该行包含 `quit`,则设置 `found = False`(并把 `fm` 连接成一个字符串,追加到 `lt`)。
row = '''proc sql;
create table answer_1_stg1 as
select distinct a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc
,sum(b.invoice_qty) as sum_invoice
,avg(b.invoice_qty) as avg_invoice
,max(b.invoice_qty) as max_invoice
,min(b.invoice_qty) as min_invoice
from promo as a
left join invoice as b
on
a.CUSTOMER_HIERARCHY_LVL2_CD=b.CUSTOMER_HIERARCHY_LVL2_CD and
a.basecode=b.basecode and
a.SALESORG_CD=b.SALESORG_CD and
a.LOCATION_CD=b.LOCATION_CD
and b.INVOICE_DT between a.event_start_dt and a.event_end_dt
where year(a.event_start_dt) = 2017 and year(b.INVOICE_DT) = 2017
group by a.CUSTOMER_HIERARCHY_LVL2_CD
,b.brand_cd
,b.category_cd
,a.promo_mechanic_nm
,a.promo_mechanic_desc;
quit;
%sort(answer_1_stg1,descending sum_invoice);
proc sql;
create table answer_1_stg2 as
select distinct promo_mechanic_desc
,sum(sum_invoice) as total
from answer_1_stg1;
quit;'''


def extract_blocks(text, start="proc sql", end="quit"):
    """Return every section of *text* that runs from a start line to an end line.

    The text is scanned line by line, with each line stripped of surrounding
    whitespace. A section opens on a line containing *start* and closes on
    the next line containing *end*; both marker lines are kept, and the
    section's lines are joined with newlines.

    Args:
        text: The multi-line string to scan.
        start: Substring that marks the first line of a section.
        end: Substring that marks the last line of a section.

    Returns:
        A list of strings, one per completed section, in order of appearance.
        A section that is opened but never closed is not returned.

    Note:
        Markers are matched case-sensitively as plain substrings, so an *end*
        marker embedded in a longer word on a line inside a section also
        closes that section.
    """
    blocks = []
    current = None  # None means "not inside a section right now"
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if start in line:
            # A start marker always opens a fresh section, discarding any
            # section that was left unterminated.
            current = []
        if current is None:
            # Outside a section: skip the line. This also ignores an end
            # marker that does not close anything (e.g. the "quit;" of an
            # unrelated step), which would otherwise re-append stale lines.
            continue
        current.append(line)
        if end in line:
            blocks.append("\n".join(current))
            current = None
    return blocks


lt = extract_blocks(row)

for item in lt:
    print(item)
    print('----')