import pandas as pd
# Load the raw transaction export and narrow it to the columns of interest.
data = pd.read_csv('transactions(1).csv')
transaction_columns = [
    'Date',
    'Original Description',
    'Amount',
    'Category',
    'Account Name',
]
userTransactionsDF = pd.DataFrame(data, columns=transaction_columns)
first_transaction = userTransactionsDF.head(1)
print(first_transaction)
# Sample output of the print above:
#         Date Original Description  ...             Category             Account Name
# 0  4/14/2023            PETCO 304  ...  Pet Food & Supplies  DISCOVER IT CHROME CARD

# Load the recommended-categories table. Judging by the sample output below,
# each column is a category name and its cells hold merchant keywords
# (NaN where a category has no keyword in that row).
data2 = pd.read_csv('recommended_categories.csv')
recommendedCategoriesDF = pd.DataFrame(data2)
first_recommendation = recommendedCategoriesDF.head(1)
print(first_recommendation)
# Sample output of the print above:
#    Unnamed: 0 HOUSING TRANSPORT   FOOD  ... INCOME ENTERTAINMENT SERVICES Other
# 0           0     NaN     shell  qdoba  ...    NaN      nintendo      NaN   NaN
如果
userTransactionsDF['Original Description'][0]
与 recommendedCategoriesDF 中的某个值匹配(匹配时在该值的两侧加上 '.+' 通配符),
我希望返回这个匹配值所在列的列名。
例如,如果
userTransactionsDF['Original Description'][0]
的值是 'POS WD Nintendo CA877180016 8',
那么我希望它能匹配 recommendedCategoriesDF 中的 'nintendo',
并返回对应的列名 'ENTERTAINMENT'。
我尝试了下面的做法:先把推荐类别的 DataFrame 转换成字典,如果描述与字典中的某个值(两侧加上通配符)匹配,就返回对应的键。不过我觉得我的思路可能偏得很远……
def _keywords_of(entry):
    """Return the lower-cased keyword strings held by one ``categories_dict`` value.

    The value may be a single string, a ``{row: keyword}`` dict (the shape
    ``DataFrame.to_dict()`` produces), or any iterable of keywords (e.g. the
    lists from ``DataFrame.to_dict('list')``).  Only non-empty strings are
    kept, which drops NaN placeholders and numeric cells such as a saved
    index column.
    """
    if isinstance(entry, str):
        candidates = [entry]
    elif isinstance(entry, dict):
        candidates = entry.values()
    else:
        try:
            candidates = list(entry)
        except TypeError:
            # A lone non-iterable scalar (e.g. NaN); filtered out below.
            candidates = [entry]
    return [
        candidate.strip().casefold()
        for candidate in candidates
        if isinstance(candidate, str) and candidate.strip()
    ]


def _match_category(description, keywords_by_category):
    """Return the first category whose keyword occurs in *description*.

    Matching is a case-insensitive substring test, so 'nintendo' is found in
    'POS WD Nintendo CA877180016 8' and a keyword at the very start or end of
    the description still matches (a '.+keyword.+' regex would miss those).
    Returns 'Other' when nothing matches or *description* is not a string.
    """
    if not isinstance(description, str):
        return 'Other'
    haystack = description.casefold()
    for category, keywords in keywords_by_category.items():
        if any(keyword in haystack for keyword in keywords):
            return category
    return 'Other'


# Normalise the keyword table once instead of once per transaction.
_keywords_by_category = {
    category: _keywords_of(entry) for category, entry in categories_dict.items()
}

# Walk descriptions and amounts in lockstep: each transaction contributes
# exactly one row (nested loops would pair every description with every amount).
for description, amount in zip(
    userTransactionsDF['Original Description'], userTransactionsDF['Amount']
):
    print(f'original description = {description}')
    value = _match_category(description, _keywords_by_category)
    print(f'value = {value}')
    # Append a row holding the amount under the matched category's column.
    categorized_Transactions.loc[len(categorized_Transactions)] = pd.Series(
        {value: amount}
    )