所以我有一个数据框,并编写了一个函数来根据一组条件在新列中添加值。
代码使用了两个数据框
第一个是 merged_df,这是我尝试添加新列的 df,它具有以下上下文属性:
merged_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 889 entries, 0 to 888
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 UNIQUE_ID 889 non-null object
1 REGISTERED_NAME 889 non-null object
2 EMAIL 889 non-null object
3 DBS_CHECK_DATE 889 non-null object
4 EXPIRY_DATE 889 non-null object
5 UNIQUE_ID 889 non-null object
6 Status 889 non-null object
7 Action 889 non-null object
dtypes: object(8)
然后有一个单列数据框,其中的内容是指定条件之一所需要的
SAP_only_EAs.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 UNIQUE_ID 64 non-null object
dtypes: object(1)
这是函数:
import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
def current_month():
return datetime.datetime.now().strftime("%m/%Y")
def get_current_date():
return datetime.datetime.now().strftime("%d/%m/%Y")
def three_month_ahead():
current = datetime.datetime.now()
three_month = current + relativedelta(months=3)
return three_month.strftime("%m/%Y")
def next_month_expiry():
current = datetime.datetime.now()
nextmonth = current + relativedelta(months=1)
return nextmonth.strftime("%m/%Y")
def year_ahead():
current = datetime.datetime.now()
year_on = current + relativedelta(months = 12)
return year_on.strftime("%d/%m/%Y")
#sap_only_eas_set = set(SAP_only_EAs['UNIQUE_ID'].tolist())
def action_col(expiry_date, Status, uniqueID):
three_months_ahead = pd.to_datetime(three_month_ahead(), format='%m/%Y')
next_month = pd.to_datetime(next_month_expiry(), format='%m/%Y')
current_date_today = pd.to_datetime(get_current_date(), format='%d/%m/%Y')
year_on = pd.to_datetime(year_ahead(), format='%d/%m/%Y')
expiry_date = pd.to_datetime(expiry_date, format='%d/%m/%Y') # Convert the expiry_date to datetime
if f"{expiry_date.year}-{expiry_date.month:02}" == f"{three_months_ahead.year}-{three_months_ahead.month:02}":
return 'Send 3 month request'
elif expiry_date.month == next_month.month and expiry_date.year == next_month.year:
return 'Send 1 month reminder'
elif expiry_date < current_date_today and Status == "Not Suspended":
return 'DBS expired: Suspend & update iAdmin notes'
elif (year_on < expiry_date < current_date_today) and Status == "Suspended":
return 'No action needed - correct suspensions in place'
elif uniqueID in SAP_only_EAs['UNIQUE_ID']:
return 'No action needed - SAP only assessor'
elif (expiry_date < year_on) and Status == "Suspended":
return 'DBS expired for over a year – look at whether account closure is appropriate'
else:
return 'No action required – valid DBS check'
merged_df['Action'] = merged_df.apply(lambda row: action_col(row['EXPIRY_DATE'], row['Status'], row['UNIQUE_ID']), axis=1)
所以我遇到的错误是与这部分条件有关
elif uniqueID in SAP_only_EAs['UNIQUE_ID']:
return 'No action needed - SAP only assessor'
当我将其注释掉时,该函数运行正常,但是包含此内容后,我收到此错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
c:\Users\orla_quidos\Code\DBS project\STATUS section of the scheduled job python script.ipynb Cell 9 line 5
48 else:
49 return 'No action required – valid DBS check'
---> 51 merged_df['Action'] = merged_df.apply(lambda row: action_col(row['EXPIRY_DATE'], row['Status'], row['UNIQUE_ID']), axis=1)
File c:\Users\orla_quidos\anaconda3\lib\site-packages\pandas\core\frame.py:9568, in DataFrame.apply(self, func, axis, raw, result_type, args, **kwargs)
9557 from pandas.core.apply import frame_apply
9559 op = frame_apply(
9560 self,
9561 func=func,
(...)
9566 kwargs=kwargs,
9567 )
-> 9568 return op.apply().__finalize__(self, method="apply")
File c:\Users\orla_quidos\anaconda3\lib\site-packages\pandas\core\apply.py:764, in FrameApply.apply(self)
761 elif self.raw:
762 return self.apply_raw()
--> 764 return self.apply_standard()
File c:\Users\orla_quidos\anaconda3\lib\site-packages\pandas\core\apply.py:891, in FrameApply.apply_standard(self)
890 def apply_standard(self):
--> 891 results, res_index = self.apply_series_generator()
...
--> 371 hash(key)
372 try:
373 key = ensure_python_int(key)
TypeError: unhashable type: 'Series'
非常感谢任何想法,SAP_only_EA 只是 SQL 查询转换为数据帧的一列输出,我不知道如何更改其格式以便函数接受它,我尝试列出(它在这里被注释掉) )和许多其他的事情都无济于事?!
蒂亚! :)
import pandas as pd import numpy as np from datetime import datetime
from dateutil.relativedelta import relativedelta
current_date_today = datetime.today()
three_month_ahead = current_date_today + pd.DateOffset(months=3)
current_month = datetime.now().strftime("%m/%Y")
next_month = current_date_today + pd.DateOffset(months=1)
year = current_date_today + pd.DateOffset(months=12)
expiry_date = pd.to_datetime(merged_df['EXPIRY_DATE'],
format='%d/%m/%Y')
CONDLIST = [(expiry_date.dt.year == three_month_ahead.year) &
(expiry_date.dt.month == three_month_ahead.month),
(expiry_date.dt.month == next_month.month) & (expiry_date.dt.year == next_month.year),
(expiry_date < current_date_today) & (merged_df['Status'] == "Not Suspended"),
((year < expiry_date) & (expiry_date < current_date_today) & (merged_df['Status'] == "Suspended")),
(merged_df['UNIQUE_ID'].isin(SAP_only_EAs['UNIQUE_ID'])),
(expiry_date < year) & (merged_df['Status'] == "Suspended")]
CHOICELIST = ["Send 3 month request",
"Send 1 month reminder",
"DBS expired: Suspend & update iAdmin notes",
"No action needed - correct suspensions in place",
"No action needed - SAP only assessor",
"DBS expired for over a year – look at whether account closure is appropriate"]
merged_df['Action'] = np.select(CONDLIST, CHOICELIST, default = "No
action required – valid DBS check")
Thanks to the guy who suggest I use np.select - my new found love <3 got it to work :)