# Lookup table: abbreviation code -> long-form region name.
abbreviation_mapping = {
    "E": "Europe",
    "A": "US/Canada",
    "S": "South America",
    "O": "Australia",
    "Z": "New Zealand",
    # "N", "U" and the empty string all resolve to the same region.
    "N": "New Delhi/Kolkata",
    "U": "New Delhi/Kolkata",
    "": "New Delhi/Kolkata",
    # Add more mappings as needed
}
def abbreviation_column(self, df, abbreviation_mapping, col_name):
    """
    Replace abbreviations in one column with their long form.

    Values of ``col_name`` that appear as keys in ``abbreviation_mapping``
    are swapped for the mapped value; values that are not keys are left
    unchanged. An empty string is expanded to "New Delhi/Kolkata" unless the
    mapping supplies its own entry for "".

    :param df: input dataframe
    :param abbreviation_mapping: Give abbreviation in the form of dictionary
        (abbreviation -> long form)
    :param col_name: column name on which abbreviation_column need to apply
    :return: df with the abbreviations in ``col_name`` expanded
    :raises Exception: if the replacement cannot be applied; the original
        error is chained as the cause
    """
    try:
        # Keep the built-in default for empty strings, but let an explicit
        # "" entry in the caller's mapping take precedence.
        replacements = {"": "New Delhi/Kolkata", **abbreviation_mapping}
        # DataFrame.replace takes a {old: new} dict directly; ``subset``
        # restricts the substitution to the requested column.
        df = df.replace(replacements, subset=[col_name])
    except Exception as e:
        raise Exception(f"Error: While running abbreviation_column_method. Failed with exception: {e}") from e
    return df
# You can use a UDF to achieve this; see the code below.
# Abbreviation code -> long-form region name.
abbreviation_mapping = {
    **{
        "E": "Europe",
        "A": "US/Canada",
        "S": "South America",
        "O": "Australia",
        "Z": "New Zealand",
    },
    # These codes (including the empty string) share one long form.
    **dict.fromkeys(("N", "U", ""), "New Delhi/Kolkata"),
    # Add more mappings as needed
}
# UDF that expands one abbreviation via abbreviation_mapping. The lookup uses
# the lambda's own argument, and values without a mapping are passed through
# unchanged instead of raising KeyError.
abbreviationUDF = udf(lambda z: abbreviation_mapping.get(z, z))
def abbreviation_column(df, abbreviation_mapping, col_name):
    """
    Replace abbreviations in one column with their long form.

    Values of ``col_name`` that appear as keys in ``abbreviation_mapping``
    are swapped for the mapped value; values that are not keys are left
    unchanged.

    :param df: input dataframe
    :param abbreviation_mapping: Give abbreviation in the form of dictionary
        (abbreviation -> long form)
    :param col_name: column name on which abbreviation_column need to apply
    :return: df with the abbreviations in ``col_name`` expanded
    :raises Exception: if the replacement cannot be applied; the original
        error is chained as the cause
    """
    # Nothing to substitute: hand the dataframe back untouched.
    if not abbreviation_mapping:
        return df
    try:
        # Apply the mapping that was passed in, not a module-level UDF bound
        # to a global dict. DataFrame.replace takes the {old: new} dict
        # directly, and ``subset`` limits it to the requested column.
        df = df.replace(dict(abbreviation_mapping), subset=[col_name])
    except Exception as e:
        raise Exception(f"Error: While running abbreviation_column_method. Failed with exception: {e}") from e
    return df
# Expand the abbreviations stored in the "city" column.
df2 = abbreviation_column(
    df=df,
    abbreviation_mapping=abbreviation_mapping,
    col_name="city",
)