我正在尝试进行字符串模糊匹配，但在创建新列时遇到了下面的错误，请帮忙看看。（AttributeError: 'DataFrame' 对象没有属性 'withColumn'）
from pyspark.sql import functions as f
from fuzzywuzzy import fuzz
from pyspark.sql.types import StringType
from pyspark.sql import SparkSession, DataFrame
def matchstring(s1, s2):
    """Return the fuzzywuzzy token-sort similarity ratio of two values.

    Args:
        s1: First value to compare (presumably a string column value).
        s2: Second value to compare (presumably a string column value).

    Returns:
        Whatever ``fuzz.token_sort_ratio`` returns for the pair; per the
        fuzzywuzzy documentation this is a similarity score where higher
        means more similar.
    """
    # The body must be indented under the def; with `return` at column 0
    # the module fails to import with an IndentationError.
    return fuzz.token_sort_ratio(s1, s2)
# Wrap the scorer as a Spark UDF so it can be applied column-wise.
# NOTE(review): the declared return type is StringType, while matchstring
# presumably yields an integer score; an integer type may be a better fit,
# but changing it alters the resulting column type, so it is left as-is.
MatchUDF = f.udf(matchstring, StringType())

spark = SparkSession.builder.appName("test").getOrCreate()

# `ps`, `df_Sale_KR` and `df_Dist_Mast` are defined outside this snippet.
# NOTE(review): presumably `ps` is pyspark.pandas (or pandas) - confirm.
df_merged = ps.merge(df_Sale_KR, df_Dist_Mast, on='Distributor_ID', how='left')

# `withColumn` exists only on pyspark.sql.DataFrame. A pandas-style merge
# result does not have it, which raises:
#   AttributeError: 'DataFrame' object has no attribute 'withColumn'
# Convert the merged frame to a Spark DataFrame before using the Spark API.
if not isinstance(df_merged, DataFrame):
    if hasattr(df_merged, "to_spark"):
        # pandas-on-Spark frame: convert without collecting to the driver.
        df_merged = df_merged.to_spark()
    else:
        # Plain pandas frame: hand it to the active Spark session.
        df_merged = spark.createDataFrame(df_merged)

# Score every row by comparing its "source" and "target" columns.
df_similarity_score = df_merged.withColumn(
    "similarity_score",
    MatchUDF(f.col("source"), f.col("target")),
)
df_similarity_score.show()