我尝试训练一个逻辑回归模型,但收到了这样的错误:`ValueError: could not convert string to float: 'F. Robertson'`。在此之前,我已经删除了包含球员姓名的列:
# Remove the name, 'dob' and nationality columns before modelling.
# NOTE(review): DataFrame.drop returns a new frame by default and the result
# is not assigned here, so fifa_df_2 still holds these string columns afterwards.
fifa_df_2.drop(columns = ['short_name','long_name','dob', 'nationality_name'])
from sklearn.model_selection import train_test_split
# Features are every column except the target; the target is 'player_positions'.
x = fifa_df_2.drop('player_positions', axis = 1)
y = fifa_df_2['player_positions']
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=27)
# SMOTE is imported but not used in the visible code.
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler
# Only the training split is resampled; x_test / y_test are left untouched.
oversample = RandomOverSampler(sampling_strategy='minority')
# The same resampling call is repeated four times.
# NOTE(review): presumably each pass lifts whichever class is currently the
# rarest, balancing several classes in turn -- confirm this is intended.
x_train, y_train = oversample.fit_resample(x_train, y_train)
x_train, y_train = oversample.fit_resample(x_train, y_train)
x_train, y_train = oversample.fit_resample(x_train, y_train)
# NOTE(review): the trailing backtick on the next line looks like leftover markup.
x_train, y_train = oversample.fit_resample(x_train, y_train)`
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical feature columns listed below.
# NOTE(review): only these three columns are encoded; any other string column
# still present in fifa_df_2 will reach the model as raw text.
categorical_columns = ['preferred_foot', 'work_rate', 'body_type']
# NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output`;
# switch the keyword if the installed version rejects `sparse` -- confirm version.
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(fifa_df_2[categorical_columns])
# Reuse the source frame's index: pd.concat(axis=1) aligns rows on index labels,
# so a fresh RangeIndex would misalign rows (and introduce NaNs) whenever
# fifa_df_2 does not carry a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=fifa_df_2.index,
)
fifa_df_2 = pd.concat(
    [fifa_df_2.drop(categorical_columns, axis=1), encoded_df],
    axis=1,
)

from fast_ml.model_development import train_valid_test_split
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()


def split_positions(positions):
    """Split a comma-separated position string into a list of positions.

    Args:
        positions: A string such as ``"ST, CF"``, or any other value.

    Returns:
        A list of position strings when ``positions`` is a ``str``; otherwise
        ``positions`` is returned unchanged.
    """
    return positions.split(', ') if isinstance(positions, str) else positions


# Feature matrix / target taken from the encoded frame.
x = fifa_df_2.drop('player_positions', axis=1)
y = fifa_df_2['player_positions']

# 80 / 10 / 10 train / validation / test split on the 'player_positions' target.
x_train, y_train, x_valid, y_valid, x_test, y_test = train_valid_test_split(
    fifa_df_2,
    target='player_positions',
    train_size=0.8,
    valid_size=0.1,
    test_size=0.1,
)

# Turn each target string into a list of positions. One pass is enough:
# afterwards no element is a str, and split_positions returns every non-str
# value unchanged, so applying it again would be a no-op.
y_train = y_train.apply(split_positions)
y_valid = y_valid.apply(split_positions)
y_test = y_test.apply(split_positions)

# Collect every distinct position seen in any split and fit the encoder on it.
all_positions = set(
    y_train.explode().unique().tolist()
    + y_valid.explode().unique().tolist()
    + y_test.explode().unique().tolist()
)
label_encoder.fit(list(all_positions))
我还尝试对“short_name”列进行独热编码(one-hot encoding),但仍然收到了同样的错误。
您忘记把 drop() 的返回值重新赋给变量了(drop() 默认返回一个新的 DataFrame,不会修改原对象),应写成
fifa_df_2 = ...
# drop() returns a new DataFrame, so rebind the name to keep the result.
fifa_df_2 = fifa_df_2.drop(['short_name', 'long_name', 'dob', 'nationality_name'], axis=1)
或者使用下面的参数,直接在原 DataFrame 上就地修改:
inplace=True
# With inplace=True the frame is modified directly and drop() returns None.
fifa_df_2.drop(
    columns=['short_name', 'long_name', 'dob', 'nationality_name'],
    inplace=True,
)