import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
test=pd.read_csv('test.csv')
train=pd.read_csv('train.csv')
y=train['Survived'].reset_index(drop=True)
train_features = train.drop(['Survived'], axis=1)
test_features = test
features = pd.concat([train_features, test_features]).reset_index(drop=True)
features= features.drop(['Name'],axis=1)
features= features.drop(['Ticket'],axis=1)
features= features.drop(['Cabin'],axis=1)
features.isna().sum()
from sklearn.preprocessing import Imputer
imp = Imputer()
features.iloc[:, [3,6]] = imp.fit_transform(features.iloc[:,[3,6]].values)
features['Embarked'] = features['Embarked'].fillna('S')
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
['Sex']=le.fit_transform(features['Sex'])
features['Embarked']=le.fit_transform(features['Embarked'])
from sklearn.preprocessing import OneHotEncoder
one = OneHotEncoder()
features = one.fit_transform(features)
features = pd.DataFrame(list(features))
train = features.iloc[:len(y),:]
test=features.iloc[len(y):,:]
from sklearn.linear_model import LogisticRegression
lin_reg = LogisticRegression()
lin_reg.fit(train,y)
我收到以下错误:
File "<ipython-input-1-e57e780501e5>", line 56, in <module>
lin_reg.fit(train,y)
File
"C:\Users\USER\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py",
line 1220, in fit
order="C")
File
"C:\Users\USER\Anaconda3\lib\site-packages\sklearn\utils\validation.py",
line 573, in check_X_y
ensure_min_features, warn_on_dtype, estimator)
File
"C:\Users\USER\Anaconda3\lib\site-packages\sklearn\utils\validation.py",
line 433, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: setting an array element with a sequence.
In [53]: np.array(np.array([[1,2,3],[3,4]]))
Out[53]: array([list([1, 2, 3]), list([3, 4])], dtype=object)
In [54]: np.array(np.array([[1,2,3],[3,4]]),int)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: int() argument must be a string, a bytes-like object or a number, not 'list'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-54-8edc6222b80d> in <module>
----> 1 np.array(np.array([[1,2,3],[3,4]]),int)
ValueError: setting an array element with a sequence.
内部数组包含不同大小的列表,从而产生对象dtype数组。如果尝试使用int
dtype创建新数组,则会收到错误消息。您需要更仔细地观察
train
和y
。他们的shapes
和dtype
是什么。如果其中一个是object
dtype,或者形状是意外的(例如1d而不是2d),则需要查看其生成方式。