from matplotlib.colors import ListedColormap

# Plot the classifier's decision regions over the first two feature columns
# of the test set, then overlay the test points coloured by their true class.
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01),
)

# predict() requires exactly as many columns as the model was trained on.
# Passing only the two grid columns raises
# "X has 2 features, but LogisticRegression is expecting 6 features".
# Build a full-width input instead: the first two columns sweep the grid and
# every remaining column is held at its test-set mean, so the picture is a
# 2-D slice through the higher-dimensional decision function.
# NOTE: with step=0.01 the grid can hold millions of rows; increase the step
# if memory becomes a problem.
grid = np.empty((X1.size, X_set.shape[1]), dtype=float)
grid[:, 0] = X1.ravel()
grid[:, 1] = X2.ravel()
if X_set.shape[1] > 2:
    grid[:, 2:] = X_set[:, 2:].mean(axis=0)

plt.contourf(X1, X2, classifier.predict(grid).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('magenta', 'blue')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    # `color=` (not `c=`) is the right keyword for a single RGBA tuple;
    # `c=` would try to interpret the four numbers as per-point values.
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color=ListedColormap(('magenta', 'blue'))(i), label=j)


ValueError                                Traceback (most recent call last)
Cell In[69], line 6
      3 X_set, y_set = X_test, y_test
      4 X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
      5                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
----> 6 plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
      7              alpha = 0.75, cmap = ListedColormap(('magenta', 'blue')))
      8 plt.xlim(X1.min(), X1.max())
      9 plt.ylim(X2.min(), X2.max())

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_base.py:419, in LinearClassifierMixin.predict(self, X)
    405 """
    406 Predict class labels for samples in X.
    416     Vector containing the class labels for each sample.
    417 """
    418 xp, _ = get_namespace(X)
--> 419 scores = self.decision_function(X)
    420 if len(scores.shape) == 1:
    421     indices = xp.astype(scores > 0, int)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_base.py:400, in LinearClassifierMixin.decision_function(self, X)
    397 check_is_fitted(self)
    398 xp, _ = get_namespace(X)
--> 400 X = self._validate_data(X, accept_sparse="csr", reset=False)
    401 scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
    402 return xp.reshape(scores, -1) if scores.shape[1] == 1 else scores

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py:588, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
    585     out = X, y
    587 if not no_val_X and check_params.get("ensure_2d", True):
--> 588     self._check_n_features(X, reset=reset)
    590 return out

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py:389, in BaseEstimator._check_n_features(self, X, reset)
    386     return
    388 if n_features != self.n_features_in_:
--> 389     raise ValueError(
    390         f"X has {n_features} features, but {self.__class__.__name__} "
    391         f"is expecting {self.n_features_in_} features as input."
    392     )

ValueError: X has 2 features, but LogisticRegression is expecting 6 features as input.




    import pandas as pd
    import numpy as np
    import seaborn as sns
    import matplotlib.pyplot as plt
    # Load the raw training data.
    training_set = pd.read_csv('Train_Titanic.csv')
    # Draw a heatmap of the null mask to see which columns have missing values.
    sns.heatmap(data=training_set.isnull(), cmap='Blues', cbar=False, yticklabels=False)
    # inplace=True removes the columns from the DataFrame itself instead of
    # returning a modified copy.
    training_set.drop(columns=['Cabin', 'Name', 'Ticket', 'Embarked'], inplace=True)
    def Fill_Age(data):
        """Return the age from an (age, sex) row, imputing a default if missing.

        Meant to be used with ``DataFrame[['Age', 'Sex']].apply(Fill_Age, axis=1)``.

        Args:
            data: Row-like iterable whose first item is the age and whose
                second item is the sex string (e.g. a pandas row Series).

        Returns:
            The age unchanged when it is not null; otherwise 29 when the sex
            equals ``'male'`` and 25 for any other value (the author's
            per-sex averages read off a boxplot).
        """
        # Unpack by iteration rather than data[0]/data[1]: integer keys on a
        # label-indexed Series rely on a deprecated positional fallback.
        age, sex, *_ = data
        if not pd.isnull(age):
            return age            # Known ages pass through untouched.
        # Compare by value: `is` tests object identity, which is not
        # guaranteed to hold for equal strings read from a file.
        return 29 if sex == 'male' else 25
    # Fill missing ages row by row from each passenger's (Age, Sex) pair.
    training_set['Age'] = training_set[['Age', 'Sex']].apply(Fill_Age, axis=1)
    training_set.drop('PassengerId', axis=1, inplace=True)
    # One-hot encode Sex exactly once. drop_first=True discards the first
    # category column (for female/male data this leaves a single `male`
    # column) and dtype=int stores 0/1 rather than booleans.
    male = pd.get_dummies(training_set['Sex'], drop_first=True, dtype=int)
    # Replace the text Sex column with its numeric encoding.
    training_set.drop('Sex', axis=1, inplace=True)
    training_set = pd.concat([training_set, male], axis=1)

# Split the prepared frame into the target vector y (the 'Survived' column)
# and the feature matrix X (every other column), both as NumPy arrays.
y = training_set['Survived'].values
X = training_set.drop(columns=['Survived']).values

    from sklearn.model_selection import train_test_split
    # Hold out 20% of the rows for testing; random_state makes the split repeatable.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
    from sklearn.linear_model import LogisticRegression  # The estimator class
    classifier = LogisticRegression(random_state=0)      # An unfitted estimator instance
    # Train the model. Without this call predict() raises NotFittedError.
    classifier.fit(X_train, y_train)
    # Predict class labels for the held-out rows.
    y_predict = classifier.predict(X_test)
出现该错误的原因是:绘图代码通过网格只生成了 2 列特征(X1、X2),而逻辑回归模型是用 6 个特征训练的。请确保传给 predict 的输入数据(特别是 X)包含您打算用于预测的所有特征;在这种情况下,它应至少包含以下列:“Pclass”、“Sex”、“Age”、“SibSp”、“Parch”和“Fare”。输入数据的特征数量必须与逻辑回归模型期望的数量相同:如果您的模型需要 6 个特征,则需要提供具有 6 个特征的输入数据。如果您原本只打算让逻辑回归模型使用 2 个特征(例如为了绘制二维决策边界),那么问题出在模型的配置或训练上,应当只用这 2 个特征重新训练模型。此外,请确认您的输入数据已正确预处理,并确保您使用了正确的训练和测试数据集;如果使用了错误的数据集或数据形状不匹配,就会出现此类错误。


