我应该使用 Python 编写购物预测代码,我没有收到任何错误,但是当我运行代码时什么也没有发生。有什么问题吗?
我期望看到这段代码评估后输出的结果(预测正确与错误的数量,以及真阳性率和真阴性率)。
我已经多次检查代码,但无法找出问题所在。
import csv
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Value passed to train_test_split as `test_size` in main().
TEST_SIZE = 0.4
def main():
    """Run the pipeline: load the CSV, split it, fit, predict, and report.

    Expects exactly one command-line argument, which is handed to
    `load_data`. Exits with a usage message otherwise.
    """
    # Exactly one argument (besides the script name) is required.
    if len(sys.argv) != 2:
        sys.exit("Usage: python shopping.py data")
    data_path = sys.argv[1]

    # Read the data and hold out TEST_SIZE of it for testing.
    evidence, labels = load_data(data_path)
    split = train_test_split(evidence, labels, test_size=TEST_SIZE)
    X_train, X_test, y_train, y_test = split

    # Fit the classifier, predict the held-out set, and score it.
    classifier = train_model(X_train, y_train)
    predictions = classifier.predict(X_test)
    sensitivity, specificity = evaluate(y_test, predictions)

    # Report the results.
    correct = (y_test == predictions).sum()
    incorrect = (y_test != predictions).sum()
    print(f"Correct: {correct}")
    print(f"Incorrect: {incorrect}")
    print(f"True Positive Rate: {100 * sensitivity:.2f}%")
    print(f"True Negative Rate: {100 * specificity:.2f}%")
def load_data(filename):
    """
    Load shopping data from the CSV file `filename` and return a tuple
    (evidence, labels).

    `evidence` is a list of lists, one per CSV row, holding these values
    in this order:
        1- Administrative, an integer
        2- Administrative_Duration, a floating point number
        3- Informational, an integer
        4- Informational_Duration, a floating point number
        5- ProductRelated, an integer
        6- ProductRelated_Duration, a floating point number
        7- BounceRates, a floating point number
        8- ExitRates, a floating point number
        9- PageValues, a floating point number
        10- SpecialDay, a floating point number
        11- Month, an index from 0 (January) to 11 (December)
        12- OperatingSystems, an integer
        13- Browser, an integer
        14- Region, an integer
        15- TrafficType, an integer
        16- VisitorType, an integer 0 (not returning) or 1 (returning)
        17- Weekend, an integer 0 (if false) or 1 (if true)

    `labels` is the matching list of labels: 1 if Revenue is true,
    0 otherwise.

    Raises ValueError if the Month column holds a value that is not a
    known month abbreviation.
    """
    # The data set spells June in full; "Jun" is accepted as well.
    months = {
        'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'June': 5,
        'Jun': 5, 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10,
        'Dec': 11,
    }
    integer_columns = [
        'Administrative', 'Informational', 'ProductRelated', 'Month',
        'OperatingSystems', 'Browser', 'Region', 'TrafficType',
        'VisitorType', 'Weekend', 'Revenue',
    ]
    float_columns = [
        'Administrative_Duration', 'Informational_Duration',
        'ProductRelated_Duration', 'BounceRates', 'ExitRates',
        'PageValues', 'SpecialDay',
    ]
    # Evidence columns are selected by name so the output order follows
    # the docstring, not the column order of the file.
    evidence_columns = [
        'Administrative', 'Administrative_Duration', 'Informational',
        'Informational_Duration', 'ProductRelated',
        'ProductRelated_Duration', 'BounceRates', 'ExitRates',
        'PageValues', 'SpecialDay', 'Month', 'OperatingSystems', 'Browser',
        'Region', 'TrafficType', 'VisitorType', 'Weekend',
    ]

    # Read the file the caller asked for, not a hard-coded path.
    data = pd.read_csv(filename, header=0)

    # Month name -> 0-based index. Unknown names are reported here instead
    # of surfacing later as a NaN-to-int cast failure.
    month_index = data['Month'].map(months)
    unknown = data.loc[month_index.isna(), 'Month'].unique().tolist()
    if unknown:
        raise ValueError(f"Unrecognised Month value(s): {unknown}")
    data['Month'] = month_index

    data['VisitorType'] = data['VisitorType'].eq('Returning_Visitor')

    # read_csv turns TRUE/FALSE columns into booleans, so comparing cells
    # with the string 'TRUE' never matches. Going through the upper-cased
    # string form handles both boolean and text columns.
    for column in ('Weekend', 'Revenue'):
        as_text = data[column].astype(str).str.strip().str.upper()
        data[column] = as_text.eq('TRUE')

    dtypes = dict.fromkeys(integer_columns, 'int64')
    dtypes.update(dict.fromkeys(float_columns, 'float64'))
    data = data.astype(dtypes)

    # Convert column by column: Series.tolist() yields native Python ints
    # and floats, whereas DataFrame.values would upcast every integer
    # feature to float.
    columns = [data[name].tolist() for name in evidence_columns]
    evidence = [list(row) for row in zip(*columns)]
    labels = data['Revenue'].tolist()

    print(f'there are {len(evidence)} entries in this database. \n')
    return evidence, labels
def train_model(evidence, labels):
    """
    Fit a k-nearest-neighbour classifier (k=1) on `evidence` and `labels`
    and return the fitted model.
    """
    # fit() returns the estimator itself, so construct and fit in one step.
    return KNeighborsClassifier(n_neighbors=1).fit(evidence, labels)
def evaluate(labels, predictions):
    """
    Given the actual labels and the predicted labels, return a tuple
    (sensitivity, specificity).

    Each label is assumed to be either 1 (positive) or 0 (negative).
    Both arguments may be any iterables (lists, tuples, arrays, ...);
    they are walked pairwise.

    `sensitivity` is a float from 0 to 1, the "true positive rate": the
    proportion of actual positive labels that were predicted correctly.

    `specificity` is a float from 0 to 1, the "true negative rate": the
    proportion of actual negative labels that were predicted correctly.

    A rate whose denominator is zero (no actual positives, or no actual
    negatives) is reported as 0.0 instead of raising ZeroDivisionError.
    """
    positives = negatives = 0
    true_positives = true_negatives = 0

    # One pass over the pairs; no reliance on list-only methods like count().
    for actual, predicted in zip(labels, predictions):
        correct = actual == predicted
        if actual == 1:
            positives += 1
            if correct:
                true_positives += 1
        else:
            negatives += 1
            if correct:
                true_negatives += 1

    # Guard against an empty class so a one-sided test split cannot crash.
    sensitivity = true_positives / positives if positives else 0.0
    specificity = true_negatives / negatives if negatives else 0.0
    return sensitivity, specificity
# Script entry point: call main() only when this file is executed directly.
# This guard has to sit at module level (column 0), not inside a function.
if __name__ == "__main__":
    main()