为什么这段代码没有产生任何输出?

问题描述 投票:0回答:0

我需要用 Python 编写一个购物预测程序。运行代码时没有收到任何错误,但也没有任何输出。问题出在哪里?

我期望看到这段代码评估后输出的结果:

enter image description here

我已经多次检查代码,但无法找出问题所在。

import csv
import sys
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Fraction of the data held out as the test set; main() passes it to
# train_test_split as `test_size`.
TEST_SIZE = 0.4


def main():
    """
    Command-line entry point: load the data file named by the single
    command-line argument, fit the classifier on a random training split
    and print how well it predicts the held-out split.

    Exits with a usage message unless exactly one argument is supplied.
    """
    # Exactly one argument (the data file) must follow the script name.
    if len(sys.argv) != 2:
        sys.exit("Usage: python shopping.py data")
    data_path = sys.argv[1]

    # Read the dataset, then hold out TEST_SIZE of it for testing.
    evidence, labels = load_data(data_path)
    split = train_test_split(evidence, labels, test_size=TEST_SIZE)
    X_train, X_test, y_train, y_test = split

    # Fit on the training portion and predict the held-out portion.
    predictions = train_model(X_train, y_train).predict(X_test)
    sensitivity, specificity = evaluate(y_test, predictions)

    # Report raw hit/miss counts followed by the two rates as percentages.
    print(f"Correct: {(y_test == predictions).sum()}")
    print(f"Incorrect: {(y_test != predictions).sum()}")
    print(f"True Positive Rate: {100 * sensitivity:.2f}%")
    print(f"True Negative Rate: {100 * specificity:.2f}%")


def load_data(filename):
    """
    Load shopping data from the CSV file `filename` and convert it into a
    list of evidence lists and a list of labels.

    Each evidence list contains the following values, in this order
    (columns are selected by name, so the CSV column order is irrelevant):
        1- Administrative, an integer
        2- Administrative_Duration, a floating point number
        3- Informational, an integer
        4- Informational_Duration, a floating point number
        5- ProductRelated, an integer
        6- ProductRelated_Duration, a floating point number
        7- BounceRates, a floating point number
        8- ExitRates, a floating point number
        9- PageValues, a floating point number
        10- SpecialDay, a floating point number
        11- Month, an index from 0 (January) to 11 (December)
        12- OperatingSystems, an integer
        13- Browser, an integer
        14- Region, an integer
        15- TrafficType, an integer
        16- VisitorType, an integer 0 (not returning) or 1 (returning)
        17- Weekend, an integer 0 (if false) or 1 (if true)

    Each label is 1 if Revenue is true, and 0 otherwise.

    Returns:
        A tuple (evidence, labels) of plain Python lists.

    Raises:
        ValueError: if the Month column holds a value that is not a known
            month abbreviation.
        KeyError: if one of the expected columns is missing from the file.

    Also prints the number of rows that were loaded.
    """
    # Read the file the caller asked for, not a hard-coded path.
    data = pd.read_csv(filename, header=0)

    # Evidence columns in the documented order, with their target dtype.
    column_types = {
        'Administrative': 'int64',
        'Administrative_Duration': 'float64',
        'Informational': 'int64',
        'Informational_Duration': 'float64',
        'ProductRelated': 'int64',
        'ProductRelated_Duration': 'float64',
        'BounceRates': 'float64',
        'ExitRates': 'float64',
        'PageValues': 'float64',
        'SpecialDay': 'float64',
        'Month': 'int64',
        'OperatingSystems': 'int64',
        'Browser': 'int64',
        'Region': 'int64',
        'TrafficType': 'int64',
        'VisitorType': 'int64',
        'Weekend': 'int64',
    }

    # 11 - 'June' is the spelling the lookup has always used; 'Jun' is
    # accepted as an alias so both spellings map to index 5.
    months = {'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, 'June': 5, 'Jul': 6, 'Aug': 7,
              'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11}
    month_index = data['Month'].map(months)
    unmapped = month_index.isna()
    if unmapped.any():
        # Fail with a readable message instead of an obscure NaN cast error.
        unknown = sorted(set(data.loc[unmapped, 'Month'].astype(str)))
        raise ValueError(f'Unrecognised Month value(s): {unknown}')
    data['Month'] = month_index

    # 16
    data['VisitorType'] = (data['VisitorType'] == 'Returning_Visitor').astype('int64')

    # 17 and the label: pandas parses TRUE/FALSE cells as booleans, so a
    # plain comparison against the string 'TRUE' would always yield 0.
    data['Weekend'] = data['Weekend'].map(_to_binary)
    data['Revenue'] = data['Revenue'].map(_to_binary)

    data = data.astype({**column_types, 'Revenue': 'int64'})

    # Build rows column by column: Series.tolist() yields native Python
    # ints/floats, whereas DataFrame.values would upcast every int to float.
    columns = (data[name].tolist() for name in column_types)
    evidence = [list(row) for row in zip(*columns)]
    labels = data['Revenue'].tolist()

    print(f'there are {len(evidence)} entries in this database. \n')

    return evidence, labels


def _to_binary(value):
    """Return 1 for a true flag (bool True, 1, or the text 'true' in any case), else 0."""
    if isinstance(value, str):
        return 1 if value.strip().upper() == 'TRUE' else 0
    # Covers Python/numpy booleans and 0/1 integers; NaN compares unequal -> 0.
    return int(value == 1)


def train_model(evidence, labels):
    """
    Fit and return a k-nearest-neighbour classifier (k=1) on the given
    evidence lists and their corresponding labels.
    """
    # fit() hands back the fitted estimator itself, so it can be returned directly.
    return KNeighborsClassifier(n_neighbors=1).fit(evidence, labels)


def evaluate(labels, predictions):
    """
    Given the actual labels and the predicted labels, return a tuple
    (sensitivity, specificity).

    Each label is assumed to be either 1 (positive) or 0 (negative).
    `labels` and `predictions` may be any iterables of equal length
    (lists, tuples, numpy arrays, ...).

    `sensitivity` is a float from 0 to 1, the "true positive rate": the
    proportion of actual positive labels that were predicted correctly.

    `specificity` is a float from 0 to 1, the "true negative rate": the
    proportion of actual negative labels that were predicted correctly.

    When there are no actual positives (or no actual negatives) the
    corresponding rate is undefined; 0.0 is returned for it instead of
    raising ZeroDivisionError.
    """
    positives = negatives = 0
    true_positives = true_negatives = 0

    # Single pass: tally each class and its correct predictions together,
    # so no list-only method such as .count() is required.
    for actual, predicted in zip(labels, predictions):
        if actual == 1:
            positives += 1
            if actual == predicted:
                true_positives += 1
        else:
            negatives += 1
            if actual == predicted:
                true_negatives += 1

    sensitivity = true_positives / positives if positives else 0.0
    specificity = true_negatives / negatives if negatives else 0.0

    return sensitivity, specificity


# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
python pandas artificial-intelligence
© www.soinside.com 2019 - 2024. All rights reserved.