#!/usr/bin/python3
# _*_ coding=utf-8 _*_
"""Compare several scikit-learn classifiers on the UCI iris dataset.

The script downloads the iris CSV, prints summary statistics, shows a
scatter-matrix plot, cross-validates six classifiers on a training split,
shows a box plot of the cross-validation scores and finally reports how a
k-nearest-neighbours model performs on the held-out validation split.

Run with ``--dbg`` to drop into an interactive console when the run fails.
"""

import argparse
import code
import readline  # noqa: F401 -- side effect: line editing for the debug REPL
import signal
import sys
import traceback

import pandas
# ``pandas.scatter_matrix`` was removed from the top-level namespace; the
# supported location is ``pandas.plotting``.
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def SigHandler_SIGINT(signum, frame):
    """Handle SIGINT: print a newline and exit with status 0.

    Args:
        signum: Signal number passed in by the ``signal`` module (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        SystemExit: Always, via ``sys.exit(0)``.
    """
    print()
    sys.exit(0)


class Argparser(object):
    """Parse the command line and expose the result as ``self.args``."""

    def __init__(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("--string", type=str, help="string")
        parser.add_argument(
            "--bool", action="store_true", help="bool", default=False
        )
        parser.add_argument(
            "--dbg", action="store_true", help="debug", default=False
        )
        # Namespace with the attributes ``string``, ``bool`` and ``dbg``.
        self.args = parser.parse_args()


def _load_iris():
    """Download the iris CSV and return it as a DataFrame with named columns."""
    url = (
        "https://archive.ics.uci.edu/ml/machine-learning-databases/"
        "iris/iris.data"
    )
    columns = [
        "sepal-length",
        "sepal-width",
        "petal-length",
        "petal-width",
        "class",
    ]
    return pandas.read_csv(url, names=columns)


def _summarize(dataset):
    """Print shape, head, statistics and class counts; show a scatter matrix."""
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())
    print(dataset.groupby("class").size())
    scatter_matrix(dataset)
    plt.show()


def _candidate_models():
    """Return the ``(label, estimator)`` pairs to compare, in report order."""
    return [
        ("LR", LogisticRegression()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("KNN", KNeighborsClassifier()),
        ("CART", DecisionTreeClassifier()),
        ("NB", GaussianNB()),
        ("SVM", SVC()),
    ]


def _compare_models(models, X_train, Y_train, scoring="accuracy"):
    """Cross-validate each model, print mean/std and box-plot the scores."""
    # No ``random_state`` here: it has no effect without ``shuffle=True`` and
    # recent scikit-learn releases raise ValueError for that combination.
    # The training rows were already shuffled by ``train_test_split``.
    kfold = model_selection.KFold(n_splits=10)

    results = []
    labels = []
    for label, model in models:
        cv_results = model_selection.cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring=scoring
        )
        results.append(cv_results)
        labels.append(label)
        print("%s:%f(%f)" % (label, cv_results.mean(), cv_results.std()))

    fig = plt.figure()
    fig.suptitle("algorithm comparison")
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xticklabels(labels)
    plt.show()


def _report_knn(X_train, Y_train, X_validation, Y_validation):
    """Fit KNN on the training split and print validation metrics."""
    knn = KNeighborsClassifier()
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_validation)
    print(accuracy_score(Y_validation, predictions))
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))


def marrionette_type_1():
    """Run the full iris classification experiment.

    Steps, in order: download and summarise the data, split it 80/20 into
    training and validation sets with a fixed seed, compare six classifiers
    using 10-fold cross-validation on the training set, then evaluate a
    k-nearest-neighbours model on the validation set.

    All results are printed to stdout or shown as matplotlib windows;
    nothing is returned.
    """
    dataset = _load_iris()
    _summarize(dataset)

    array = dataset.values
    X = array[:, 0:4]  # the four measurement columns
    Y = array[:, 4]  # the "class" column
    validation_size = 0.20
    seed = 7
    X_train, X_validation, Y_train, Y_validation = (
        model_selection.train_test_split(
            X, Y, test_size=validation_size, random_state=seed
        )
    )

    _compare_models(_candidate_models(), X_train, Y_train)
    _report_knn(X_train, Y_train, X_validation, Y_validation)


def premain(argparser):
    """Install the SIGINT handler and run the experiment.

    Args:
        argparser: The parsed ``Argparser`` instance (currently unused here).
    """
    signal.signal(signal.SIGINT, SigHandler_SIGINT)
    marrionette_type_1()


def main():
    """Entry point: run ``premain``, opening a debug REPL on failure if --dbg."""
    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    try:
        premain(argparser)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that the SystemExit
        # raised by the SIGINT handler still terminates the program.
        traceback.print_exc()
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")


if __name__ == "__main__":
    main()