diff options
author | bloodstalker <thabogre@gmail.com> | 2018-09-01 15:11:26 +0000 |
---|---|---|
committer | bloodstalker <thabogre@gmail.com> | 2018-09-01 15:11:26 +0000 |
commit | 3040b01a62da872195421bd210b8e0720f4a0d4b (patch) | |
tree | 9e96d21ad3b2a181beb67c060076831732d32246 | |
parent | update (diff) | |
download | seer-3040b01a62da872195421bd210b8e0720f4a0d4b.tar.gz seer-3040b01a62da872195421bd210b8e0720f4a0d4b.zip |
update
-rwxr-xr-x | marionette.py | 101 |
1 files changed, 101 insertions, 0 deletions
#!/usr/bin/python3
# marionette.py (new file, mode 100755, index 0000000..2222940)
"""Compare several scikit-learn classifiers on the UCI iris dataset.

The script downloads the iris CSV, prints summary statistics, shows a
scatter matrix, cross-validates six classifiers on a training split,
shows a box plot of their accuracies and finally reports how a
k-nearest-neighbours model performs on the held-out validation split.

Pass ``--dbg`` to drop into an interactive console when an error occurs.
"""

import argparse
import code
# Not referenced by name: importing readline enables line editing for
# input()-based prompts such as the debug console started in main().
import readline  # noqa: F401
import signal
import sys
import traceback

import matplotlib.pyplot as plt
import pandas
# The top-level ``pandas.scatter_matrix`` alias is gone from current pandas;
# ``pandas.plotting`` is the supported location.
from pandas.plotting import scatter_matrix
from sklearn import model_selection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

IRIS_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
IRIS_COLUMNS = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"]
VALIDATION_SIZE = 0.20
SEED = 7
SCORING = "accuracy"


def SigHandler_SIGINT(signum, frame):
    """Handle SIGINT: print a newline and exit with status 0."""
    print()
    sys.exit(0)


class Argparser(object):
    """Parse the command line once and expose the result as ``self.args``."""

    def __init__(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("--string", type=str, help="string")
        parser.add_argument("--bool", action="store_true", help="bool", default=False)
        parser.add_argument("--dbg", action="store_true", help="debug", default=False)
        self.args = parser.parse_args()


def _load_dataset():
    """Read the iris CSV from IRIS_URL into a DataFrame with named columns."""
    return pandas.read_csv(IRIS_URL, names=IRIS_COLUMNS)


def _describe_dataset(dataset):
    """Print shape, head, statistics and class counts, then show a scatter matrix."""
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())
    print(dataset.groupby("class").size())
    scatter_matrix(dataset)
    plt.show()


def _build_models():
    """Return the (label, estimator) pairs that are cross-validated."""
    return [
        ("LR", LogisticRegression()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("KNN", KNeighborsClassifier()),
        ("CART", DecisionTreeClassifier()),
        ("NB", GaussianNB()),
        ("SVM", SVC()),
    ]


def _compare_models(X_train, Y_train):
    """Cross-validate every model, print mean(std) accuracy and show a box plot."""
    results = []
    labels = []
    for label, model in _build_models():
        # random_state only has an effect when shuffle=True; current
        # scikit-learn raises ValueError if it is set without shuffling.
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=SEED)
        cv_results = model_selection.cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring=SCORING
        )
        results.append(cv_results)
        labels.append(label)
        print("%s:%f(%f)" % (label, cv_results.mean(), cv_results.std()))

    fig = plt.figure()
    fig.suptitle("algorithm comparison")
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xticklabels(labels)
    plt.show()


def _evaluate_knn(X_train, Y_train, X_validation, Y_validation):
    """Fit KNN on the training split and print its validation metrics."""
    knn = KNeighborsClassifier()
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_validation)
    print(accuracy_score(Y_validation, predictions))
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))


def premain(argparser):
    """Run the whole iris workflow.

    Args:
        argparser: the Argparser instance built in main(); currently unused
            by the workflow itself.
    """
    signal.signal(signal.SIGINT, SigHandler_SIGINT)

    dataset = _load_dataset()
    _describe_dataset(dataset)

    values = dataset.values
    X = values[:, 0:4]  # the four measurement columns
    Y = values[:, 4]  # the "class" column
    X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
        X, Y, test_size=VALIDATION_SIZE, random_state=SEED
    )

    _compare_models(X_train, Y_train)
    _evaluate_knn(X_train, Y_train, X_validation, Y_validation)


def main():
    """Parse arguments and run premain(), optionally with a debug console.

    With ``--dbg``, an exception raised by premain() prints its traceback and
    opens an interactive console whose namespace holds the module globals
    plus main()'s locals.
    """
    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    try:
        premain(argparser)
    except Exception:
        # Exception (not a bare except) lets the SystemExit raised by the
        # SIGINT handler terminate the program instead of opening the console.
        traceback.print_exc()
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")


if __name__ == "__main__":
    main()