aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author bloodstalker <thabogre@gmail.com> 2018-09-01 15:11:26 +0000
committer bloodstalker <thabogre@gmail.com> 2018-09-01 15:11:26 +0000
commit 3040b01a62da872195421bd210b8e0720f4a0d4b (patch)
tree 9e96d21ad3b2a181beb67c060076831732d32246
parent update (diff)
download seer-3040b01a62da872195421bd210b8e0720f4a0d4b.tar.gz
seer-3040b01a62da872195421bd210b8e0720f4a0d4b.zip
update
-rwxr-xr-x marionette.py 101
1 files changed, 101 insertions, 0 deletions
diff --git a/marionette.py b/marionette.py
new file mode 100755
index 0000000..2222940
--- /dev/null
+++ b/marionette.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python3
+
+import argparse
+import code
+import readline
+import signal
+import sys
+import pandas
+from pandas import scatter_matrix
+import matplotlib.pyplot as plt
+from sklearn import model_selection
+from sklearn.metrics import classification_report
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import SVC
+
def SigHandler_SIGINT(signum, frame):
    """Handle SIGINT by printing a newline and exiting with status 0.

    Args:
        signum: Signal number passed by the ``signal`` module (unused).
        frame: Interrupted stack frame passed by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    # Emit a bare newline so the shell prompt does not share a line with "^C".
    print()
    raise SystemExit(0)
+
class Argparser(object):
    """Parse the script's command-line options and expose them as ``args``.

    Recognised options: ``--string`` (a string value), ``--bool`` and
    ``--dbg`` (both boolean switches that default to ``False``).
    """

    def __init__(self):
        cli = argparse.ArgumentParser()
        cli.add_argument("--string", type=str, help="string")
        # Both boolean switches share the same shape; register them together.
        for flag, description in (("--bool", "bool"), ("--dbg", "debug")):
            cli.add_argument(
                flag, action="store_true", help=description, default=False
            )
        # Parsing reads sys.argv and exits the process on invalid arguments.
        self.args = cli.parse_args()
+
# write code here
def premain(argparser):
    """Run the iris classification walkthrough end to end.

    Steps, in order: install the SIGINT handler, download the iris CSV,
    print summary statistics, show a scatter matrix, score six classifiers
    with 10-fold cross-validation on an 80/20 train/validation split, show
    a box plot of the per-fold scores, then fit a k-nearest-neighbours
    model and print its metrics on the validation split.

    Both ``plt.show()`` calls block until the plot window is closed.

    Args:
        argparser: The command-line wrapper built by ``main``; not read here.
    """
    signal.signal(signal.SIGINT, SigHandler_SIGINT)

    # --- load and summarise the data -------------------------------------
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    feature_columns = ["sepal-length", "sepal-width", "petal-length", "petal-width"]
    label_column = "class"
    # The CSV has no header row, so column names are supplied explicitly.
    dataset = pandas.read_csv(url, names=feature_columns + [label_column])
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())
    print(dataset.groupby(label_column).size())
    pandas.plotting.scatter_matrix(dataset)
    plt.show()

    # --- split into training and validation sets -------------------------
    # Select by column name so X is a float array rather than an object
    # array that also carries the string labels.
    X = dataset[feature_columns].values
    Y = dataset[label_column].values
    validation_size = 0.20
    seed = 7
    X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
        X, Y, test_size=validation_size, random_state=seed
    )

    # --- compare candidate models with cross-validation ------------------
    scoring = "accuracy"
    models = [
        ("LR", LogisticRegression()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("KNN", KNeighborsClassifier()),
        ("CART", DecisionTreeClassifier()),
        ("NB", GaussianNB()),
        ("SVM", SVC()),
    ]
    # random_state only has an effect when shuffle=True; recent scikit-learn
    # releases raise ValueError if it is set without shuffling. One splitter
    # is shared by all models so each is scored on the same folds.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    results = []
    model_names = []
    for name, model in models:
        cv_results = model_selection.cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring=scoring
        )
        results.append(cv_results)
        model_names.append(name)
        print("%s:%f(%f)" % (name, cv_results.mean(), cv_results.std()))

    fig = plt.figure()
    fig.suptitle("algorithm comparison")
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xticklabels(model_names)
    plt.show()

    # --- final evaluation of KNN on the held-out validation split --------
    knn = KNeighborsClassifier()
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_validation)
    print(accuracy_score(Y_validation, predictions))
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
+
def main():
    """Parse the command line and run ``premain``.

    With ``--dbg``, any ``Exception`` raised by ``premain`` has its
    traceback printed and then an interactive console is opened with this
    module's globals and this function's locals in scope. ``SystemExit``
    and ``KeyboardInterrupt`` are deliberately not intercepted, so the
    SIGINT handler's ``sys.exit(0)`` still terminates the program in debug
    mode. Without ``--dbg``, exceptions propagate normally.
    """
    # Local import: only the debug path needs it.
    import traceback

    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    try:
        premain(argparser)
    except Exception:
        # Show what went wrong before handing control to the REPL.
        traceback.print_exc()
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()