aboutsummaryrefslogblamecommitdiffstats
path: root/marionette.py
blob: 9b8a6e772d3ab8bdd23643610782ee3a7bfc6795 (plain) (tree)
1
2
                  
                      



















                                                                    
 



                                     
 



                                                                





                                                                     

                                       
 
                         
                                                                                    






                       




                                               

                                                                                       


                                           

                     

                          








                                                          










                                                                     


                                                              


















                                                                       
 


                                                   
          
                        
 
 












                                                      
 

                          
#!/usr/bin/python3
# _*_ coding=utf-8 _*_

import argparse
import code
import readline
import signal
import sys
import pandas
from pandas import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def SigHandler_SIGINT(signum, frame):
    """Signal handler: print an empty line and exit with status 0.

    Both arguments are part of the handler signature and are not used.
    """
    print(end="\n")
    raise SystemExit(0)


class Argparser(object):
    """Command-line front end; the parsed namespace is stored in ``args``."""

    def __init__(self):
        cli = argparse.ArgumentParser()
        cli.add_argument("--string", type=str, help="string")
        # Both switches have the same shape: False unless given on the CLI.
        for flag, description in (("--bool", "bool"), ("--dbg", "debug")):
            cli.add_argument(
                flag, action="store_true", help=description, default=False
            )
        self.args = cli.parse_args()


def marrionette_type_1():
    """Run a small classification experiment on the UCI iris data set.

    The function takes no arguments and returns nothing; everything it
    produces is printed or plotted:

    1. Download the iris CSV and print its shape, first 20 rows, summary
       statistics and per-class row counts.
    2. Show a scatter-matrix plot of the data.
    3. Split the rows 80/20 into training and validation sets (seeded).
    4. Score six classifiers with 10-fold cross-validation on the training
       set, print ``name:mean(std)`` accuracy for each and show a box plot
       comparing them.
    5. Fit a k-nearest-neighbours classifier on the training set and print
       its accuracy, confusion matrix and classification report on the
       validation set.

    Needs network access for the download and a matplotlib backend that can
    display figures (``plt.show()`` is called twice).
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    columns = [
        "sepal-length",
        "sepal-width",
        "petal-length",
        "petal-width",
        "class",
    ]
    dataset = pandas.read_csv(url, names=columns)
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())
    print(dataset.groupby("class").size())
    # Alternative exploratory plots, kept for reference:
    # dataset.plot(kind="box", subplots=True, layout=(2,2), sharex=False, sharey=False)
    # dataset.hist()
    pandas.plotting.scatter_matrix(dataset)
    plt.show()

    # First four columns are the features, the fifth is the class label.
    array = dataset.values
    X = array[:, 0:4]
    Y = array[:, 4]
    validation_size = 0.20
    seed = 7
    (
        X_train,
        X_validation,
        Y_train,
        Y_validation,
    ) = model_selection.train_test_split(
        X, Y, test_size=validation_size, random_state=seed
    )

    scoring = "accuracy"
    models = [
        ("LR", LogisticRegression()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("KNN", KNeighborsClassifier()),
        ("CART", DecisionTreeClassifier()),
        ("NB", GaussianNB()),
        ("SVM", SVC()),
    ]
    results = []
    labels = []
    for name, model in models:
        # random_state only has an effect when shuffling is enabled; recent
        # scikit-learn releases raise ValueError if it is set without
        # shuffle=True, so request shuffling explicitly.
        kfold = model_selection.KFold(
            n_splits=10, shuffle=True, random_state=seed
        )
        cv_results = model_selection.cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring=scoring
        )
        results.append(cv_results)
        labels.append(name)
        msg = "%s:%f(%f)" % (name, cv_results.mean(), cv_results.std())
        print(msg)

    fig = plt.figure()
    fig.suptitle("algorithm comparison")
    ax = fig.add_subplot(111)
    plt.boxplot(results)
    ax.set_xticklabels(labels)
    plt.show()

    # Final check: train KNN on the training split, evaluate on validation.
    knn = KNeighborsClassifier()
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_validation)
    print(accuracy_score(Y_validation, predictions))
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))


# write code here
def premain(argparser):
    """Install the SIGINT handler, then run the experiment.

    ``argparser`` is accepted for the caller's convenience; this function
    does not read it.
    """
    on_interrupt = SigHandler_SIGINT
    signal.signal(signal.SIGINT, on_interrupt)
    # Entry point of the actual work.
    marrionette_type_1()


def main():
    """Parse the command line and run ``premain``.

    Without ``--dbg`` any exception raised by ``premain`` propagates to the
    caller. With ``--dbg`` an exception is reported with its traceback and an
    interactive console titled "DEBUG REPL" is opened; its namespace contains
    the module globals plus this function's locals (including the caught
    exception as ``exc``).
    """
    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    # Imported here because it is only needed on the debug path.
    import traceback

    try:
        premain(argparser)
    except Exception as exc:
        # Only real errors open the REPL. SystemExit and KeyboardInterrupt
        # are not subclasses of Exception, so an exit request (for example
        # the SIGINT handler calling sys.exit) still terminates the program.
        traceback.print_exc()
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()