#!/usr/bin/python3
# _*_ coding=utf-8 _*_
import argparse
import code
import readline
import signal
import sys
import pandas
from pandas import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
def SigHandler_SIGINT(signum, frame):
    """Handle SIGINT by ending the process with exit status 0.

    ``signum`` and ``frame`` are the standard signal-handler arguments and
    are not inspected.
    """
    # Emit a bare newline before exiting.
    print()
    raise SystemExit(0)
class Argparser(object):
    """Command-line interface of the script.

    The command line is parsed when the object is constructed; the resulting
    namespace is stored on ``self.args`` with the attributes ``string``
    (str or None), ``bool`` and ``dbg`` (both booleans, False unless given).
    """

    def __init__(self):
        cli = argparse.ArgumentParser()
        cli.add_argument("--string", type=str, help="string")
        # Boolean switches: False when absent, True when present.
        for flag, description in (("--bool", "bool"), ("--dbg", "debug")):
            cli.add_argument(
                flag, action="store_true", default=False, help=description
            )
        self.args = cli.parse_args()
def marrionette_type_1():
    """Run a classification walkthrough on the UCI Iris dataset.

    Steps, in order:

    1. Download the CSV and print its shape, first 20 rows, summary
       statistics and the number of rows per class.
    2. Draw a scatter matrix of the data and call ``plt.show()``.
    3. Hold out 20% of the rows as a validation set (seeded split).
    4. Score six classifiers with 10-fold cross-validation on the training
       split, print ``name:mean(std)`` accuracy for each, and show a box
       plot comparing them.
    5. Fit a k-nearest-neighbours classifier on the training split and
       print its accuracy, confusion matrix and classification report on
       the validation split.

    Takes no arguments and returns None. The dataset is fetched over the
    network on every call.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    columns = [
        "sepal-length",
        "sepal-width",
        "petal-length",
        "petal-width",
        "class",
    ]
    dataset = pandas.read_csv(url, names=columns)

    # Quick look at the data.
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())
    print(dataset.groupby("class").size())

    # Alternative views that can be swapped in for the scatter matrix:
    #   dataset.plot(kind="box", subplots=True, layout=(2, 2),
    #                sharex=False, sharey=False)
    #   dataset.hist()
    pandas.plotting.scatter_matrix(dataset)
    plt.show()

    # First four columns are the features, the fifth is the label.
    array = dataset.values
    X = array[:, 0:4]
    Y = array[:, 4]
    validation_size = 0.20
    seed = 7
    X_train, X_validation, Y_train, Y_validation = (
        model_selection.train_test_split(
            X, Y, test_size=validation_size, random_state=seed
        )
    )

    scoring = "accuracy"
    models = [
        ("LR", LogisticRegression()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("KNN", KNeighborsClassifier()),
        ("CART", DecisionTreeClassifier()),
        ("NB", GaussianNB()),
        ("SVM", SVC()),
    ]

    # No random_state here: without shuffle=True the folds are already
    # deterministic, and recent scikit-learn releases raise ValueError when
    # random_state is combined with shuffle=False. The splitter does not
    # depend on the model, so it is built once for all of them.
    kfold = model_selection.KFold(n_splits=10)

    results = []
    model_names = []
    for name, model in models:
        cv_results = model_selection.cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring=scoring
        )
        results.append(cv_results)
        model_names.append(name)
        msg = "%s:%f(%f)" % (name, cv_results.mean(), cv_results.std())
        print(msg)

    # One box per model, labelled in the order the models were scored.
    fig = plt.figure()
    fig.suptitle("algorithm comparison")
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xticklabels(model_names)
    plt.show()

    # Final check: k-nearest-neighbours on the held-out validation split.
    knn = KNeighborsClassifier()
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_validation)
    print(accuracy_score(Y_validation, predictions))
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
# write code here
def premain(argparser):
    """Install the SIGINT handler, then run the Iris walkthrough.

    ``argparser`` is accepted but not used in this function.
    """
    # Register the handler before starting the run so that an interrupt
    # during it is routed through SigHandler_SIGINT.
    signal.signal(signal.SIGINT, SigHandler_SIGINT)
    marrionette_type_1()
def main():
    """Parse the command line and run the script.

    Without ``--dbg`` the run is executed directly and any exception
    propagates. With ``--dbg``, an exception escaping ``premain`` has its
    traceback printed and an interactive console is opened with this
    module's globals and this function's locals in scope.
    """
    import traceback

    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    try:
        premain(argparser)
    except Exception:
        # Deliberately not a bare ``except``: SystemExit (raised by the
        # SIGINT handler via sys.exit) and KeyboardInterrupt must still
        # terminate the program instead of opening the debug console.
        traceback.print_exc()
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")
# Run the program only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()