From 02c8661250be26dc35b71c7fa9fb0f2eb9890b44 Mon Sep 17 00:00:00 2001 From: terminaldweller Date: Thu, 27 Jan 2022 21:21:54 +0330 Subject: black and update --- ACO.py | 107 ++++++++++++++++++++ PSO.py | 173 ++++++++++++++++++++++++++++++++ SA.py | 97 ++++++++++++++++++ cnn.py | 285 +++++++++++++++++++++++++++++++++++------------------ digester.py | 90 +++++++++++++---- lstm.py | 311 +++++++++++++++++++++++++++++++++++++++++++++------------- marionette.py | 47 ++++++--- seer.py | 165 ++++++++++++++++++++++++------- stock.py | 23 ++++- tfann.py | 127 ++++++++++++++---------- 10 files changed, 1140 insertions(+), 285 deletions(-) create mode 100755 ACO.py create mode 100755 PSO.py create mode 100755 SA.py diff --git a/ACO.py b/ACO.py new file mode 100755 index 0000000..3ec1ffb --- /dev/null +++ b/ACO.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +import math +import time +import random + + +Nodes = [ + [0, 10, 20, 30, 40], + [10, 0, 20, 30, 40], + [20, 20, 0, 30, 40], + [30, 30, 30, 0, 40], + [40, 40, 40, 40, 0], +] + +Pheromone = [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], +] + + +class Ant: + def __init__(self, node): + self.node = node + self.route = int() + self.exclusion_list = list() + + def __repr__(self): + return "{node: %s}" % (self.node) + + def __str__(self): + return "{node: %s}" % (self.node) + + def nextHop(self): + n = math.ceil(random.random() * len(Nodes)) - 1 + if ( + n not in self.exclusion_list + and n != self.exclusion_list[-1] + and Nodes[self.exclusion_list[-1]][n] != 0 + ): + self.exclusion_list.append(n) + return n + else: + if len(self.exclusion_list) == len(Nodes): + return None + else: + self.nextHop() + + def nextHop2(self): + pass + + def walk(self): + self.exclusion_list.append(self.node) + for _ in range(0, len(Nodes)): + next = self.nextHop() + if next is not None: + self.route += Nodes[self.node][next] + self.node = next + + def printRoute(self): + print(self.exclusion_list) + + +class ACO: + def __init__(self, ant_count): + self.ant_count = ant_count + self.Ants = self.factory() + + def factory(self): + result = list() + for _ in range(0, self.ant_count): + node = math.ceil(random.random() * 5) - 1 + result.append(Ant(node)) + return result + + def fitness(self): + pass + + def run(self): + for ant in self.Ants: + ant.walk() + ant.printRoute() + for i in range(0, len(Nodes) - 1): + Pheromone[ant.exclusion_list[i]][ant.exclusion_list[i + 1]] = ( + 1 / ant.route + ) + + +def main(): + random.seed(time.time()) + rho = 0 + alpha = 1 + beta = 1 + aco = ACO(5) + # for ant in aco.Ants: + # print("ant:", ant) + aco.run() + for i in range(0, len(Pheromone)): + for j in range(0, len(Pheromone)): + print(Pheromone[i][j], end=" ") + print() + + +if __name__ == "__main__": + main() diff --git a/PSO.py b/PSO.py new file mode 100755 index 0000000..0d019cd --- /dev/null +++ b/PSO.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +import time +import random + + +class Particle: + def __init__(self, x, y, z): + self.Pbest_x = 0 + self.Pbest_y = 0 + self.Pbest_z = 0 + self.x = x + self.y = y + self.z = z + self.x_k1 = 0 + self.y_k1 = 0 + self.z_k1 = 0 + self.V_x = 0 + self.V_y = 0 + self.V_z = 0 + self.V_xk1 = 0 + self.V_yk1 = 0 + self.V_zk1 = 0 + + def __repr__(self): + return ( + "{x: %s , y: %s, z: %s, Pbest_x: %s, Pbest_y: %s, Pbest_z: %s}\n" + % ( + self.x, + self.y, + self.z, + self.Pbest_x, + self.Pbest_y, + self.Pbest_z, + ) + ) + + def __str__(self): + return ( + "{x: %s , y: %s, z: %s, Pbest_x: %s, Pbest_y: %s, 
Pbest_z: %s}\n" + % ( + self.x, + self.y, + self.z, + self.Pbest_x, + self.Pbest_y, + self.Pbest_z, + ) + ) + + def update_X(self): + self.x = self.x + self.V_xk1 + self.y = self.y + self.V_yk1 + self.z = self.z + self.V_zk1 + + def update_V(self, w, c1, c2, Gbest_x, Gbest_y, Gbest_z): + rand1 = random.random() + rand2 = random.random() + self.V_xk1 = ( + w * self.V_x + + c1 * rand1 * (self.Pbest_x - self.x) + + c2 * rand2 * (Gbest_x - self.x) + ) + self.V_yk1 = ( + w * self.V_y + + c1 * rand1 * (self.Pbest_y - self.y) + + c2 * rand2 * (Gbest_y - self.y) + ) + self.V_zk1 = ( + w * self.V_z + + c1 * rand1 * (self.Pbest_z - self.z) + + c2 * rand2 * (Gbest_z - self.z) + ) + + def update_Pbest(self, x, y, z: float): + self.Pbest_x = x + self.Pbest_y = y + self.Pbest_z = z + + def doRound(self, w, c1, c2, Gbest_x, Gbest_y, Gbest_z, fitness): + fitness_x = fitness(self.x, self.y, self.z) + self.update_V(w, c1, c2, Gbest_x, Gbest_y, Gbest_z) + self.update_X() + if abs(fitness(self.Pbest_x, self.Pbest_y, self.Pbest_z)) > abs( + fitness_x + ): + self.update_Pbest(self.x, self.y, self.z) + + +class PSO: + def __init__(self, w, c1, c2, particle_count): + self.Gbest_x = 0 + self.Gbest_y = 0 + self.Gbest_z = 0 + self.particle_count = particle_count + self.Particles = self.factory() + self.w = w + self.c1 = c1 + self.c2 = c2 + + def factory(self): + result = list() + for _ in range(1, self.particle_count): + x = ( + random.random() * 10 + if random.random() > 0.5 + else -random.random() * 10 + ) + y = ( + random.random() * 10 + if random.random() > 0.5 + else -random.random() * 10 + ) + z = ( + random.random() * 10 + if random.random() > 0.5 + else -random.random() * 10 + ) + result.append(Particle(x, y, z)) + return result + + def fitness(self, x, y, z: float): + return ( + (x ** 5) - ((x ** 2) * y * z) + (z * x) + (y ** 2) - (z ** 3) - 10 + ) + + def doRround(self): + roundBest_x = float() + roundBest_y = float() + roundBest_z = float() + for particle in self.Particles: + if abs(self.fitness(roundBest_x, roundBest_y, roundBest_z)) > abs( + self.fitness(particle.x, particle.y, particle.z) + ): + roundBest_x = particle.x + roundBest_y = particle.y + roundBest_z = particle.z + self.Gbest_x = roundBest_x + self.Gbest_y = roundBest_y + self.Gbest_z = roundBest_z + for particle in self.Particles: + particle.doRound( + self.w, + self.c1, + self.c2, + self.Gbest_x, + self.Gbest_y, + self.Gbest_z, + self.fitness, + ) + + def printGlobalBest(self): + print( + "x: %s, y: %s, z: %s, fitness: %s" + % ( + self.Gbest_x, + self.Gbest_y, + self.Gbest_z, + self.fitness(self.Gbest_x, self.Gbest_y, self.Gbest_z), + ), + ) + + +def main(): + random.seed(time.time()) + round_count = 10 + pso = PSO(5, 1.5, 1.5, 50) + for _ in range(1, round_count): + pso.doRround() + pso.printGlobalBest() + + +if __name__ == "__main__": + main() diff --git a/SA.py b/SA.py new file mode 100755 index 0000000..24b9a7f --- /dev/null +++ b/SA.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +from random import seed, random +from time import time +import math + + +class X: + def __init__(self, x: float, y: float, z: float): + self.x_now = 0 + self.y_now = 0 + self.z_now = 0 + self.x_k = x + self.y_k = y + self.z_k = z + self.x_k1 = 0 + self.y_k1 = 0 + self.z_k1 = 0 + self.k = 0 + + def neighbour(self): + self.X_k_to_now() + self.x_now = self.x_now + ( + random() / 100 if random() > 0.5 else -random() / 100 + ) + self.y_now = self.y_now + ( + random() / 100 if random() > 0.5 else -random() / 100 + ) + self.z_now = self.z_now + ( + random() / 100 if 
random() > 0.5 else -random() / 100 + ) + self.k = self.k + 1 + + def function(self, x: float, y: float, z: float): + return ( + (x ** 5) - ((x ** 2) * y * z) + (z * x) + (y ** 2) - (z ** 3) - 10 + ) + + def X_now(self): + return self.function(self.x_now, self.y_now, self.z_now) + + def X_k(self): + return self.function(self.x_k, self.y_k, self.z_k) + + def X_k_to_now(self): + self.x_now = self.x_k + self.y_now = self.y_k + self.z_now = self.z_k + + def X_now_to_k1(self): + self.x_k1 = self.x_now + self.y_k1 = self.y_now + self.z_k1 = self.z_now + + def X_k_to_k1(self): + self.x_k1 = self.x_k + self.y_k1 = self.y_k + self.z_k1 = self.z_k + + def X_k1_to_k(self): + self.x_k = self.x_k1 + self.y_k = self.y_k1 + self.z_k = self.z_k1 + + +def SA(): + K_max = 600 + T_zero = 30 + alpha = 0.90 + alpha_step = 20 + x = X(1, 0, -1) + T = T_zero + seed(time()) + for k in range(1, K_max): + x.neighbour() + if x.X_now() <= x.X_k(): + x.X_now_to_k1() + else: + p = math.e ** ((-(x.X_now() - x.X_k())) / T) + seed(time()) + r = random() + if p >= r: + x.X_now_to_k1() + else: + x.X_k_to_k1() + if k % alpha_step == 0: + T = T * alpha + x.X_k1_to_k() + print(x.x_k, x.y_k, x.z_k) + print("k=", x.X_k()) + + +def main(): + SA() + + +if __name__ == "__main__": + main() diff --git a/cnn.py b/cnn.py index 23e450f..bd0cb78 100755 --- a/cnn.py +++ b/cnn.py @@ -34,18 +34,25 @@ import keras from keras import optimizers import matplotlib.pyplot as plt + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--string", type=str, help="string") - parser.add_argument("--bool", action="store_true", help="bool", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--bool", action="store_true", help="bool", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", default=False + ) self.args = parser.parse_args() + class PastSampler(object): def __init__(self, N, K, sliding_window=True): self.N = N @@ -55,23 +62,30 @@ class PastSampler(object): def transform(self, A): M = self.N + self.K if self.sliding_window: - I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1,1) + I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1) else: - if A.shape[0]%M == 0: - I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1,1) + if A.shape[0] % M == 0: + I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1, 1) else: - I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(-1,1) + I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape( + -1, 1 + ) - B = A[I].reshape(-1, M*A.shape[1], A.shape[2]) - ci = self.N*A.shape[1] + B = A[I].reshape(-1, M * A.shape[1], A.shape[2]) + ci = self.N * A.shape[1] return B[:, :ci], B[:, ci:] + def getData(symbol_str): data_file = Path("./cnn/" + symbol_str + ".csv") - original_columns =["close", "date", "high", "low", "open"] + original_columns = ["close", "date", "high", "low", "open"] new_columns = ["Close", "Timestamp", "High", "Low", "Open"] columns = ["Close"] - url = "https://poloniex.com/public?command=returnChartData¤cyPair=USDT_" + symbol_str + "&start=1356998100&end=9999999999&period=300" + url = ( + "https://poloniex.com/public?command=returnChartData¤cyPair=USDT_" + + symbol_str + + "&start=1356998100&end=9999999999&period=300" + ) r = requests.get(url) d = json.loads(r.content.decode("utf-8")) df = pd.DataFrame(d) @@ -85,15 +99,16 @@ def getData(symbol_str): 
original_df = pd.read_csv(data_file).loc[:, columns] return df, original_df, time_stamps + def Scaler(df, original_df, time_stamps, symbol_str): - file_name="./cnn/" + symbol_str + "_close.h5" + file_name = "./cnn/" + symbol_str + "_close.h5" scaler = MinMaxScaler() - columns= ["Close"] + columns = ["Close"] for c in columns: - df[c] = scaler.fit_transform(df[c].values.reshape(-1,1)) - A = np.array(df)[:,None,:] - original_A = np.array(original_df)[:,None,:] - time_stamps = np.array(time_stamps)[:,None,None] + df[c] = scaler.fit_transform(df[c].values.reshape(-1, 1)) + A = np.array(df)[:, None, :] + original_A = np.array(original_df)[:, None, :] + time_stamps = np.array(time_stamps)[:, None, None] NPS, NFS = 256, 16 ps = PastSampler(NPS, NFS, sliding_window=False) B, Y = ps.transform(A) @@ -109,15 +124,16 @@ def Scaler(df, original_df, time_stamps, symbol_str): f.create_dataset("original_inputs", data=original_B) f.create_dataset("original_outputs", data=original_Y) + def cnn_type_1(symbol_str): df, original_df, time_stamps = getData(symbol_str) Scaler(df, original_df, time_stamps, symbol_str) # run on gpu - ''' + """ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "1" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" - ''' + """ config = tf.ConfigProto() config.gpu_options.allow_growth = True @@ -134,21 +150,29 @@ def cnn_type_1(symbol_str): epochs = 100 - #split training validation - training_size = int(0.8* datas.shape[0]) - training_datas = datas[:training_size,:] - training_labels = labels[:training_size,:] - validation_datas = datas[training_size:,:] - validation_labels = labels[training_size:,:] + # split training validation + training_size = int(0.8 * datas.shape[0]) + training_datas = datas[:training_size, :] + training_labels = labels[:training_size, :] + validation_datas = datas[training_size:, :] + validation_labels = labels[training_size:, :] model = Sequential() # 2 Layers - model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20)) + model.add( + Conv1D( + activation="relu", + input_shape=(step_size, nb_features), + strides=3, + filters=8, + kernel_size=20, + ) + ) model.add(Dropout(0.5)) - model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16)) + model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16)) - ''' + """ # 3 Layers model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=8)) #model.add(LeakyReLU()) @@ -168,10 +192,27 @@ def cnn_type_1(symbol_str): #model.add(LeakyReLU()) model.add(Dropout(0.5)) model.add(Conv1D( strides=2, filters=nb_features, kernel_size=2)) - ''' + """ + + model.compile(loss="mse", optimizer="adam") + model.fit( + training_datas, + training_labels, + verbose=1, + batch_size=batch_size, + validation_data=(validation_datas, validation_labels), + epochs=epochs, + callbacks=[ + CSVLogger(output_file_name + ".csv", append=True), + ModelCheckpoint( + output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5", + monitor="val_loss", + verbose=1, + mode="min", + ), + ], + ) - model.compile(loss='mse', optimizer='adam') - model.fit(training_datas, training_labels,verbose=1, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')]) def lstm_type_cnn_1(symbol_str, kind): df, original_df, time_stamps = 
getData(symbol_str) @@ -187,35 +228,65 @@ def lstm_type_cnn_1(symbol_str, kind): set_session(tf.Session(config=config)) with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf: - datas = hf['inputs'].value - labels = hf['outputs'].value + datas = hf["inputs"].value + labels = hf["outputs"].value step_size = datas.shape[1] - units= 50 + units = 50 second_units = 30 batch_size = 8 nb_features = datas.shape[2] epochs = 100 - output_size=16 + output_size = 16 output_file_name = "cnn/" + symbol_str + "_CNN_LSTM_2_relu" - #split training validation - training_size = int(0.8* datas.shape[0]) - training_datas = datas[:training_size,:] - training_labels = labels[:training_size,:,0] - validation_datas = datas[training_size:,:] - validation_labels = labels[training_size:,:,0] - - #build model + # split training validation + training_size = int(0.8 * datas.shape[0]) + training_datas = datas[:training_size, :] + training_labels = labels[:training_size, :, 0] + validation_datas = datas[training_size:, :] + validation_labels = labels[training_size:, :, 0] + + # build model model = Sequential() if kind == "GRU": - model.add(GRU(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False)) + model.add( + GRU( + units=units, + activation="tanh", + input_shape=(step_size, nb_features), + return_sequences=False, + ) + ) elif kind == "LSTM": - model.add(LSTM(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False)) + model.add( + LSTM( + units=units, + activation="tanh", + input_shape=(step_size, nb_features), + return_sequences=False, + ) + ) model.add(Dropout(0.8)) model.add(Dense(output_size)) model.add(LeakyReLU()) - model.compile(loss='mse', optimizer='adam') - model.fit(training_datas, training_labels, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')]) + model.compile(loss="mse", optimizer="adam") + model.fit( + training_datas, + training_labels, + batch_size=batch_size, + validation_data=(validation_datas, validation_labels), + epochs=epochs, + callbacks=[ + CSVLogger(output_file_name + ".csv", append=True), + ModelCheckpoint( + output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5", + monitor="val_loss", + verbose=1, + mode="min", + ), + ], + ) + def load_cnn_type_1(symbol_str, vis_year, vis_month): df, original_df, time_stamps = getData(symbol_str) @@ -227,83 +298,107 @@ def load_cnn_type_1(symbol_str, vis_year, vis_month): """ with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf: - datas = hf['inputs'].value - labels = hf['outputs'].value - input_times = hf['input_times'].value - output_times = hf['output_times'].value - original_inputs = hf['original_inputs'].value - original_outputs = hf['original_outputs'].value - original_datas = hf['original_datas'].value - - scaler=MinMaxScaler() - #split training validation - training_size = int(0.8* datas.shape[0]) - training_datas = datas[:training_size,:,:] - training_labels = labels[:training_size,:,:] - validation_datas = datas[training_size:,:,:] - validation_labels = labels[training_size:,:,:] - validation_original_outputs = original_outputs[training_size:,:,:] - validation_original_inputs = original_inputs[training_size:,:,:] - validation_input_times = input_times[training_size:,:,:] - validation_output_times = output_times[training_size:,:,:] - 
- ground_true = np.append(validation_original_inputs,validation_original_outputs, axis=1) - ground_true_times = np.append(validation_input_times,validation_output_times, axis=1) + datas = hf["inputs"].value + labels = hf["outputs"].value + input_times = hf["input_times"].value + output_times = hf["output_times"].value + original_inputs = hf["original_inputs"].value + original_outputs = hf["original_outputs"].value + original_datas = hf["original_datas"].value + + scaler = MinMaxScaler() + # split training validation + training_size = int(0.8 * datas.shape[0]) + training_datas = datas[:training_size, :, :] + training_labels = labels[:training_size, :, :] + validation_datas = datas[training_size:, :, :] + validation_labels = labels[training_size:, :, :] + validation_original_outputs = original_outputs[training_size:, :, :] + validation_original_inputs = original_inputs[training_size:, :, :] + validation_input_times = input_times[training_size:, :, :] + validation_output_times = output_times[training_size:, :, :] + + ground_true = np.append( + validation_original_inputs, validation_original_outputs, axis=1 + ) + ground_true_times = np.append( + validation_input_times, validation_output_times, axis=1 + ) step_size = datas.shape[1] - batch_size= 8 + batch_size = 8 nb_features = datas.shape[2] model = Sequential() # 2 layers - model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20)) + model.add( + Conv1D( + activation="relu", + input_shape=(step_size, nb_features), + strides=3, + filters=8, + kernel_size=20, + ) + ) # model.add(LeakyReLU()) model.add(Dropout(0.25)) - model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16)) + model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16)) model.load_weights("cnn/" + symbol_str + "_CNN_2_relu-76-0.00036.hdf5") - model.compile(loss='mse', optimizer='adam') + model.compile(loss="mse", optimizer="adam") predicted = model.predict(validation_datas) predicted_inverted = [] for i in range(original_datas.shape[1]): scaler.fit(original_datas[:, i].reshape(-1, 1)) - predicted_inverted.append(scaler.inverse_transform(predicted[:,:,i])) + predicted_inverted.append(scaler.inverse_transform(predicted[:, :, i])) print(np.array(predicted_inverted).shape) - #get only the close data - ground_true = ground_true[:,:,0].reshape(-1) + # get only the close data + ground_true = ground_true[:, :, 0].reshape(-1) ground_true_times = ground_true_times.reshape(-1) - ground_true_times = pd.to_datetime(ground_true_times, unit='s') + ground_true_times = pd.to_datetime(ground_true_times, unit="s") # since we are appending in the first dimension - predicted_inverted = np.array(predicted_inverted)[0,:,:].reshape(-1) + predicted_inverted = np.array(predicted_inverted)[0, :, :].reshape(-1) print(np.array(predicted_inverted).shape) - validation_output_times = pd.to_datetime(validation_output_times.reshape(-1), unit='s') + validation_output_times = pd.to_datetime( + validation_output_times.reshape(-1), unit="s" + ) ground_true_df = pd.DataFrame() - ground_true_df['times'] = ground_true_times - ground_true_df['value'] = ground_true + ground_true_df["times"] = ground_true_times + ground_true_df["value"] = ground_true prediction_df = pd.DataFrame() - prediction_df['times'] = validation_output_times - prediction_df['value'] = predicted_inverted - - prediction_df = prediction_df.loc[(prediction_df["times"].dt.year == vis_year )&(prediction_df["times"].dt.month > vis_month ),: ] - ground_true_df = 
ground_true_df.loc[(ground_true_df["times"].dt.year == vis_year )&(ground_true_df["times"].dt.month > vis_month ),:] - - plt.figure(figsize=(20,10)) - plt.plot(ground_true_df.times,ground_true_df.value, label = 'Actual') - plt.plot(prediction_df.times,prediction_df.value,'ro', label='Predicted') - plt.legend(loc='upper left') + prediction_df["times"] = validation_output_times + prediction_df["value"] = predicted_inverted + + prediction_df = prediction_df.loc[ + (prediction_df["times"].dt.year == vis_year) + & (prediction_df["times"].dt.month > vis_month), + :, + ] + ground_true_df = ground_true_df.loc[ + (ground_true_df["times"].dt.year == vis_year) + & (ground_true_df["times"].dt.month > vis_month), + :, + ] + + plt.figure(figsize=(20, 10)) + plt.plot(ground_true_df.times, ground_true_df.value, label="Actual") + plt.plot(prediction_df.times, prediction_df.value, "ro", label="Predicted") + plt.legend(loc="upper left") plt.show() + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here - #cnn_type_1("ETH") - #lstm_type_cnn_1("ETH", "GRU") + # here + # cnn_type_1("ETH") + # lstm_type_cnn_1("ETH", "GRU") load_cnn_type_1("ETH", 2018, 4) + def main(): argparser = Argparser() if argparser.args.dbg: @@ -311,7 +406,8 @@ def main(): premain(argparser) except Exception as e: print(e.__doc__) - if e.message: print(e.message) + if e.message: + print(e.message) variables = globals().copy() variables.update(locals()) shell = code.InteractiveConsole(variables) @@ -319,5 +415,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/digester.py b/digester.py index 0c17b1c..9f7bff9 100755 --- a/digester.py +++ b/digester.py @@ -17,37 +17,67 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.naive_bayes import GaussianNB, MultinomialNB from sklearn.svm import SVC + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--string", type=str, help="string") - parser.add_argument("--bool", action="store_true", help="bool", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--bool", action="store_true", help="bool", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", default=False + ) self.args = parser.parse_args() + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here - dataframe = pd.read_csv('/tmp/features.csv') + # here + dataframe = pd.read_csv("/tmp/features.csv") dataframe.head() y = dataframe.target - X = dataframe.drop(['target'], axis=1) + X = dataframe.drop(["target"], axis=1) - corpus = X['attr'] + corpus = X["attr"] vc = CountVectorizer() vc.fit(corpus) - numeric_features = pd.concat([X.drop(['attr'], axis=1), pd.DataFrame(vc.transform(corpus).toarray(), columns=vc.vocabulary_)], axis=1) + numeric_features = pd.concat( + [ + X.drop(["attr"], axis=1), + pd.DataFrame( + vc.transform(corpus).toarray(), columns=vc.vocabulary_ + ), + ], + axis=1, + ) numeric_features.head() - plt.scatter(dataframe.index, dataframe.target, color='red', label='target') - plt.scatter(numeric_features.index, numeric_features.depth, color='green', label='depth') - plt.scatter(numeric_features.index, numeric_features.text_ratio, color='blue', label='text_ratio') - plt.scatter(numeric_features.index, numeric_features.alink_text_ratio, color='skyblue', label='alink_text_ratio') + 
plt.scatter(dataframe.index, dataframe.target, color="red", label="target") + plt.scatter( + numeric_features.index, + numeric_features.depth, + color="green", + label="depth", + ) + plt.scatter( + numeric_features.index, + numeric_features.text_ratio, + color="blue", + label="text_ratio", + ) + plt.scatter( + numeric_features.index, + numeric_features.alink_text_ratio, + color="skyblue", + label="alink_text_ratio", + ) plt.legend(loc=(1, 0)) plt.show() scaler = preprocessing.StandardScaler() @@ -56,23 +86,43 @@ def premain(argparser): # clf = MultinomialNB() # clf = RandomForestClassifier() - clf = SVC(C=1, kernel='poly', probability=True) + clf = SVC(C=1, kernel="poly", probability=True) clf.fit(scaled_X, y) predicted_index = clf.predict(scaled_X).tolist().index(True) scaled_X = scaler.transform(numeric_features) pred_y = clf.predict(scaled_X) - print pd.DataFrame(clf.predict_log_proba(scaled_X),columns=clf.classes_) - print 'Number of mispredicted out of %d is %d (%.2f%%)' % (y.shape[0], (y!=pred_y).sum(), (y!=pred_y).sum()*100.0/y.shape[0]) - print - print 'Predicted rows:' - print dataframe[pred_y].drop(['text_ratio', 'alink_text_ratio', 'contain_title'], axis=1).merge(pd.DataFrame(clf.predict_log_proba(scaled_X)[pred_y],columns=clf.classes_, index=dataframe[pred_y].index), left_index=True, right_index=True) - print + print(pd.DataFrame(clf.predict_log_proba(scaled_X), columns=clf.classes_)) + print( + "Number of mispredicted out of %d is %d (%.2f%%)" + % ( + y.shape[0], + (y != pred_y).sum(), + (y != pred_y).sum() * 100.0 / y.shape[0], + ) + ) + print() + print("Predicted rows:") + print( + dataframe[pred_y] + .drop(["text_ratio", "alink_text_ratio", "contain_title"], axis=1) + .merge( + pd.DataFrame( + clf.predict_log_proba(scaled_X)[pred_y], + columns=clf.classes_, + index=dataframe[pred_y].index, + ), + left_index=True, + right_index=True, + ) + ) + print() # print 'Acutual rows:' # print dataframe[dataframe.target] + def main(): argparser = Argparser() if argparser.args.dbg: @@ -80,7 +130,8 @@ def main(): premain(argparser) except Exception as e: print(e.__doc__) - if e.message: print(e.message) + if e.message: + print(e.message) variables = globals().copy() variables.update(locals()) shell = code.InteractiveConsole(variables) @@ -88,5 +139,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/lstm.py b/lstm.py index 935a8c2..7d65552 100755 --- a/lstm.py +++ b/lstm.py @@ -1,9 +1,9 @@ #!python # _*_ coding=utf-8 _*_ -#original source:https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb +# original source:https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb -#@#!pip install lxml -#@#!mkdir lstm-models +# @#!pip install lxml +# @#!mkdir lstm-models import argparse import code import readline @@ -26,70 +26,124 @@ from keras import layers window_len = 10 split_date = "2018-03-01" + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--string", type=str, help="string") - parser.add_argument("--bool", action="store_true", help="bool", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--bool", action="store_true", help="bool", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", 
default=False + ) self.args = parser.parse_args() + def getData_CMC(crypto, crypto_short): - market_info = pd.read_html("https://coinmarketcap.com/currencies/"+crypto+"/historical-data/?start=20160428&end="+time.strftime("%Y%m%d"))[0] + market_info = pd.read_html( + "https://coinmarketcap.com/currencies/" + + crypto + + "/historical-data/?start=20160428&end=" + + time.strftime("%Y%m%d") + )[0] print(type(market_info)) - market_info = market_info.assign(Date=pd.to_datetime(market_info['Date'])) - #print(market_info) - #if crypto == "ethereum": market_info.loc[market_info["Market Cap"]=="-","Market Cap"]=0 - #if crypto == "dogecoin": market_info.loc[market_info["Volume"]=="-","Volume"]=0 + market_info = market_info.assign(Date=pd.to_datetime(market_info["Date"])) + # print(market_info) + # if crypto == "ethereum": market_info.loc[market_info["Market Cap"]=="-","Market Cap"]=0 + # if crypto == "dogecoin": market_info.loc[market_info["Volume"]=="-","Volume"]=0 market_info["Volume"] = market_info["Volume"].astype("int64") market_info.columns = market_info.columns.str.replace("*", "") - #print(type(market_info)) - #print(crypto + " head: ") - #print(market_info.head()) - kwargs = {'close_off_high': lambda x: 2*(x['High']- x['Close'])/(x['High']-x['Low'])-1, 'volatility': lambda x: (x['High']- x['Low'])/(x['Open'])} + # print(type(market_info)) + # print(crypto + " head: ") + # print(market_info.head()) + kwargs = { + "close_off_high": lambda x: 2 + * (x["High"] - x["Close"]) + / (x["High"] - x["Low"]) + - 1, + "volatility": lambda x: (x["High"] - x["Low"]) / (x["Open"]), + } market_info = market_info.assign(**kwargs) - model_data = market_info[['Date']+[coin+metric for coin in [""] for metric in ['Close','Volume','close_off_high','volatility']]] - model_data = model_data.sort_values(by='Date') - #print(model_data.head()) + model_data = market_info[ + ["Date"] + + [ + coin + metric + for coin in [""] + for metric in ["Close", "Volume", "close_off_high", "volatility"] + ] + ] + model_data = model_data.sort_values(by="Date") + # print(model_data.head()) print(type(model_data)) return model_data + def getData_Stock(name, period): - info = pd.read_csv("./data/"+name+"/"+period+".csv", encoding="utf-16") + info = pd.read_csv( + "./data/" + name + "/" + period + ".csv", encoding="utf-16" + ) return info + def get_sets(crypto, model_data): - training_set, test_set = model_data[model_data['Date']=split_date] - training_set = training_set.drop('Date', 1) - test_set = test_set.drop('Date', 1) - norm_cols = [coin+metric for coin in [] for metric in ['Close', 'Volume']] + training_set, test_set = ( + model_data[model_data["Date"] < split_date], + model_data[model_data["Date"] >= split_date], + ) + training_set = training_set.drop("Date", 1) + test_set = test_set.drop("Date", 1) + norm_cols = [ + coin + metric for coin in [] for metric in ["Close", "Volume"] + ] LSTM_training_inputs = [] for i in range(len(training_set) - window_len): - temp_set = training_set[i:(i+window_len)].copy() + temp_set = training_set[i : (i + window_len)].copy() for col in norm_cols: - temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1 + temp_set.loc[:, col] = temp_set[col] / temp_set[col].iloc[0] - 1 LSTM_training_inputs.append(temp_set) - LSTM_training_outputs = (training_set["Close"][window_len:].values/training_set["Close"][:-window_len].values) - 1 + LSTM_training_outputs = ( + training_set["Close"][window_len:].values + / training_set["Close"][:-window_len].values + ) - 1 LSTM_test_inputs = [] - for i in 
range(len(test_set)-window_len): - temp_set = test_set[i:(i+window_len)].copy() + for i in range(len(test_set) - window_len): + temp_set = test_set[i : (i + window_len)].copy() for col in norm_cols: - temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1 + temp_set.loc[:, col] = temp_set[col] / temp_set[col].iloc[0] - 1 LSTM_test_inputs.append(temp_set) - LSTM_test_outputs = (test_set['Close'][window_len:].values/test_set['Close'][:-window_len].values)-1 + LSTM_test_outputs = ( + test_set["Close"][window_len:].values + / test_set["Close"][:-window_len].values + ) - 1 print(LSTM_training_inputs[0]) - LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs] + LSTM_training_inputs = [ + np.array(LSTM_training_input) + for LSTM_training_input in LSTM_training_inputs + ] LSTM_training_inputs = np.array(LSTM_training_inputs) - LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs] + LSTM_test_inputs = [ + np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs + ] LSTM_test_inputs = np.array(LSTM_test_inputs) return LSTM_training_inputs, LSTM_test_inputs, training_set, test_set -def build_model(inputs, output_size, neurons, activ_func="linear", dropout=0.25, loss="mae", optimizer="adam"): + +def build_model( + inputs, + output_size, + neurons, + activ_func="linear", + dropout=0.25, + loss="mae", + optimizer="adam", +): model = Sequential() model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2]))) model.add(Dropout(dropout)) @@ -98,19 +152,27 @@ def build_model(inputs, output_size, neurons, activ_func="linear", dropout=0.25, model.compile(loss=loss, optimizer=optimizer) return model + def stock(): split_date = "2017.01.01" model_data = getData_Stock("irxo", "Daily") - model_data = model_data.sort_values(by='Date') + model_data = model_data.sort_values(by="Date") - training_set, test_set = model_data[model_data['Date']=split_date] - training_set = training_set.drop('Date', 1) - test_set = test_set.drop('Date', 1) + training_set, test_set = ( + model_data[model_data["Date"] < split_date], + model_data[model_data["Date"] >= split_date], + ) + training_set = training_set.drop("Date", 1) + test_set = test_set.drop("Date", 1) training_inputs = training_set - training_outputs = training_set.drop(['Open', 'High', 'Low', 'NTx', 'Volume'], axis=1) + training_outputs = training_set.drop( + ["Open", "High", "Low", "NTx", "Volume"], axis=1 + ) test_inputs = test_set - test_outputs = test_set.drop(["Open", "High", "Low", "NTx", "Volume"], axis=1) + test_outputs = test_set.drop( + ["Open", "High", "Low", "NTx", "Volume"], axis=1 + ) print(training_set.head) print(test_set.head) @@ -120,77 +182,184 @@ def stock(): print(test_outputs.shape) model = models.Sequential() - model.add(layers.Dense(64, activation="relu", input_shape=(training_inputs.shape[1],))) + model.add( + layers.Dense( + 64, activation="relu", input_shape=(training_inputs.shape[1],) + ) + ) model.add(layers.Dense(64, activation="relu")) model.add(layers.Dense(1)) model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"]) - history = model.fit(training_inputs, training_outputs, validation_data=(test_inputs, test_outputs), epochs=10, batch_size=1, verbose=2) + history = model.fit( + training_inputs, + training_outputs, + validation_data=(test_inputs, test_outputs), + epochs=10, + batch_size=1, + verbose=2, + ) + def lstm_type_1(crypto, crypto_short): model_data = getData_CMC(crypto, crypto_short) np.random.seed(202) - 
training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data) + training_inputs, test_inputs, training_set, test_set = get_sets( + crypto, model_data + ) model = build_model(training_inputs, output_size=1, neurons=20, loss="mse") - training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1 - history = model.fit(training_inputs, training_outputs, epochs=50, batch_size=1, verbose=2, shuffle=True) + training_outputs = ( + training_set["Close"][window_len:].values + / training_set["Close"][:-window_len].values + ) - 1 + history = model.fit( + training_inputs, + training_outputs, + epochs=50, + batch_size=1, + verbose=2, + shuffle=True, + ) + def lstm_type_4(crypto, crypto_short, crypto2, crypto_short2): model_data = getData_CMC(crypto, crypto_short) model_data2 = getData_CMC(crypto2, crypto_short2) np.random.seed(202) - training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data) - training_inputs2, test_inputs2, training_set2, test_set2 = get_sets(crypto2, model_data2) + training_inputs, test_inputs, training_set, test_set = get_sets( + crypto, model_data + ) + training_inputs2, test_inputs2, training_set2, test_set2 = get_sets( + crypto2, model_data2 + ) return - model = build_model(training_inputs/training_inputs2, output_size=1, neurons=20, loss="mse") - training_outputs = ((training_set['Close'][window_len:].values)/(training_set['Close'][:-window_len].values))-1 - history = model.fit(training_inputs/training_inputs2, training_outputs, epochs=10, batch_size=1, verbose=2, shuffle=True) + model = build_model( + training_inputs / training_inputs2, + output_size=1, + neurons=20, + loss="mse", + ) + training_outputs = ( + (training_set["Close"][window_len:].values) + / (training_set["Close"][:-window_len].values) + ) - 1 + history = model.fit( + training_inputs / training_inputs2, + training_outputs, + epochs=10, + batch_size=1, + verbose=2, + shuffle=True, + ) + def lstm_type_2(crypto, crypto_short, pred_range, neuron_count): model_data = getData_CMC(crypto, crypto_short) np.random.seed(202) - training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data) - model = build_model(training_inputs, output_size=pred_range, neurons=neuron_count, loss="mse") - training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1 + training_inputs, test_inputs, training_set, test_set = get_sets( + crypto, model_data + ) + model = build_model( + training_inputs, + output_size=pred_range, + neurons=neuron_count, + loss="mse", + ) + training_outputs = ( + training_set["Close"][window_len:].values + / training_set["Close"][:-window_len].values + ) - 1 training_outputs = [] - for i in range(window_len, len(training_set['Close'])-pred_range): - training_outputs.append((training_set['Close'][i:i+pred_range].values/training_set['Close'].values[i-window_len])-1) + for i in range(window_len, len(training_set["Close"]) - pred_range): + training_outputs.append( + ( + training_set["Close"][i : i + pred_range].values + / training_set["Close"].values[i - window_len] + ) + - 1 + ) training_outputs = np.array(training_outputs) - history = model.fit(training_inputs[:-pred_range], training_outputs, epochs=50, batch_size=1, verbose=2, shuffle=True) + history = model.fit( + training_inputs[:-pred_range], + training_outputs, + epochs=50, + batch_size=1, + verbose=2, + shuffle=True, + ) + def lstm_type_3(crypto, crypto_short, pred_range, neuron_count): model_data = 
getData_CMC(crypto, crypto_short) np.random.seed(202) - training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data) + training_inputs, test_inputs, training_set, test_set = get_sets( + crypto, model_data + ) model = build_model(training_inputs, output_size=1, neurons=neuron_count) - training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1 + training_outputs = ( + training_set["Close"][window_len:].values + / training_set["Close"][:-window_len].values + ) - 1 training_outputs = [] for rand_seed in range(775, 800): print(rand_seed) np.random.seed(rand_seed) - temp_model = build_model(training_inputs, output_size=1, neurons=neuron_count) - temp_model.fit(training_inputs, (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1, epochs=50, batch_size=1, verbose=0, shuffle=True) - temp_model.save("./lstm-models/" + crypto + '_model_randseed_%d.h5'%rand_seed) + temp_model = build_model( + training_inputs, output_size=1, neurons=neuron_count + ) + temp_model.fit( + training_inputs, + ( + training_set["Close"][window_len:].values + / training_set["Close"][:-window_len].values + ) + - 1, + epochs=50, + batch_size=1, + verbose=0, + shuffle=True, + ) + temp_model.save( + "./lstm-models/" + crypto + "_model_randseed_%d.h5" % rand_seed + ) + def load_models(crypto, crypto_short): preds = [] model_data = getData_CMC(crypto, crypto_short) np.random.seed(202) - training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data) - for rand_seed in range(775,800): - temp_model = load_model("./lstm-models/" + crypto + '_model_randseed_%d.h5'%rand_seed) - preds.append(np.mean(abs(np.transpose(temp_model.predict(test_inputs))-(test_set['Close'].values[window_len:]/test_set['Close'].values[:-window_len]-1)))) + training_inputs, test_inputs, training_set, test_set = get_sets( + crypto, model_data + ) + for rand_seed in range(775, 800): + temp_model = load_model( + "./lstm-models/" + crypto + "_model_randseed_%d.h5" % rand_seed + ) + preds.append( + np.mean( + abs( + np.transpose(temp_model.predict(test_inputs)) + - ( + test_set["Close"].values[window_len:] + / test_set["Close"].values[:-window_len] + - 1 + ) + ) + ) + ) + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here - #lstm_type_1("ethereum", "ether") - #lstm_type_2("ethereum", "ether", 5, 20) - #lstm_type_3("ethereum", "ether", 5, 20) - #lstm_type_4("ethereum", "ether", "dogecoin", "doge") - #load_models("ethereum", "eth") + # here + # lstm_type_1("ethereum", "ether") + # lstm_type_2("ethereum", "ether", 5, 20) + # lstm_type_3("ethereum", "ether", 5, 20) + # lstm_type_4("ethereum", "ether", "dogecoin", "doge") + # load_models("ethereum", "eth") stock() + def main(): argparser = Argparser() if argparser.args.dbg: @@ -198,7 +367,8 @@ def main(): premain(argparser) except Exception as e: print(e.__doc__) - if e.message: print(e.message) + if e.message: + print(e.message) variables = globals().copy() variables.update(locals()) shell = code.InteractiveConsole(variables) @@ -206,5 +376,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/marionette.py b/marionette.py index 86c2175..9b8a6e7 100755 --- a/marionette.py +++ b/marionette.py @@ -20,37 +20,57 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + 
class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--string", type=str, help="string") - parser.add_argument("--bool", action="store_true", help="bool", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--bool", action="store_true", help="bool", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", default=False + ) self.args = parser.parse_args() + def marrionette_type_1(): url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" - names = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"] + names = [ + "sepal-length", + "sepal-width", + "petal-length", + "petal-width", + "class", + ] dataset = pandas.read_csv(url, names=names) print(dataset.shape) print(dataset.head(20)) print(dataset.describe()) print(dataset.groupby("class").size()) - #dataset.plot(kind="box", subplots=True, layout=(2,2), sharex=False, sharey=False) - #dataset.hist() + # dataset.plot(kind="box", subplots=True, layout=(2,2), sharex=False, sharey=False) + # dataset.hist() pandas.plotting.scatter_matrix(dataset) plt.show() array = dataset.values - X = array[:,0:4] - Y = array[:,4] + X = array[:, 0:4] + Y = array[:, 4] validation_size = 0.20 seed = 7 - X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X,Y,test_size=validation_size, random_state=seed) - scoring="accuracy" + ( + X_train, + X_validation, + Y_train, + Y_validation, + ) = model_selection.train_test_split( + X, Y, test_size=validation_size, random_state=seed + ) + scoring = "accuracy" models = [] models.append(("LR", LogisticRegression())) models.append(("LDA", LinearDiscriminantAnalysis())) @@ -62,7 +82,9 @@ def marrionette_type_1(): names = [] for name, model in models: kfold = model_selection.KFold(n_splits=10, random_state=seed) - cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring) + cv_results = model_selection.cross_val_score( + model, X_train, Y_train, cv=kfold, scoring=scoring + ) results.append(cv_results) names.append(name) msg = "%s:%f(%f)" % (name, cv_results.mean(), cv_results.std()) @@ -82,12 +104,14 @@ def marrionette_type_1(): print(confusion_matrix(Y_validation, predictions)) print(classification_report(Y_validation, predictions)) + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here + # here marrionette_type_1() + def main(): argparser = Argparser() if argparser.args.dbg: @@ -101,5 +125,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/seer.py b/seer.py index bf5fa0b..da56011 100755 --- a/seer.py +++ b/seer.py @@ -19,61 +19,106 @@ import googleapiclient.http import oauth2client.client import io + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--which", type=str, help="which one to run") - parser.add_argument("--download", type=str, help="file name to download") + parser.add_argument( + "--download", type=str, help="file name to download" + ) parser.add_argument("--what", type=str, help="train or predict") - parser.add_argument("--pysrcupdate", type=str, nargs="+", help="name of source files to update on the drive") - parser.add_argument("--gpu", action="store_true", help="use gpu. 
if false will use cpu", default=False) - parser.add_argument("--test1", action="store_true", help="test switch 1", default=False) - parser.add_argument("--test2", action="store_true", help="test switch 2", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--pysrcupdate", + type=str, + nargs="+", + help="name of source files to update on the drive", + ) + parser.add_argument( + "--gpu", + action="store_true", + help="use gpu. if false will use cpu", + default=False, + ) + parser.add_argument( + "--test1", action="store_true", help="test switch 1", default=False + ) + parser.add_argument( + "--test2", action="store_true", help="test switch 2", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", default=False + ) self.args = parser.parse_args() + def get_name_from_path(path): path_pos = path.rfind("/") if path_pos == -1: return path else: - return path[path_pos+1:] + return path[path_pos + 1 :] + def authenticate_drive(): OAUTH2_SCOPE = "https://www.googleapis.com/auth/drive" CLIENT_SECRETS = "./secret.json" - flow = oauth2client.client.flow_from_clientsecrets(CLIENT_SECRETS, OAUTH2_SCOPE) + flow = oauth2client.client.flow_from_clientsecrets( + CLIENT_SECRETS, OAUTH2_SCOPE + ) flow.redirect_uri = oauth2client.client.OOB_CALLBACK_URN authorize_url = flow.step1_get_authorize_url() - print('Go to the following link in your browser: ' + authorize_url) - code = input('Enter verification code: ').strip() + print("Go to the following link in your browser: " + authorize_url) + code = input("Enter verification code: ").strip() credentials = flow.step2_exchange(code) http = httplib2.Http() credentials.authorize(http) - drive_service = build('drive', 'v3', http=http) + drive_service = build("drive", "v3", http=http) return drive_service + def get_folder_id(folder_name, drive_service): - parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+folder_name+"'", fields="files(id, name)", spaces="drive").execute() + parent_dir = ( + drive_service.files() + .list( + q="mimeType='application/vnd.google-apps.folder' and name='" + + folder_name + + "'", + fields="files(id, name)", + spaces="drive", + ) + .execute() + ) folder_id = str() for file in parent_dir.get("files", []): print(file.get("name") + "---" + file.get("id")) folder_id = file.get("id") return folder_id + def get_file_id(file_name, folder_name, drive_service): folder_id = get_folder_id(folder_name, drive_service) - download_to_be = drive_service.files().list(q="name='"+file_name+"' and '"+folder_id+"' in parents", fields="files(id, name)", spaces="drive").execute() + download_to_be = ( + drive_service.files() + .list( + q="name='" + file_name + "' and '" + folder_id + "' in parents", + fields="files(id, name)", + spaces="drive", + ) + .execute() + ) file_id = str() for file in download_to_be.get("files", []): print(file.get("name") + "---" + file.get("id")) file_id = file.get("id") return file_id + def g_drive_up(file_path, file_name, file_type, to_folder): FILENAME = file_path MIMETYPE = file_type @@ -81,64 +126,113 @@ def g_drive_up(file_path, file_name, file_type, to_folder): DESCRIPTION = "a file" drive_service = authenticate_drive() - media_body = googleapiclient.http.MediaFileUpload(FILENAME, mimetype=MIMETYPE, resumable=True) - parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+to_folder+"'", fields="files(id, name)", 
spaces="drive").execute() + media_body = googleapiclient.http.MediaFileUpload( + FILENAME, mimetype=MIMETYPE, resumable=True + ) + parent_dir = ( + drive_service.files() + .list( + q="mimeType='application/vnd.google-apps.folder' and name='" + + to_folder + + "'", + fields="files(id, name)", + spaces="drive", + ) + .execute() + ) folder_id = str() for file in parent_dir.get("files", []): print(file.get("name") + "---" + file.get("id")) folder_id = file.get("id") - body = {'name': TITLE, 'description': DESCRIPTION, 'parents': [folder_id]} - new_file = drive_service.files().create(body=body, media_body=media_body, fields="id").execute() + body = {"name": TITLE, "description": DESCRIPTION, "parents": [folder_id]} + new_file = ( + drive_service.files() + .create(body=body, media_body=media_body, fields="id") + .execute() + ) print(new_file.get("id")) - #pprint.pprint(new_file) + # pprint.pprint(new_file) + def g_drive_down(folder_name, file_name): drive_service = authenticate_drive() - #get folder id - parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+folder_name+"'", fields="files(id, name)", spaces="drive").execute() + # get folder id + parent_dir = ( + drive_service.files() + .list( + q="mimeType='application/vnd.google-apps.folder' and name='" + + folder_name + + "'", + fields="files(id, name)", + spaces="drive", + ) + .execute() + ) folder_id = str() for file in parent_dir.get("files", []): print(file.get("name") + "---" + file.get("id")) folder_id = file.get("id") - #get file id - download_to_be = drive_service.files().list(q="name='"+file_name+"' and '"+folder_id+"' in parents", fields="files(id, name)", spaces="drive").execute() + # get file id + download_to_be = ( + drive_service.files() + .list( + q="name='" + file_name + "' and '" + folder_id + "' in parents", + fields="files(id, name)", + spaces="drive", + ) + .execute() + ) file_id = str() for file in download_to_be.get("files", []): print(file.get("name") + "---" + file.get("id")) file_id = file.get("id") request = drive_service.files().get_media(fileId=file_id) - #fh = io.BytesIO() + # fh = io.BytesIO() fh = io.FileIO(file_name, "w") downloader = MediaIoBaseDownload(fh, request) done = False while done is False: status, done = downloader.next_chunk() print("Download %d%%." 
% int(status.progress() * 100)) - #print(downloader) + # print(downloader) + def g_drive_update(folder_name, file_name): drive_service = authenticate_drive() file_id = get_file_id(file_name, folder_name, drive_service) u_file = drive_service.files().get(fileId=file_id).execute() media_body = MediaFileUpload(resumable=True) - updated_file = drive_service.files().update(fileId=file_id, body=u_file, media_body=media_body).execute() + updated_file = ( + drive_service.files() + .update(fileId=file_id, body=u_file, media_body=media_body) + .execute() + ) + def launch_ais(which): - if which == "marionette": marrionette_type_1() - elif which == "lstm_type_1": lstm_type_1("ethereum", "ether") - elif which == "lstm_type_2": lstm_type_2("ethereum", "ether", 5, 20) - elif which == "lstm_type_3": lstm_type_3("ethereum", "ether", 5, 20) - elif which == "cnn_type_1": cnn_type_1() - elif which == "tfann_type_1": tfann_type_1() - else: pass + if which == "marionette": + marrionette_type_1() + elif which == "lstm_type_1": + lstm_type_1("ethereum", "ether") + elif which == "lstm_type_2": + lstm_type_2("ethereum", "ether", 5, 20) + elif which == "lstm_type_3": + lstm_type_3("ethereum", "ether", 5, 20) + elif which == "cnn_type_1": + cnn_type_1() + elif which == "tfann_type_1": + tfann_type_1() + else: + pass + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here + # here if argparser.args.pysrcupdate: for src in argparser.args.pysrcupdate: g_drive_up(src, get_name_from_path(src), "text/python", "colab") @@ -146,6 +240,7 @@ def premain(argparser): g_drive_down("colab", "main.py") launch_ais(argparser.args.which) + def main(): argparser = Argparser() if argparser.args.dbg: @@ -153,7 +248,8 @@ def main(): premain(argparser) except Exception as e: print(e.__doc__) - if e.message: print(e.message) + if e.message: + print(e.message) variables = globals().copy() variables.update(locals()) shell = code.InteractiveConsole(variables) @@ -161,5 +257,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/stock.py b/stock.py index 6ad60ed..1d40c51 100755 --- a/stock.py +++ b/stock.py @@ -18,30 +18,41 @@ from keras.layers import LSTM from keras.layers import Dropout from keras.models import load_model + def SigHandler_SIGINT(signum, frame): print() sys.exit(0) + class Argparser(object): def __init__(self): parser = argparse.ArgumentParser() parser.add_argument("--string", type=str, help="string") - parser.add_argument("--bool", action="store_true", help="bool", default=False) - parser.add_argument("--dbg", action="store_true", help="debug", default=False) + parser.add_argument( + "--bool", action="store_true", help="bool", default=False + ) + parser.add_argument( + "--dbg", action="store_true", help="debug", default=False + ) self.args = parser.parse_args() + def build_model(train_data): model = models.Sequential() - model.add(layers.Dense(64, activation="relu", input_shape=(train_data.shape[1],))) + model.add( + layers.Dense(64, activation="relu", input_shape=(train_data.shape[1],)) + ) model.add(layers.Dense(64, activation="relu")) model.add(layers.Dense(1)) model.compile(optimizer="rmsprop", loss="mse", metrics=["acc"]) return model + # write code here def premain(argparser): signal.signal(signal.SIGINT, SigHandler_SIGINT) - #here + # here + def main(): argparser = Argparser() @@ -50,7 +61,8 @@ def main(): premain(argparser) except Exception as e: print(e.__doc__) - if e.message: print(e.message) + if e.message: + print(e.message) variables 
= globals().copy() variables.update(locals()) shell = code.InteractiveConsole(variables) @@ -58,5 +70,6 @@ def main(): else: premain(argparser) + if __name__ == "__main__": main() diff --git a/tfann.py b/tfann.py index c6f3d08..65cf6be 100755 --- a/tfann.py +++ b/tfann.py @@ -2,7 +2,7 @@ # _*_ coding=utf-8 _*_ # original source-https://nicholastsmith.wordpress.com/2017/11/13/cryptocurrency-price-prediction-using-deep-learning-in-tensorflow/ -#@#!pip install TFANN +# @#!pip install TFANN import code import readline import signal @@ -14,120 +14,143 @@ import pandas as pd import urllib.request import matplotlib.pyplot as mpl -def GetAPIUrl(cur, sts = 1420070400): - return 'https://poloniex.com/public?command=returnChartData¤cyPair=USDT_{:s}&start={:d}&end=9999999999&period=7200'.format(cur, sts) + +def GetAPIUrl(cur, sts=1420070400): + return "https://poloniex.com/public?command=returnChartData¤cyPair=USDT_{:s}&start={:d}&end=9999999999&period=7200".format( + cur, sts + ) + def GetCurDF(cur, fp): openUrl = urllib.request.urlopen(GetAPIUrl(cur)) r = openUrl.read() openUrl.close() df = pd.read_json(r.decode()) - df['date'] = df['date'].astype(np.int64) // 1000000000 + df["date"] = df["date"].astype(np.int64) // 1000000000 print(df.head()) return df + class PastSampler: def __init__(self, N, K): self.K = K self.N = N - def transform(self, A, Y = None): - M = self.N + self.K #Number of samples per row (sample + target) - #Matrix of sample indices like: {{1, 2..., M}, {2, 3, ..., M + 1}} + def transform(self, A, Y=None): + M = self.N + self.K # Number of samples per row (sample + target) + # Matrix of sample indices like: {{1, 2..., M}, {2, 3, ..., M + 1}} I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1) B = A[I].reshape(-1, M * A.shape[1], *A.shape[2:]) - ci = self.N * A.shape[1] #Number of features per sample - return B[:, :ci], B[:, ci:] #Sample matrix, Target matrix + ci = self.N * A.shape[1] # Number of features per sample + return B[:, :ci], B[:, ci:] # Sample matrix, Target matrix + def tfann_type_1(): #%%Path to store cached currency data - datPath = 'CurDat/' + datPath = "CurDat/" if not os.path.exists(datPath): os.mkdir(datPath) - #Different cryptocurrency types - cl = ['BTC', 'LTC', 'ETH', 'XMR'] - #Columns of price data to use - CN = ['close', 'high', 'low', 'open', 'volume'] - #Store data frames for each of above types + # Different cryptocurrency types + cl = ["BTC", "LTC", "ETH", "XMR"] + # Columns of price data to use + CN = ["close", "high", "low", "open", "volume"] + # Store data frames for each of above types D = [] for ci in cl: - dfp = os.path.join(datPath, ci + '.csv') + dfp = os.path.join(datPath, ci + ".csv") try: - df = pd.read_csv(dfp, sep = ',') + df = pd.read_csv(dfp, sep=",") except FileNotFoundError: df = GetCurDF(ci, dfp) D.append(df) #%%Only keep range of data that is common to all currency types cr = min(Di.shape[0] for Di in D) for i in range(len(cl)): - D[i] = D[i][(D[i].shape[0] - cr):] + D[i] = D[i][(D[i].shape[0] - cr) :] #%%Features are channels C = np.hstack((Di[CN] for Di in D))[:, None, :] - HP = 16 #Holdout period + HP = 16 # Holdout period A = C[0:-HP] - SV = A.mean(axis = 0) #Scale vector - C /= SV #Basic scaling of data + SV = A.mean(axis=0) # Scale vector + C /= SV # Basic scaling of data #%%Make samples of temporal sequences of pricing data (channel) - NPS, NFS = 256, 16 #Number of past and future samples + NPS, NFS = 256, 16 # Number of past and future samples ps = PastSampler(NPS, NFS) B, Y = ps.transform(A) #%%Architecture of the 
neural network NC = B.shape[2] - #2 1-D conv layers with relu followed by 1-d conv output layer - ns = [('C1d', [8, NC, NC * 2], 4), ('AF', 'relu'), - ('C1d', [8, NC * 2, NC * 2], 2), ('AF', 'relu'), - ('C1d', [8, NC * 2, NC], 2)] - #Create the neural network in TensorFlow - cnnr = ANNR(B[0].shape, ns, batchSize = 32, learnRate = 2e-5, - maxIter = 64, reg = 1e-5, tol = 1e-2, verbose = True) + # 2 1-D conv layers with relu followed by 1-d conv output layer + ns = [ + ("C1d", [8, NC, NC * 2], 4), + ("AF", "relu"), + ("C1d", [8, NC * 2, NC * 2], 2), + ("AF", "relu"), + ("C1d", [8, NC * 2, NC], 2), + ] + # Create the neural network in TensorFlow + cnnr = ANNR( + B[0].shape, + ns, + batchSize=32, + learnRate=2e-5, + maxIter=64, + reg=1e-5, + tol=1e-2, + verbose=True, + ) cnnr.fit(B, Y) - PTS = [] #Predicted time sequences - P, YH = B[[-1]], Y[[-1]] #Most recent time sequence - for i in range(HP // NFS): #Repeat prediction - P = np.concatenate([P[:, NFS:], YH], axis = 1) + PTS = [] # Predicted time sequences + P, YH = B[[-1]], Y[[-1]] # Most recent time sequence + for i in range(HP // NFS): # Repeat prediction + P = np.concatenate([P[:, NFS:], YH], axis=1) YH = cnnr.predict(P) PTS.append(YH) PTS = np.hstack(PTS).transpose((1, 0, 2)) - A = np.vstack([A, PTS]) #Combine predictions with original data - A = np.squeeze(A) * SV #Remove unittime dimension and rescale + A = np.vstack([A, PTS]) # Combine predictions with original data + A = np.squeeze(A) * SV # Remove unittime dimension and rescale C = np.squeeze(C) * SV nt = 4 PF = cnnr.PredictFull(B[:nt]) for i in range(nt): - fig, ax = mpl.subplots(1, 4, figsize = (16 / 1.24, 10 / 1.25)) + fig, ax = mpl.subplots(1, 4, figsize=(16 / 1.24, 10 / 1.25)) ax[0].plot(PF[0][i]) - ax[0].set_title('Input') + ax[0].set_title("Input") ax[1].plot(PF[2][i]) - ax[1].set_title('Layer 1') + ax[1].set_title("Layer 1") ax[2].plot(PF[4][i]) - ax[2].set_title('Layer 2') + ax[2].set_title("Layer 2") ax[3].plot(PF[5][i]) - ax[3].set_title('Output') - fig.text(0.5, 0.06, 'Time', ha='center') - fig.text(0.06, 0.5, 'Activation', va='center', rotation='vertical') + ax[3].set_title("Output") + fig.text(0.5, 0.06, "Time", ha="center") + fig.text(0.06, 0.5, "Activation", va="center", rotation="vertical") mpl.show() CI = list(range(C.shape[0])) AI = list(range(C.shape[0] + PTS.shape[0] - HP)) - NDP = PTS.shape[0] #Number of days predicted + NDP = PTS.shape[0] # Number of days predicted for i, cli in enumerate(cl): - fig, ax = mpl.subplots(figsize = (16 / 1.5, 10 / 1.5)) - hind = i * len(CN) + CN.index('high') - ax.plot(CI[-4 * HP:], C[-4 * HP:, hind], label = 'Actual') - ax.plot(AI[-(NDP + 1):], A[-(NDP + 1):, hind], '--', label = 'Prediction') - ax.legend(loc = 'upper left') - ax.set_title(cli + ' (High)') - ax.set_ylabel('USD') - ax.set_xlabel('Time') + fig, ax = mpl.subplots(figsize=(16 / 1.5, 10 / 1.5)) + hind = i * len(CN) + CN.index("high") + ax.plot(CI[-4 * HP :], C[-4 * HP :, hind], label="Actual") + ax.plot( + AI[-(NDP + 1) :], A[-(NDP + 1) :, hind], "--", label="Prediction" + ) + ax.legend(loc="upper left") + ax.set_title(cli + " (High)") + ax.set_ylabel("USD") + ax.set_xlabel("Time") ax.axes.xaxis.set_ticklabels([]) mpl.show() + # write code here def premain(): - #here + # here tfann_type_1() + def main(): premain() + if __name__ == "__main__": main() -- cgit v1.2.3
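
Aside (illustrative sketch, not part of the patch above): the least obvious piece of this commit is the index-matrix windowing used by the PastSampler classes in cnn.py and tfann.py to turn a price series into (past, future) training pairs. The following self-contained Python snippet reproduces that trick on a toy series; the sizes here (N=4 past steps, K=2 future steps, a 10-step single-feature series) are arbitrary values chosen only for the demonstration and do not appear in the patch.

#!/usr/bin/env python3
# Minimal sketch of the PastSampler sliding-window idea from cnn.py / tfann.py.
import numpy as np

N, K = 4, 2                       # past window length, prediction horizon
A = np.arange(10).reshape(-1, 1)  # toy series: 10 time steps, 1 feature

M = N + K
# Index matrix: row i holds indices [i, i+1, ..., i+M-1], one window per row.
I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
B = A[I].reshape(-1, M * A.shape[1])
ci = N * A.shape[1]               # number of "past" columns per row
past, future = B[:, :ci], B[:, ci:]

print(past.shape, future.shape)   # (5, 4) (5, 2)
print(past[0], future[0])         # [0 1 2 3] [4 5]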