| field | value | date |
|---|---|---|
| author | terminaldweller <thabogre@gmail.com> | 2022-01-27 17:51:54 +0000 |
| committer | terminaldweller <thabogre@gmail.com> | 2022-01-27 17:51:54 +0000 |
| commit | 02c8661250be26dc35b71c7fa9fb0f2eb9890b44 (patch) | |
| tree | 708839587fb6e16b6e37465e15259461fb0b13fe /cnn.py | |
| parent | update (diff) | |
black and update
Diffstat (limited to 'cnn.py')
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | cnn.py | 285 |

1 file changed, 191 insertions, 94 deletions
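The churn below is consistent with the commit message: it is the kind of whitespace-only rewrite `black` produces. As a minimal sketch, one hunk can be reproduced through black's Python API; `line_length=79` is an assumption inferred from where the wrapped calls break (black's default is 88):

```python
# Hypothetical repro of the Argparser hunk below; line_length=79 is a guess.
import black

src = (
    "class Argparser(object):\n"
    "    def __init__(self):\n"
    "        parser = argparse.ArgumentParser()\n"
    '        parser.add_argument("--bool", action="store_true", help="bool", default=False)\n'
)
# format_str() reformats a source string the same way running black on the file would.
print(black.format_str(src, mode=black.Mode(line_length=79)))
```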
```diff
@@ -34,18 +34,25 @@ import keras
 from keras import optimizers
 import matplotlib.pyplot as plt
 
+
 def SigHandler_SIGINT(signum, frame):
     print()
     sys.exit(0)
 
+
 class Argparser(object):
     def __init__(self):
         parser = argparse.ArgumentParser()
         parser.add_argument("--string", type=str, help="string")
-        parser.add_argument("--bool", action="store_true", help="bool", default=False)
-        parser.add_argument("--dbg", action="store_true", help="debug", default=False)
+        parser.add_argument(
+            "--bool", action="store_true", help="bool", default=False
+        )
+        parser.add_argument(
+            "--dbg", action="store_true", help="debug", default=False
+        )
         self.args = parser.parse_args()
 
+
 class PastSampler(object):
     def __init__(self, N, K, sliding_window=True):
         self.N = N
@@ -55,23 +62,30 @@ class PastSampler(object):
     def transform(self, A):
         M = self.N + self.K
         if self.sliding_window:
-            I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1,1)
+            I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
         else:
-            if A.shape[0]%M == 0:
-                I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1,1)
+            if A.shape[0] % M == 0:
+                I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1, 1)
             else:
-                I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(-1,1)
+                I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(
+                    -1, 1
+                )
 
-        B = A[I].reshape(-1, M*A.shape[1], A.shape[2])
-        ci = self.N*A.shape[1]
+        B = A[I].reshape(-1, M * A.shape[1], A.shape[2])
+        ci = self.N * A.shape[1]
         return B[:, :ci], B[:, ci:]
 
+
 def getData(symbol_str):
     data_file = Path("./cnn/" + symbol_str + ".csv")
-    original_columns =["close", "date", "high", "low", "open"]
+    original_columns = ["close", "date", "high", "low", "open"]
     new_columns = ["Close", "Timestamp", "High", "Low", "Open"]
     columns = ["Close"]
-    url = "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_" + symbol_str + "&start=1356998100&end=9999999999&period=300"
+    url = (
+        "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_"
+        + symbol_str
+        + "&start=1356998100&end=9999999999&period=300"
+    )
     r = requests.get(url)
     d = json.loads(r.content.decode("utf-8"))
     df = pd.DataFrame(d)
@@ -85,15 +99,16 @@ def getData(symbol_str):
     original_df = pd.read_csv(data_file).loc[:, columns]
     return df, original_df, time_stamps
 
+
 def Scaler(df, original_df, time_stamps, symbol_str):
-    file_name="./cnn/" + symbol_str + "_close.h5"
+    file_name = "./cnn/" + symbol_str + "_close.h5"
     scaler = MinMaxScaler()
-    columns= ["Close"]
+    columns = ["Close"]
     for c in columns:
-        df[c] = scaler.fit_transform(df[c].values.reshape(-1,1))
-    A = np.array(df)[:,None,:]
-    original_A = np.array(original_df)[:,None,:]
-    time_stamps = np.array(time_stamps)[:,None,None]
+        df[c] = scaler.fit_transform(df[c].values.reshape(-1, 1))
+    A = np.array(df)[:, None, :]
+    original_A = np.array(original_df)[:, None, :]
+    time_stamps = np.array(time_stamps)[:, None, None]
     NPS, NFS = 256, 16
     ps = PastSampler(NPS, NFS, sliding_window=False)
     B, Y = ps.transform(A)
@@ -109,15 +124,16 @@ def Scaler(df, original_df, time_stamps, symbol_str):
         f.create_dataset("original_inputs", data=original_B)
         f.create_dataset("original_outputs", data=original_Y)
 
+
 def cnn_type_1(symbol_str):
     df, original_df, time_stamps = getData(symbol_str)
     Scaler(df, original_df, time_stamps, symbol_str)
     # run on gpu
-    '''
+    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
-    '''
+    """
 
     config = tf.ConfigProto()
     config.gpu_options.allow_growth = True
@@ -134,21 +150,29 @@ def cnn_type_1(symbol_str):
     epochs = 100
 
-    #split training validation
-    training_size = int(0.8* datas.shape[0])
-    training_datas = datas[:training_size,:]
-    training_labels = labels[:training_size,:]
-    validation_datas = datas[training_size:,:]
-    validation_labels = labels[training_size:,:]
+    # split training validation
+    training_size = int(0.8 * datas.shape[0])
+    training_datas = datas[:training_size, :]
+    training_labels = labels[:training_size, :]
+    validation_datas = datas[training_size:, :]
+    validation_labels = labels[training_size:, :]
 
     model = Sequential()
 
     # 2 Layers
-    model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20))
+    model.add(
+        Conv1D(
+            activation="relu",
+            input_shape=(step_size, nb_features),
+            strides=3,
+            filters=8,
+            kernel_size=20,
+        )
+    )
     model.add(Dropout(0.5))
-    model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16))
+    model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16))
 
-    '''
+    """
 
    # 3 Layers
    model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=8))
    #model.add(LeakyReLU())
@@ -168,10 +192,27 @@ def cnn_type_1(symbol_str):
    #model.add(LeakyReLU())
    model.add(Dropout(0.5))
    model.add(Conv1D( strides=2, filters=nb_features, kernel_size=2))
-    '''
+    """
+
+    model.compile(loss="mse", optimizer="adam")
+    model.fit(
+        training_datas,
+        training_labels,
+        verbose=1,
+        batch_size=batch_size,
+        validation_data=(validation_datas, validation_labels),
+        epochs=epochs,
+        callbacks=[
+            CSVLogger(output_file_name + ".csv", append=True),
+            ModelCheckpoint(
+                output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5",
+                monitor="val_loss",
+                verbose=1,
+                mode="min",
+            ),
+        ],
+    )
 
-    model.compile(loss='mse', optimizer='adam')
-    model.fit(training_datas, training_labels,verbose=1, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')])
 
 def lstm_type_cnn_1(symbol_str, kind):
     df, original_df, time_stamps = getData(symbol_str)
@@ -187,35 +228,65 @@ def lstm_type_cnn_1(symbol_str, kind):
     set_session(tf.Session(config=config))
 
     with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf:
-        datas = hf['inputs'].value
-        labels = hf['outputs'].value
+        datas = hf["inputs"].value
+        labels = hf["outputs"].value
 
     step_size = datas.shape[1]
-    units= 50
+    units = 50
     second_units = 30
     batch_size = 8
     nb_features = datas.shape[2]
     epochs = 100
-    output_size=16
+    output_size = 16
     output_file_name = "cnn/" + symbol_str + "_CNN_LSTM_2_relu"
-    #split training validation
-    training_size = int(0.8* datas.shape[0])
-    training_datas = datas[:training_size,:]
-    training_labels = labels[:training_size,:,0]
-    validation_datas = datas[training_size:,:]
-    validation_labels = labels[training_size:,:,0]
-
-    #build model
+    # split training validation
+    training_size = int(0.8 * datas.shape[0])
+    training_datas = datas[:training_size, :]
+    training_labels = labels[:training_size, :, 0]
+    validation_datas = datas[training_size:, :]
+    validation_labels = labels[training_size:, :, 0]
+
+    # build model
     model = Sequential()
     if kind == "GRU":
-        model.add(GRU(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False))
+        model.add(
+            GRU(
+                units=units,
+                activation="tanh",
+                input_shape=(step_size, nb_features),
+                return_sequences=False,
+            )
+        )
     elif kind == "LSTM":
-        model.add(LSTM(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False))
+        model.add(
+            LSTM(
+                units=units,
+                activation="tanh",
+                input_shape=(step_size, nb_features),
+                return_sequences=False,
+            )
+        )
     model.add(Dropout(0.8))
     model.add(Dense(output_size))
     model.add(LeakyReLU())
-    model.compile(loss='mse', optimizer='adam')
-    model.fit(training_datas, training_labels, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')])
+    model.compile(loss="mse", optimizer="adam")
+    model.fit(
+        training_datas,
+        training_labels,
+        batch_size=batch_size,
+        validation_data=(validation_datas, validation_labels),
+        epochs=epochs,
+        callbacks=[
+            CSVLogger(output_file_name + ".csv", append=True),
+            ModelCheckpoint(
+                output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5",
+                monitor="val_loss",
+                verbose=1,
+                mode="min",
+            ),
+        ],
+    )
+
 
 def load_cnn_type_1(symbol_str, vis_year, vis_month):
     df, original_df, time_stamps = getData(symbol_str)
@@ -227,83 +298,107 @@ def load_cnn_type_1(symbol_str, vis_year, vis_month):
     """
 
     with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf:
-        datas = hf['inputs'].value
-        labels = hf['outputs'].value
-        input_times = hf['input_times'].value
-        output_times = hf['output_times'].value
-        original_inputs = hf['original_inputs'].value
-        original_outputs = hf['original_outputs'].value
-        original_datas = hf['original_datas'].value
-
-    scaler=MinMaxScaler()
-    #split training validation
-    training_size = int(0.8* datas.shape[0])
-    training_datas = datas[:training_size,:,:]
-    training_labels = labels[:training_size,:,:]
-    validation_datas = datas[training_size:,:,:]
-    validation_labels = labels[training_size:,:,:]
-    validation_original_outputs = original_outputs[training_size:,:,:]
-    validation_original_inputs = original_inputs[training_size:,:,:]
-    validation_input_times = input_times[training_size:,:,:]
-    validation_output_times = output_times[training_size:,:,:]
-
-    ground_true = np.append(validation_original_inputs,validation_original_outputs, axis=1)
-    ground_true_times = np.append(validation_input_times,validation_output_times, axis=1)
+        datas = hf["inputs"].value
+        labels = hf["outputs"].value
+        input_times = hf["input_times"].value
+        output_times = hf["output_times"].value
+        original_inputs = hf["original_inputs"].value
+        original_outputs = hf["original_outputs"].value
+        original_datas = hf["original_datas"].value
+
+    scaler = MinMaxScaler()
+    # split training validation
+    training_size = int(0.8 * datas.shape[0])
+    training_datas = datas[:training_size, :, :]
+    training_labels = labels[:training_size, :, :]
+    validation_datas = datas[training_size:, :, :]
+    validation_labels = labels[training_size:, :, :]
+    validation_original_outputs = original_outputs[training_size:, :, :]
+    validation_original_inputs = original_inputs[training_size:, :, :]
+    validation_input_times = input_times[training_size:, :, :]
+    validation_output_times = output_times[training_size:, :, :]
+
+    ground_true = np.append(
+        validation_original_inputs, validation_original_outputs, axis=1
+    )
+    ground_true_times = np.append(
+        validation_input_times, validation_output_times, axis=1
+    )
 
     step_size = datas.shape[1]
-    batch_size= 8
+    batch_size = 8
     nb_features = datas.shape[2]
 
     model = Sequential()
     # 2 layers
-    model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20))
+    model.add(
+        Conv1D(
+            activation="relu",
+            input_shape=(step_size, nb_features),
+            strides=3,
+            filters=8,
+            kernel_size=20,
+        )
+    )
     # model.add(LeakyReLU())
     model.add(Dropout(0.25))
-    model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16))
+    model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16))
     model.load_weights("cnn/" + symbol_str + "_CNN_2_relu-76-0.00036.hdf5")
-    model.compile(loss='mse', optimizer='adam')
+    model.compile(loss="mse", optimizer="adam")
 
     predicted = model.predict(validation_datas)
     predicted_inverted = []
     for i in range(original_datas.shape[1]):
         scaler.fit(original_datas[:, i].reshape(-1, 1))
-        predicted_inverted.append(scaler.inverse_transform(predicted[:,:,i]))
+        predicted_inverted.append(scaler.inverse_transform(predicted[:, :, i]))
     print(np.array(predicted_inverted).shape)
 
-    #get only the close data
-    ground_true = ground_true[:,:,0].reshape(-1)
+    # get only the close data
+    ground_true = ground_true[:, :, 0].reshape(-1)
     ground_true_times = ground_true_times.reshape(-1)
-    ground_true_times = pd.to_datetime(ground_true_times, unit='s')
+    ground_true_times = pd.to_datetime(ground_true_times, unit="s")
 
     # since we are appending in the first dimension
-    predicted_inverted = np.array(predicted_inverted)[0,:,:].reshape(-1)
+    predicted_inverted = np.array(predicted_inverted)[0, :, :].reshape(-1)
     print(np.array(predicted_inverted).shape)
-    validation_output_times = pd.to_datetime(validation_output_times.reshape(-1), unit='s')
+    validation_output_times = pd.to_datetime(
+        validation_output_times.reshape(-1), unit="s"
+    )
 
     ground_true_df = pd.DataFrame()
-    ground_true_df['times'] = ground_true_times
-    ground_true_df['value'] = ground_true
+    ground_true_df["times"] = ground_true_times
+    ground_true_df["value"] = ground_true
 
     prediction_df = pd.DataFrame()
-    prediction_df['times'] = validation_output_times
-    prediction_df['value'] = predicted_inverted
-
-    prediction_df = prediction_df.loc[(prediction_df["times"].dt.year == vis_year )&(prediction_df["times"].dt.month > vis_month ),: ]
-    ground_true_df = ground_true_df.loc[(ground_true_df["times"].dt.year == vis_year )&(ground_true_df["times"].dt.month > vis_month ),:]
-
-    plt.figure(figsize=(20,10))
-    plt.plot(ground_true_df.times,ground_true_df.value, label = 'Actual')
-    plt.plot(prediction_df.times,prediction_df.value,'ro', label='Predicted')
-    plt.legend(loc='upper left')
+    prediction_df["times"] = validation_output_times
+    prediction_df["value"] = predicted_inverted
+
+    prediction_df = prediction_df.loc[
+        (prediction_df["times"].dt.year == vis_year)
+        & (prediction_df["times"].dt.month > vis_month),
+        :,
+    ]
+    ground_true_df = ground_true_df.loc[
+        (ground_true_df["times"].dt.year == vis_year)
+        & (ground_true_df["times"].dt.month > vis_month),
+        :,
+    ]
+
+    plt.figure(figsize=(20, 10))
+    plt.plot(ground_true_df.times, ground_true_df.value, label="Actual")
+    plt.plot(prediction_df.times, prediction_df.value, "ro", label="Predicted")
+    plt.legend(loc="upper left")
     plt.show()
 
+
 # write code here
 def premain(argparser):
     signal.signal(signal.SIGINT, SigHandler_SIGINT)
-    #here
-    #cnn_type_1("ETH")
-    #lstm_type_cnn_1("ETH", "GRU")
+    # here
+    # cnn_type_1("ETH")
+    # lstm_type_cnn_1("ETH", "GRU")
     load_cnn_type_1("ETH", 2018, 4)
 
+
 def main():
     argparser = Argparser()
     if argparser.args.dbg:
@@ -311,7 +406,8 @@ def main():
             premain(argparser)
         except Exception as e:
             print(e.__doc__)
-            if e.message: print(e.message)
+            if e.message:
+                print(e.message)
             variables = globals().copy()
             variables.update(locals())
             shell = code.InteractiveConsole(variables)
@@ -319,5 +415,6 @@ def main():
     else:
         premain(argparser)
 
+
 if __name__ == "__main__":
     main()
```
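For context on what the reformatted code computes: `PastSampler` slices a `(time, 1, features)` array into aligned past/future window pairs, and `Scaler()` calls it with `NPS, NFS = 256, 16`. A standalone sketch with toy numbers (the class body is copied from the new revision above; the N=3, K=2 demo data is illustrative only):

```python
import numpy as np


class PastSampler(object):
    """Slices a (time, 1, features) array into (past, future) window pairs."""

    def __init__(self, N, K, sliding_window=True):
        self.N = N  # past samples fed to the model
        self.K = K  # future samples to predict
        self.sliding_window = sliding_window

    def transform(self, A):
        M = self.N + self.K
        if self.sliding_window:
            # every window of M consecutive rows, stepping by 1
            I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
        else:
            # non-overlapping windows, dropping the ragged tail
            if A.shape[0] % M == 0:
                I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1, 1)
            else:
                I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(-1, 1)
        B = A[I].reshape(-1, M * A.shape[1], A.shape[2])
        ci = self.N * A.shape[1]
        return B[:, :ci], B[:, ci:]


# 20 fake "Close" prices shaped (time, 1, 1), as Scaler() shapes them
A = np.arange(20, dtype=float).reshape(-1, 1, 1)
X, Y = PastSampler(3, 2, sliding_window=False).transform(A)
print(X.shape, Y.shape)  # (4, 3, 1) (4, 2, 1)
print(X[0].ravel(), Y[0].ravel())  # [0. 1. 2.] [3. 4.]
```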