Diffstat (limited to 'cnn.py')
-rwxr-xr-x  cnn.py  285
1 file changed, 191 insertions(+), 94 deletions(-)
diff --git a/cnn.py b/cnn.py
index 23e450f..bd0cb78 100755
--- a/cnn.py
+++ b/cnn.py
@@ -34,18 +34,25 @@ import keras
from keras import optimizers
import matplotlib.pyplot as plt
+
def SigHandler_SIGINT(signum, frame):
print()
sys.exit(0)
+
class Argparser(object):
def __init__(self):
parser = argparse.ArgumentParser()
parser.add_argument("--string", type=str, help="string")
- parser.add_argument("--bool", action="store_true", help="bool", default=False)
- parser.add_argument("--dbg", action="store_true", help="debug", default=False)
+ parser.add_argument(
+ "--bool", action="store_true", help="bool", default=False
+ )
+ parser.add_argument(
+ "--dbg", action="store_true", help="debug", default=False
+ )
self.args = parser.parse_args()
+
class PastSampler(object):
def __init__(self, N, K, sliding_window=True):
self.N = N
@@ -55,23 +62,30 @@ class PastSampler(object):
def transform(self, A):
M = self.N + self.K
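+ # Index trick: a row of offsets (np.arange(M)) broadcast against a
+ # column of window starts yields one row of M consecutive indices per
+ # sample, so A[I] gathers every window in a single fancy-indexing step.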
if self.sliding_window:
- I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1,1)
+ I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
else:
- if A.shape[0]%M == 0:
- I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1,1)
+ if A.shape[0] % M == 0:
+ I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1, 1)
else:
- I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(-1,1)
+ I = np.arange(M) + np.arange(0, A.shape[0] - M, M).reshape(
+ -1, 1
+ )
- B = A[I].reshape(-1, M*A.shape[1], A.shape[2])
- ci = self.N*A.shape[1]
+ B = A[I].reshape(-1, M * A.shape[1], A.shape[2])
+ ci = self.N * A.shape[1]
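+ # ci splits each flattened window into N past rows and K future rows;
+ # e.g. N=256, K=16 on A of shape (T, 1, 1) gives (n, 256, 1) inputs and
+ # (n, 16, 1) targets.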
return B[:, :ci], B[:, ci:]
+
def getData(symbol_str):
data_file = Path("./cnn/" + symbol_str + ".csv")
- original_columns =["close", "date", "high", "low", "open"]
+ original_columns = ["close", "date", "high", "low", "open"]
new_columns = ["Close", "Timestamp", "High", "Low", "Open"]
columns = ["Close"]
- url = "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_" + symbol_str + "&start=1356998100&end=9999999999&period=300"
+ url = (
+ "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_"
+ + symbol_str
+ + "&start=1356998100&end=9999999999&period=300"
+ )
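+ # Poloniex returnChartData: period=300 requests 5-minute candles and
+ # start/end are unix timestamps, so this pulls the pair's full history.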
r = requests.get(url)
d = json.loads(r.content.decode("utf-8"))
df = pd.DataFrame(d)
@@ -85,15 +99,16 @@ def getData(symbol_str):
original_df = pd.read_csv(data_file).loc[:, columns]
return df, original_df, time_stamps
+
def Scaler(df, original_df, time_stamps, symbol_str):
- file_name="./cnn/" + symbol_str + "_close.h5"
+ file_name = "./cnn/" + symbol_str + "_close.h5"
scaler = MinMaxScaler()
- columns= ["Close"]
+ columns = ["Close"]
for c in columns:
- df[c] = scaler.fit_transform(df[c].values.reshape(-1,1))
- A = np.array(df)[:,None,:]
- original_A = np.array(original_df)[:,None,:]
- time_stamps = np.array(time_stamps)[:,None,None]
+ df[c] = scaler.fit_transform(df[c].values.reshape(-1, 1))
+ A = np.array(df)[:, None, :]
+ original_A = np.array(original_df)[:, None, :]
+ time_stamps = np.array(time_stamps)[:, None, None]
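+ # The None indexing inserts singleton axes so PastSampler receives
+ # arrays shaped (T, 1, n_features); time_stamps becomes (T, 1, 1).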
NPS, NFS = 256, 16
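+ # 256 past candles (about 21 hours of 5-minute data) are used to
+ # predict the next 16 candles (about 80 minutes).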
ps = PastSampler(NPS, NFS, sliding_window=False)
B, Y = ps.transform(A)
@@ -109,15 +124,16 @@ def Scaler(df, original_df, time_stamps, symbol_str):
f.create_dataset("original_inputs", data=original_B)
f.create_dataset("original_outputs", data=original_Y)
+
def cnn_type_1(symbol_str):
df, original_df, time_stamps = getData(symbol_str)
Scaler(df, original_df, time_stamps, symbol_str)
# run on gpu
- '''
+ """
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
- '''
+ """
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
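+ # allow_growth keeps TF 1.x from grabbing all GPU memory up front.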
@@ -134,21 +150,29 @@ def cnn_type_1(symbol_str):
epochs = 100
- #split training validation
- training_size = int(0.8* datas.shape[0])
- training_datas = datas[:training_size,:]
- training_labels = labels[:training_size,:]
- validation_datas = datas[training_size:,:]
- validation_labels = labels[training_size:,:]
+ # split training validation
+ training_size = int(0.8 * datas.shape[0])
+ training_datas = datas[:training_size, :]
+ training_labels = labels[:training_size, :]
+ validation_datas = datas[training_size:, :]
+ validation_labels = labels[training_size:, :]
model = Sequential()
# 2 Layers
- model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20))
+ model.add(
+ Conv1D(
+ activation="relu",
+ input_shape=(step_size, nb_features),
+ strides=3,
+ filters=8,
+ kernel_size=20,
+ )
+ )
model.add(Dropout(0.5))
- model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16))
+ model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16))
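+ # Shape check with the default 'valid' padding: (256 - 20)//3 + 1 = 79
+ # steps after the first conv, (79 - 16)//4 + 1 = 16 after the second,
+ # so the output length matches the 16-step horizon with no Dense head.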
- '''
+ """
# 3 Layers
model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=8))
#model.add(LeakyReLU())
@@ -168,10 +192,27 @@ def cnn_type_1(symbol_str):
#model.add(LeakyReLU())
model.add(Dropout(0.5))
model.add(Conv1D( strides=2, filters=nb_features, kernel_size=2))
- '''
- model.compile(loss='mse', optimizer='adam')
- model.fit(training_datas, training_labels,verbose=1, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')])
+ """
+
+ model.compile(loss="mse", optimizer="adam")
+ model.fit(
+ training_datas,
+ training_labels,
+ verbose=1,
+ batch_size=batch_size,
+ validation_data=(validation_datas, validation_labels),
+ epochs=epochs,
+ callbacks=[
+ CSVLogger(output_file_name + ".csv", append=True),
+ ModelCheckpoint(
+ output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5",
+ monitor="val_loss",
+ verbose=1,
+ mode="min",
+ ),
+ ],
+ )
def lstm_type_cnn_1(symbol_str, kind):
df, original_df, time_stamps = getData(symbol_str)
@@ -187,35 +228,65 @@ def lstm_type_cnn_1(symbol_str, kind):
set_session(tf.Session(config=config))
with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf:
- datas = hf['inputs'].value
- labels = hf['outputs'].value
+ datas = hf["inputs"].value
+ labels = hf["outputs"].value
step_size = datas.shape[1]
- units= 50
+ units = 50
second_units = 30
batch_size = 8
nb_features = datas.shape[2]
epochs = 100
- output_size=16
+ output_size = 16
output_file_name = "cnn/" + symbol_str + "_CNN_LSTM_2_relu"
- #split training validation
- training_size = int(0.8* datas.shape[0])
- training_datas = datas[:training_size,:]
- training_labels = labels[:training_size,:,0]
- validation_datas = datas[training_size:,:]
- validation_labels = labels[training_size:,:,0]
-
- #build model
+ # split training validation
+ training_size = int(0.8 * datas.shape[0])
+ training_datas = datas[:training_size, :]
+ training_labels = labels[:training_size, :, 0]
+ validation_datas = datas[training_size:, :]
+ validation_labels = labels[training_size:, :, 0]
+
+ # build model
model = Sequential()
if kind == "GRU":
- model.add(GRU(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False))
+ model.add(
+ GRU(
+ units=units,
+ activation="tanh",
+ input_shape=(step_size, nb_features),
+ return_sequences=False,
+ )
+ )
elif kind == "LSTM":
- model.add(LSTM(units=units,activation='tanh', input_shape=(step_size,nb_features),return_sequences=False))
+ model.add(
+ LSTM(
+ units=units,
+ activation="tanh",
+ input_shape=(step_size, nb_features),
+ return_sequences=False,
+ )
+ )
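+ # return_sequences=False keeps only the final hidden state; the
+ # Dense(output_size) head below maps it to the 16-step forecast.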
model.add(Dropout(0.8))
model.add(Dense(output_size))
model.add(LeakyReLU())
- model.compile(loss='mse', optimizer='adam')
- model.fit(training_datas, training_labels, batch_size=batch_size,validation_data=(validation_datas,validation_labels), epochs = epochs, callbacks=[CSVLogger(output_file_name+'.csv', append=True),ModelCheckpoint(output_file_name+'-{epoch:02d}-{val_loss:.5f}.hdf5', monitor='val_loss', verbose=1,mode='min')])
+ model.compile(loss="mse", optimizer="adam")
+ model.fit(
+ training_datas,
+ training_labels,
+ batch_size=batch_size,
+ validation_data=(validation_datas, validation_labels),
+ epochs=epochs,
+ callbacks=[
+ CSVLogger(output_file_name + ".csv", append=True),
+ ModelCheckpoint(
+ output_file_name + "-{epoch:02d}-{val_loss:.5f}.hdf5",
+ monitor="val_loss",
+ verbose=1,
+ mode="min",
+ ),
+ ],
+ )
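+ # Note: save_best_only is left at its False default, so ModelCheckpoint
+ # writes one .hdf5 file per epoch into cnn/.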
+
def load_cnn_type_1(symbol_str, vis_year, vis_month):
df, original_df, time_stamps = getData(symbol_str)
@@ -227,83 +298,107 @@ def load_cnn_type_1(symbol_str, vis_year, vis_month):
"""
with h5py.File("".join("./cnn/" + symbol_str + "_close.h5"), "r") as hf:
- datas = hf['inputs'].value
- labels = hf['outputs'].value
- input_times = hf['input_times'].value
- output_times = hf['output_times'].value
- original_inputs = hf['original_inputs'].value
- original_outputs = hf['original_outputs'].value
- original_datas = hf['original_datas'].value
-
- scaler=MinMaxScaler()
- #split training validation
- training_size = int(0.8* datas.shape[0])
- training_datas = datas[:training_size,:,:]
- training_labels = labels[:training_size,:,:]
- validation_datas = datas[training_size:,:,:]
- validation_labels = labels[training_size:,:,:]
- validation_original_outputs = original_outputs[training_size:,:,:]
- validation_original_inputs = original_inputs[training_size:,:,:]
- validation_input_times = input_times[training_size:,:,:]
- validation_output_times = output_times[training_size:,:,:]
-
- ground_true = np.append(validation_original_inputs,validation_original_outputs, axis=1)
- ground_true_times = np.append(validation_input_times,validation_output_times, axis=1)
+ datas = hf["inputs"].value
+ labels = hf["outputs"].value
+ input_times = hf["input_times"].value
+ output_times = hf["output_times"].value
+ original_inputs = hf["original_inputs"].value
+ original_outputs = hf["original_outputs"].value
+ original_datas = hf["original_datas"].value
+
+ scaler = MinMaxScaler()
+ # split training validation
+ training_size = int(0.8 * datas.shape[0])
+ training_datas = datas[:training_size, :, :]
+ training_labels = labels[:training_size, :, :]
+ validation_datas = datas[training_size:, :, :]
+ validation_labels = labels[training_size:, :, :]
+ validation_original_outputs = original_outputs[training_size:, :, :]
+ validation_original_inputs = original_inputs[training_size:, :, :]
+ validation_input_times = input_times[training_size:, :, :]
+ validation_output_times = output_times[training_size:, :, :]
+
+ ground_true = np.append(
+ validation_original_inputs, validation_original_outputs, axis=1
+ )
+ ground_true_times = np.append(
+ validation_input_times, validation_output_times, axis=1
+ )
step_size = datas.shape[1]
- batch_size= 8
+ batch_size = 8
nb_features = datas.shape[2]
model = Sequential()
# 2 layers
- model.add(Conv1D(activation='relu', input_shape=(step_size, nb_features), strides=3, filters=8, kernel_size=20))
+ model.add(
+ Conv1D(
+ activation="relu",
+ input_shape=(step_size, nb_features),
+ strides=3,
+ filters=8,
+ kernel_size=20,
+ )
+ )
# model.add(LeakyReLU())
model.add(Dropout(0.25))
- model.add(Conv1D( strides=4, filters=nb_features, kernel_size=16))
+ model.add(Conv1D(strides=4, filters=nb_features, kernel_size=16))
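+ # The weight file name below encodes "-{epoch:02d}-{val_loss:.5f}" from
+ # ModelCheckpoint; that exact checkpoint must already exist, presumably
+ # from an earlier cnn_type_1() run.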
model.load_weights("cnn/" + symbol_str + "_CNN_2_relu-76-0.00036.hdf5")
- model.compile(loss='mse', optimizer='adam')
+ model.compile(loss="mse", optimizer="adam")
predicted = model.predict(validation_datas)
predicted_inverted = []
for i in range(original_datas.shape[1]):
scaler.fit(original_datas[:, i].reshape(-1, 1))
- predicted_inverted.append(scaler.inverse_transform(predicted[:,:,i]))
+ predicted_inverted.append(scaler.inverse_transform(predicted[:, :, i]))
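+ # The scaler is refit on each original (unscaled) column so that
+ # inverse_transform maps the model's [0, 1] outputs back to price units.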
print(np.array(predicted_inverted).shape)
- #get only the close data
- ground_true = ground_true[:,:,0].reshape(-1)
+ # get only the close data
+ ground_true = ground_true[:, :, 0].reshape(-1)
ground_true_times = ground_true_times.reshape(-1)
- ground_true_times = pd.to_datetime(ground_true_times, unit='s')
+ ground_true_times = pd.to_datetime(ground_true_times, unit="s")
# since we are appending in the first dimension
- predicted_inverted = np.array(predicted_inverted)[0,:,:].reshape(-1)
+ predicted_inverted = np.array(predicted_inverted)[0, :, :].reshape(-1)
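+ # predicted_inverted holds one (n_windows, 16) block per feature; index
+ # 0 selects the single Close feature and reshape(-1) flattens it into a
+ # time series.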
print(np.array(predicted_inverted).shape)
- validation_output_times = pd.to_datetime(validation_output_times.reshape(-1), unit='s')
+ validation_output_times = pd.to_datetime(
+ validation_output_times.reshape(-1), unit="s"
+ )
ground_true_df = pd.DataFrame()
- ground_true_df['times'] = ground_true_times
- ground_true_df['value'] = ground_true
+ ground_true_df["times"] = ground_true_times
+ ground_true_df["value"] = ground_true
prediction_df = pd.DataFrame()
- prediction_df['times'] = validation_output_times
- prediction_df['value'] = predicted_inverted
-
- prediction_df = prediction_df.loc[(prediction_df["times"].dt.year == vis_year )&(prediction_df["times"].dt.month > vis_month ),: ]
- ground_true_df = ground_true_df.loc[(ground_true_df["times"].dt.year == vis_year )&(ground_true_df["times"].dt.month > vis_month ),:]
-
- plt.figure(figsize=(20,10))
- plt.plot(ground_true_df.times,ground_true_df.value, label = 'Actual')
- plt.plot(prediction_df.times,prediction_df.value,'ro', label='Predicted')
- plt.legend(loc='upper left')
+ prediction_df["times"] = validation_output_times
+ prediction_df["value"] = predicted_inverted
+
+ prediction_df = prediction_df.loc[
+ (prediction_df["times"].dt.year == vis_year)
+ & (prediction_df["times"].dt.month > vis_month),
+ :,
+ ]
+ ground_true_df = ground_true_df.loc[
+ (ground_true_df["times"].dt.year == vis_year)
+ & (ground_true_df["times"].dt.month > vis_month),
+ :,
+ ]
+
+ plt.figure(figsize=(20, 10))
+ plt.plot(ground_true_df.times, ground_true_df.value, label="Actual")
+ plt.plot(prediction_df.times, prediction_df.value, "ro", label="Predicted")
+ plt.legend(loc="upper left")
plt.show()
+
# write code here
def premain(argparser):
signal.signal(signal.SIGINT, SigHandler_SIGINT)
- #here
- #cnn_type_1("ETH")
- #lstm_type_cnn_1("ETH", "GRU")
+ # here
+ # cnn_type_1("ETH")
+ # lstm_type_cnn_1("ETH", "GRU")
load_cnn_type_1("ETH", 2018, 4)
+
def main():
argparser = Argparser()
if argparser.args.dbg:
@@ -311,7 +406,8 @@ def main():
premain(argparser)
except Exception as e:
print(e.__doc__)
- if e.message: print(e.message)
+ if hasattr(e, "message") and e.message:
+ print(e.message)
variables = globals().copy()
variables.update(locals())
shell = code.InteractiveConsole(variables)
@@ -319,5 +415,6 @@ def main():
else:
premain(argparser)
+
if __name__ == "__main__":
main()