Diffstat (limited to 'lstm.py')
-rwxr-xr-x  lstm.py  311
1 file changed, 241 insertions, 70 deletions
diff --git a/lstm.py b/lstm.py
index 935a8c2..7d65552 100755
--- a/lstm.py
+++ b/lstm.py
@@ -1,9 +1,9 @@
#!python
# _*_ coding=utf-8 _*_
-#original source:https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb
+# original source:https://github.com/dashee87/blogScripts/blob/master/Jupyter/2017-11-20-predicting-cryptocurrency-prices-with-deep-learning.ipynb
-#@#!pip install lxml
-#@#!mkdir lstm-models
+# @#!pip install lxml
+# @#!mkdir lstm-models
import argparse
import code
import readline
@@ -26,70 +26,124 @@ from keras import layers
window_len = 10
split_date = "2018-03-01"
+
def SigHandler_SIGINT(signum, frame):
print()
sys.exit(0)
+
class Argparser(object):
def __init__(self):
parser = argparse.ArgumentParser()
parser.add_argument("--string", type=str, help="string")
- parser.add_argument("--bool", action="store_true", help="bool", default=False)
- parser.add_argument("--dbg", action="store_true", help="debug", default=False)
+ parser.add_argument(
+ "--bool", action="store_true", help="bool", default=False
+ )
+ parser.add_argument(
+ "--dbg", action="store_true", help="debug", default=False
+ )
self.args = parser.parse_args()
+
def getData_CMC(crypto, crypto_short):
- market_info = pd.read_html("https://coinmarketcap.com/currencies/"+crypto+"/historical-data/?start=20160428&end="+time.strftime("%Y%m%d"))[0]
+ market_info = pd.read_html(
+ "https://coinmarketcap.com/currencies/"
+ + crypto
+ + "/historical-data/?start=20160428&end="
+ + time.strftime("%Y%m%d")
+ )[0]
print(type(market_info))
- market_info = market_info.assign(Date=pd.to_datetime(market_info['Date']))
- #print(market_info)
- #if crypto == "ethereum": market_info.loc[market_info["Market Cap"]=="-","Market Cap"]=0
- #if crypto == "dogecoin": market_info.loc[market_info["Volume"]=="-","Volume"]=0
+ market_info = market_info.assign(Date=pd.to_datetime(market_info["Date"]))
+ # print(market_info)
+ # if crypto == "ethereum": market_info.loc[market_info["Market Cap"]=="-","Market Cap"]=0
+ # if crypto == "dogecoin": market_info.loc[market_info["Volume"]=="-","Volume"]=0
market_info["Volume"] = market_info["Volume"].astype("int64")
market_info.columns = market_info.columns.str.replace("*", "")
- #print(type(market_info))
- #print(crypto + " head: ")
- #print(market_info.head())
- kwargs = {'close_off_high': lambda x: 2*(x['High']- x['Close'])/(x['High']-x['Low'])-1, 'volatility': lambda x: (x['High']- x['Low'])/(x['Open'])}
+ # print(type(market_info))
+ # print(crypto + " head: ")
+ # print(market_info.head())
+ kwargs = {
+ "close_off_high": lambda x: 2
+ * (x["High"] - x["Close"])
+ / (x["High"] - x["Low"])
+ - 1,
+ "volatility": lambda x: (x["High"] - x["Low"]) / (x["Open"]),
+ }
market_info = market_info.assign(**kwargs)
- model_data = market_info[['Date']+[coin+metric for coin in [""] for metric in ['Close','Volume','close_off_high','volatility']]]
- model_data = model_data.sort_values(by='Date')
- #print(model_data.head())
+ model_data = market_info[
+ ["Date"]
+ + [
+ coin + metric
+ for coin in [""]
+ for metric in ["Close", "Volume", "close_off_high", "volatility"]
+ ]
+ ]
+ model_data = model_data.sort_values(by="Date")
+ # print(model_data.head())
print(type(model_data))
return model_data
+
def getData_Stock(name, period):
- info = pd.read_csv("./data/"+name+"/"+period+".csv", encoding="utf-16")
+ info = pd.read_csv(
+ "./data/" + name + "/" + period + ".csv", encoding="utf-16"
+ )
return info
+
def get_sets(crypto, model_data):
- training_set, test_set = model_data[model_data['Date']<split_date], model_data[model_data['Date']>=split_date]
- training_set = training_set.drop('Date', 1)
- test_set = test_set.drop('Date', 1)
- norm_cols = [coin+metric for coin in [] for metric in ['Close', 'Volume']]
+ training_set, test_set = (
+ model_data[model_data["Date"] < split_date],
+ model_data[model_data["Date"] >= split_date],
+ )
+ training_set = training_set.drop("Date", 1)
+ test_set = test_set.drop("Date", 1)
+ norm_cols = [
+ coin + metric for coin in [] for metric in ["Close", "Volume"]
+ ]
LSTM_training_inputs = []
for i in range(len(training_set) - window_len):
- temp_set = training_set[i:(i+window_len)].copy()
+ temp_set = training_set[i : (i + window_len)].copy()
for col in norm_cols:
- temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1
+ temp_set.loc[:, col] = temp_set[col] / temp_set[col].iloc[0] - 1
LSTM_training_inputs.append(temp_set)
- LSTM_training_outputs = (training_set["Close"][window_len:].values/training_set["Close"][:-window_len].values) - 1
+ LSTM_training_outputs = (
+ training_set["Close"][window_len:].values
+ / training_set["Close"][:-window_len].values
+ ) - 1
LSTM_test_inputs = []
- for i in range(len(test_set)-window_len):
- temp_set = test_set[i:(i+window_len)].copy()
+ for i in range(len(test_set) - window_len):
+ temp_set = test_set[i : (i + window_len)].copy()
for col in norm_cols:
- temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1
+ temp_set.loc[:, col] = temp_set[col] / temp_set[col].iloc[0] - 1
LSTM_test_inputs.append(temp_set)
- LSTM_test_outputs = (test_set['Close'][window_len:].values/test_set['Close'][:-window_len].values)-1
+ LSTM_test_outputs = (
+ test_set["Close"][window_len:].values
+ / test_set["Close"][:-window_len].values
+ ) - 1
print(LSTM_training_inputs[0])
- LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
+ LSTM_training_inputs = [
+ np.array(LSTM_training_input)
+ for LSTM_training_input in LSTM_training_inputs
+ ]
LSTM_training_inputs = np.array(LSTM_training_inputs)
- LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]
+ LSTM_test_inputs = [
+ np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs
+ ]
LSTM_test_inputs = np.array(LSTM_test_inputs)
return LSTM_training_inputs, LSTM_test_inputs, training_set, test_set
-def build_model(inputs, output_size, neurons, activ_func="linear", dropout=0.25, loss="mae", optimizer="adam"):
+
+def build_model(
+ inputs,
+ output_size,
+ neurons,
+ activ_func="linear",
+ dropout=0.25,
+ loss="mae",
+ optimizer="adam",
+):
model = Sequential()
model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2])))
model.add(Dropout(dropout))
@@ -98,19 +152,27 @@ def build_model(inputs, output_size, neurons, activ_func="linear", dropout=0.25,
model.compile(loss=loss, optimizer=optimizer)
return model
+
def stock():
split_date = "2017.01.01"
model_data = getData_Stock("irxo", "Daily")
- model_data = model_data.sort_values(by='Date')
+ model_data = model_data.sort_values(by="Date")
- training_set, test_set = model_data[model_data['Date']<split_date], model_data[model_data['Date']>=split_date]
- training_set = training_set.drop('Date', 1)
- test_set = test_set.drop('Date', 1)
+ training_set, test_set = (
+ model_data[model_data["Date"] < split_date],
+ model_data[model_data["Date"] >= split_date],
+ )
+ training_set = training_set.drop("Date", 1)
+ test_set = test_set.drop("Date", 1)
training_inputs = training_set
- training_outputs = training_set.drop(['Open', 'High', 'Low', 'NTx', 'Volume'], axis=1)
+ training_outputs = training_set.drop(
+ ["Open", "High", "Low", "NTx", "Volume"], axis=1
+ )
test_inputs = test_set
- test_outputs = test_set.drop(["Open", "High", "Low", "NTx", "Volume"], axis=1)
+ test_outputs = test_set.drop(
+ ["Open", "High", "Low", "NTx", "Volume"], axis=1
+ )
print(training_set.head)
print(test_set.head)
@@ -120,77 +182,184 @@ def stock():
print(test_outputs.shape)
model = models.Sequential()
- model.add(layers.Dense(64, activation="relu", input_shape=(training_inputs.shape[1],)))
+ model.add(
+ layers.Dense(
+ 64, activation="relu", input_shape=(training_inputs.shape[1],)
+ )
+ )
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(1))
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
- history = model.fit(training_inputs, training_outputs, validation_data=(test_inputs, test_outputs), epochs=10, batch_size=1, verbose=2)
+ history = model.fit(
+ training_inputs,
+ training_outputs,
+ validation_data=(test_inputs, test_outputs),
+ epochs=10,
+ batch_size=1,
+ verbose=2,
+ )
+
def lstm_type_1(crypto, crypto_short):
model_data = getData_CMC(crypto, crypto_short)
np.random.seed(202)
- training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data)
+ training_inputs, test_inputs, training_set, test_set = get_sets(
+ crypto, model_data
+ )
model = build_model(training_inputs, output_size=1, neurons=20, loss="mse")
- training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1
- history = model.fit(training_inputs, training_outputs, epochs=50, batch_size=1, verbose=2, shuffle=True)
+ training_outputs = (
+ training_set["Close"][window_len:].values
+ / training_set["Close"][:-window_len].values
+ ) - 1
+ history = model.fit(
+ training_inputs,
+ training_outputs,
+ epochs=50,
+ batch_size=1,
+ verbose=2,
+ shuffle=True,
+ )
+
def lstm_type_4(crypto, crypto_short, crypto2, crypto_short2):
model_data = getData_CMC(crypto, crypto_short)
model_data2 = getData_CMC(crypto2, crypto_short2)
np.random.seed(202)
- training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data)
- training_inputs2, test_inputs2, training_set2, test_set2 = get_sets(crypto2, model_data2)
+ training_inputs, test_inputs, training_set, test_set = get_sets(
+ crypto, model_data
+ )
+ training_inputs2, test_inputs2, training_set2, test_set2 = get_sets(
+ crypto2, model_data2
+ )
return
- model = build_model(training_inputs/training_inputs2, output_size=1, neurons=20, loss="mse")
- training_outputs = ((training_set['Close'][window_len:].values)/(training_set['Close'][:-window_len].values))-1
- history = model.fit(training_inputs/training_inputs2, training_outputs, epochs=10, batch_size=1, verbose=2, shuffle=True)
+ model = build_model(
+ training_inputs / training_inputs2,
+ output_size=1,
+ neurons=20,
+ loss="mse",
+ )
+ training_outputs = (
+ (training_set["Close"][window_len:].values)
+ / (training_set["Close"][:-window_len].values)
+ ) - 1
+ history = model.fit(
+ training_inputs / training_inputs2,
+ training_outputs,
+ epochs=10,
+ batch_size=1,
+ verbose=2,
+ shuffle=True,
+ )
+
def lstm_type_2(crypto, crypto_short, pred_range, neuron_count):
model_data = getData_CMC(crypto, crypto_short)
np.random.seed(202)
- training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data)
- model = build_model(training_inputs, output_size=pred_range, neurons=neuron_count, loss="mse")
- training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1
+ training_inputs, test_inputs, training_set, test_set = get_sets(
+ crypto, model_data
+ )
+ model = build_model(
+ training_inputs,
+ output_size=pred_range,
+ neurons=neuron_count,
+ loss="mse",
+ )
+ training_outputs = (
+ training_set["Close"][window_len:].values
+ / training_set["Close"][:-window_len].values
+ ) - 1
training_outputs = []
- for i in range(window_len, len(training_set['Close'])-pred_range):
- training_outputs.append((training_set['Close'][i:i+pred_range].values/training_set['Close'].values[i-window_len])-1)
+ for i in range(window_len, len(training_set["Close"]) - pred_range):
+ training_outputs.append(
+ (
+ training_set["Close"][i : i + pred_range].values
+ / training_set["Close"].values[i - window_len]
+ )
+ - 1
+ )
training_outputs = np.array(training_outputs)
- history = model.fit(training_inputs[:-pred_range], training_outputs, epochs=50, batch_size=1, verbose=2, shuffle=True)
+ history = model.fit(
+ training_inputs[:-pred_range],
+ training_outputs,
+ epochs=50,
+ batch_size=1,
+ verbose=2,
+ shuffle=True,
+ )
+
def lstm_type_3(crypto, crypto_short, pred_range, neuron_count):
model_data = getData_CMC(crypto, crypto_short)
np.random.seed(202)
- training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data)
+ training_inputs, test_inputs, training_set, test_set = get_sets(
+ crypto, model_data
+ )
model = build_model(training_inputs, output_size=1, neurons=neuron_count)
- training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1
+ training_outputs = (
+ training_set["Close"][window_len:].values
+ / training_set["Close"][:-window_len].values
+ ) - 1
training_outputs = []
for rand_seed in range(775, 800):
print(rand_seed)
np.random.seed(rand_seed)
- temp_model = build_model(training_inputs, output_size=1, neurons=neuron_count)
- temp_model.fit(training_inputs, (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1, epochs=50, batch_size=1, verbose=0, shuffle=True)
- temp_model.save("./lstm-models/" + crypto + '_model_randseed_%d.h5'%rand_seed)
+ temp_model = build_model(
+ training_inputs, output_size=1, neurons=neuron_count
+ )
+ temp_model.fit(
+ training_inputs,
+ (
+ training_set["Close"][window_len:].values
+ / training_set["Close"][:-window_len].values
+ )
+ - 1,
+ epochs=50,
+ batch_size=1,
+ verbose=0,
+ shuffle=True,
+ )
+ temp_model.save(
+ "./lstm-models/" + crypto + "_model_randseed_%d.h5" % rand_seed
+ )
+
def load_models(crypto, crypto_short):
preds = []
model_data = getData_CMC(crypto, crypto_short)
np.random.seed(202)
- training_inputs, test_inputs, training_set, test_set = get_sets(crypto, model_data)
- for rand_seed in range(775,800):
- temp_model = load_model("./lstm-models/" + crypto + '_model_randseed_%d.h5'%rand_seed)
- preds.append(np.mean(abs(np.transpose(temp_model.predict(test_inputs))-(test_set['Close'].values[window_len:]/test_set['Close'].values[:-window_len]-1))))
+ training_inputs, test_inputs, training_set, test_set = get_sets(
+ crypto, model_data
+ )
+ for rand_seed in range(775, 800):
+ temp_model = load_model(
+ "./lstm-models/" + crypto + "_model_randseed_%d.h5" % rand_seed
+ )
+ preds.append(
+ np.mean(
+ abs(
+ np.transpose(temp_model.predict(test_inputs))
+ - (
+ test_set["Close"].values[window_len:]
+ / test_set["Close"].values[:-window_len]
+ - 1
+ )
+ )
+ )
+ )
+
# write code here
def premain(argparser):
signal.signal(signal.SIGINT, SigHandler_SIGINT)
- #here
- #lstm_type_1("ethereum", "ether")
- #lstm_type_2("ethereum", "ether", 5, 20)
- #lstm_type_3("ethereum", "ether", 5, 20)
- #lstm_type_4("ethereum", "ether", "dogecoin", "doge")
- #load_models("ethereum", "eth")
+ # here
+ # lstm_type_1("ethereum", "ether")
+ # lstm_type_2("ethereum", "ether", 5, 20)
+ # lstm_type_3("ethereum", "ether", 5, 20)
+ # lstm_type_4("ethereum", "ether", "dogecoin", "doge")
+ # load_models("ethereum", "eth")
stock()
+
def main():
argparser = Argparser()
if argparser.args.dbg:
@@ -198,7 +367,8 @@ def main():
premain(argparser)
except Exception as e:
print(e.__doc__)
- if e.message: print(e.message)
+ if e.message:
+ print(e.message)
variables = globals().copy()
variables.update(locals())
shell = code.InteractiveConsole(variables)
@@ -206,5 +376,6 @@ def main():
else:
premain(argparser)
+
if __name__ == "__main__":
main()