| -rwxr-xr-x | ACO.py        | 107 |
| -rwxr-xr-x | PSO.py        | 173 |
| -rwxr-xr-x | SA.py         |  97 |
| -rwxr-xr-x | cnn.py        | 285 |
| -rwxr-xr-x | digester.py   |  90 |
| -rwxr-xr-x | lstm.py       | 311 |
| -rwxr-xr-x | marionette.py |  47 |
| -rwxr-xr-x | seer.py       | 165 |
| -rwxr-xr-x | stock.py      |  23 |
| -rwxr-xr-x | tfann.py      | 127 |
10 files changed, 1140 insertions, 285 deletions
diff --git a/ACO.py b/ACO.py
new file mode 100755
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import math
+import time
+import random
+
+
+Nodes = [
+    [0, 10, 20, 30, 40],
+    [10, 0, 20, 30, 40],
+    [20, 20, 0, 30, 40],
+    [30, 30, 30, 0, 40],
+    [40, 40, 40, 40, 0],
+]
+
+Pheromone = [
+    [0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0],
+]
+
+
+class Ant:
+    def __init__(self, node):
+        self.node = node
+        self.route = int()
+        self.exclusion_list = list()
+
+    def __repr__(self):
+        return "{node: %s}" % (self.node)
+
+    def __str__(self):
+        return "{node: %s}" % (self.node)
+
+    def nextHop(self):
+        n = math.ceil(random.random() * len(Nodes)) - 1
+        if (
+            n not in self.exclusion_list
+            and n != self.exclusion_list[-1]
+            and Nodes[self.exclusion_list[-1]][n] != 0
+        ):
+            self.exclusion_list.append(n)
+            return n
+        else:
+            if len(self.exclusion_list) == len(Nodes):
+                return None
+            else:
+                self.nextHop()
+
+    def nextHop2(self):
+        pass
+
+    def walk(self):
+        self.exclusion_list.append(self.node)
+        for _ in range(0, len(Nodes)):
+            next = self.nextHop()
+            if next is not None:
+                self.route += Nodes[self.node][next]
+                self.node = next
+
+    def printRoute(self):
+        print(self.exclusion_list)
+
+
+class ACO:
+    def __init__(self, ant_count):
+        self.ant_count = ant_count
+        self.Ants = self.factory()
+
+    def factory(self):
+        result = list()
+        for _ in range(0, self.ant_count):
+            node = math.ceil(random.random() * 5) - 1
+            result.append(Ant(node))
+        return result
+
+    def fitness(self):
+        pass
+
+    def run(self):
+        for ant in self.Ants:
+            ant.walk()
+            ant.printRoute()
+            for i in range(0, len(Nodes) - 1):
+                Pheromone[ant.exclusion_list[i]][ant.exclusion_list[i + 1]] = (
+                    1 / ant.route
+                )
+
+
+def main():
+    random.seed(time.time())
+    rho = 0
+    alpha = 1
+    beta = 1
+    aco = ACO(5)
+    # for ant in aco.Ants:
+    #     print("ant:", ant)
+    aco.run()
+    for i in range(0, len(Pheromone)):
+        for j in range(0, len(Pheromone)):
+            print(Pheromone[i][j], end=" ")
+        print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/PSO.py b/PSO.py
new file mode 100755
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+import time
+import random
+
+
+class Particle:
+    def __init__(self, x, y, z):
+        self.Pbest_x = 0
+        self.Pbest_y = 0
+        self.Pbest_z = 0
+        self.x = x
+        self.y = y
+        self.z = z
+        self.x_k1 = 0
+        self.y_k1 = 0
+        self.z_k1 = 0
+        self.V_x = 0
+        self.V_y = 0
+        self.V_z = 0
+        self.V_xk1 = 0
+        self.V_yk1 = 0
+        self.V_zk1 = 0
+
+    def __repr__(self):
+        return (
+            "{x: %s , y: %s, z: %s, Pbest_x: %s, Pbest_y: %s, Pbest_z: %s}\n"
+            % (
+                self.x,
+                self.y,
+                self.z,
+                self.Pbest_x,
+                self.Pbest_y,
+                self.Pbest_z,
+            )
+        )
+
+    def __str__(self):
+        return (
+            "{x: %s , y: %s, z: %s, Pbest_x: %s, Pbest_y: %s, Pbest_z: %s}\n"
+            % (
+                self.x,
+                self.y,
+                self.z,
+                self.Pbest_x,
+                self.Pbest_y,
+                self.Pbest_z,
+            )
+        )
+
+    def update_X(self):
+        self.x = self.x + self.V_xk1
+        self.y = self.y + self.V_yk1
+        self.z = self.z + self.V_zk1
+
+    def update_V(self, w, c1, c2, Gbest_x, Gbest_y, Gbest_z):
+        rand1 = random.random()
+        rand2 = random.random()
+        self.V_xk1 = (
+            w * self.V_x
+            + c1 * rand1 * (self.Pbest_x - self.x)
+            + c2 * rand2 * (Gbest_x - self.x)
+        )
+        self.V_yk1 = (
+            w * self.V_y
+            + c1 * rand1 * (self.Pbest_y - self.y)
+            + c2 * rand2 * (Gbest_y - self.y)
+        )
+        self.V_zk1 = (
+            w * self.V_z
+            + c1 * rand1 * (self.Pbest_z - self.z)
+            + c2 * rand2 * (Gbest_z - self.z)
+        )
+
+    def update_Pbest(self, x, y, z: float):
+        self.Pbest_x = x
+        self.Pbest_y = y
+        self.Pbest_z = z
+
+    def doRound(self, w, c1, c2, Gbest_x, Gbest_y, Gbest_z, fitness):
+        fitness_x = fitness(self.x, self.y, self.z)
+        self.update_V(w, c1, c2, Gbest_x, Gbest_y, Gbest_z)
+        self.update_X()
+        if abs(fitness(self.Pbest_x, self.Pbest_y, self.Pbest_z)) > abs(
+            fitness_x
+        ):
+            self.update_Pbest(self.x, self.y, self.z)
+
+
+class PSO:
+    def __init__(self, w, c1, c2, particle_count):
+        self.Gbest_x = 0
+        self.Gbest_y = 0
+        self.Gbest_z = 0
+        self.particle_count = particle_count
+        self.Particles = self.factory()
+        self.w = w
+        self.c1 = c1
+        self.c2 = c2
+
+    def factory(self):
+        result = list()
+        for _ in range(1, self.particle_count):
+            x = (
+                random.random() * 10
+                if random.random() > 0.5
+                else -random.random() * 10
+            )
+            y = (
+                random.random() * 10
+                if random.random() > 0.5
+                else -random.random() * 10
+            )
+            z = (
+                random.random() * 10
+                if random.random() > 0.5
+                else -random.random() * 10
+            )
+            result.append(Particle(x, y, z))
+        return result
+
+    def fitness(self, x, y, z: float):
+        return (
+            (x ** 5) - ((x ** 2) * y * z) + (z * x) + (y ** 2) - (z ** 3) - 10
+        )
+
+    def doRround(self):
+        roundBest_x = float()
+        roundBest_y = float()
+        roundBest_z = float()
+        for particle in self.Particles:
+            if abs(self.fitness(roundBest_x, roundBest_y, roundBest_z)) > abs(
+                self.fitness(particle.x, particle.y, particle.z)
+            ):
+                roundBest_x = particle.x
+                roundBest_y = particle.y
+                roundBest_z = particle.z
+        self.Gbest_x = roundBest_x
+        self.Gbest_y = roundBest_y
+        self.Gbest_z = roundBest_z
+        for particle in self.Particles:
+            particle.doRound(
+                self.w,
+                self.c1,
+                self.c2,
+                self.Gbest_x,
+                self.Gbest_y,
+                self.Gbest_z,
+                self.fitness,
+            )
+
+    def printGlobalBest(self):
+        print(
+            "x: %s, y: %s, z: %s, fitness: %s"
+            % (
+                self.Gbest_x,
+                self.Gbest_y,
+                self.Gbest_z,
+                self.fitness(self.Gbest_x, self.Gbest_y, self.Gbest_z),
+            ),
+        )
+
+
+def main():
+    random.seed(time.time())
+    round_count = 10
+    pso = PSO(5, 1.5, 1.5, 50)
+    for _ in range(1, round_count):
+        pso.doRround()
+    pso.printGlobalBest()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/SA.py b/SA.py
new file mode 100755
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+from random import seed, random
+from time import time
+import math
+
+
+class X:
+    def __init__(self, x: float, y: float, z: float):
+        self.x_now = 0
+        self.y_now = 0
+        self.z_now = 0
+        self.x_k = x
+        self.y_k = y
+        self.z_k = z
+        self.x_k1 = 0
+        self.y_k1 = 0
+        self.z_k1 = 0
+        self.k = 0
+
+    def neighbour(self):
+        self.X_k_to_now()
+        self.x_now = self.x_now + (
+            random() / 100 if random() > 0.5 else -random() / 100
+        )
+        self.y_now = self.y_now + (
+            random() / 100 if random() > 0.5 else -random() / 100
+        )
+        self.z_now = self.z_now + (
+            random() / 100 if random() > 0.5 else -random() / 100
+        )
+        self.k = self.k + 1
+
+    def function(self, x: float, y: float, z: float):
+        return (
+            (x ** 5) - ((x ** 2) * y * z) + (z * x) + (y ** 2) - (z ** 3) - 10
+        )
+
+    def X_now(self):
+        return self.function(self.x_now, self.y_now, self.z_now)
+
+    def X_k(self):
+        return self.function(self.x_k, self.y_k, self.z_k)
+
+    def X_k_to_now(self):
+        self.x_now = self.x_k
+        self.y_now = self.y_k
+        self.z_now = self.z_k
+
+    def X_now_to_k1(self):
+        self.x_k1 = self.x_now
+        self.y_k1 = self.y_now
+        self.z_k1 = self.z_now
+
+    def X_k_to_k1(self):
+        self.x_k1 = self.x_k
+        self.y_k1 = self.y_k
+        self.z_k1 = self.z_k
+
+    def X_k1_to_k(self):
+        self.x_k = self.x_k1
+        self.y_k = self.y_k1
+        self.z_k = self.z_k1
+
+
+def SA():
+    K_max = 600
+    T_zero = 30
+    alpha = 0.90
+    alpha_step = 20
+    x = X(1, 0, -1)
+    T = T_zero
+    seed(time())
+    for k in range(1, K_max):
+        x.neighbour()
+        if x.X_now() <= x.X_k():
+            x.X_now_to_k1()
+        else:
+            p = math.e ** ((-(x.X_now() - x.X_k())) / T)
+            seed(time())
+            r = random()
+            if p >= r:
+                x.X_now_to_k1()
+            else:
+                x.X_k_to_k1()
+        if k % alpha_step == 0:
+            T = T * alpha
+        x.X_k1_to_k()
+    print(x.x_k, x.y_k, x.z_k)
+    print("k=", x.X_k())
+
+
+def main():
+    SA()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cnn.py b/cnn.py
(Formatting-only reflow of the cryptocurrency CNN/LSTM script: the argparse setup, PastSampler.transform, getData, Scaler, cnn_type_1, lstm_type_cnn_1, load_cnn_type_1, premain and main are rewrapped to black-style line lengths, strings and the commented-out ''' blocks switch to double quotes, and the multi-argument Conv1D/GRU/LSTM model.add and model.fit calls are expanded to one argument per line, with two blank lines inserted between top-level definitions; the visible hunks contain no logic changes.)
diff --git a/digester.py b/digester.py
index 0c17b1c..9f7bff9 100755
--- a/digester.py
+++ b/digester.py
(The same reflow applied to the feature-digest classifier, which fits a CountVectorizer, StandardScaler and SVC on /tmp/features.csv; in addition, the Python 2 print statements in the prediction report are converted to print() calls.)
diff --git a/lstm.py b/lstm.py
(Reflow of the CoinMarketCap/stock LSTM script: getData_CMC, getData_Stock, get_sets, build_model, stock, lstm_type_1 through lstm_type_4 and load_models are rewrapped, with long chained expressions split across lines, quotes normalized and comments restyled; the visible hunks contain no logic changes.)
"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" -    names = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"] +    names = [ +        "sepal-length", +        "sepal-width", +        "petal-length", +        "petal-width", +        "class", +    ]      dataset = pandas.read_csv(url, names=names)      print(dataset.shape)      print(dataset.head(20))      print(dataset.describe())      print(dataset.groupby("class").size()) -    #dataset.plot(kind="box", subplots=True, layout=(2,2), sharex=False, sharey=False) -    #dataset.hist() +    # dataset.plot(kind="box", subplots=True, layout=(2,2), sharex=False, sharey=False) +    # dataset.hist()      pandas.plotting.scatter_matrix(dataset)      plt.show()      array = dataset.values -    X = array[:,0:4] -    Y = array[:,4] +    X = array[:, 0:4] +    Y = array[:, 4]      validation_size = 0.20      seed = 7 -    X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X,Y,test_size=validation_size, random_state=seed) -    scoring="accuracy" +    ( +        X_train, +        X_validation, +        Y_train, +        Y_validation, +    ) = model_selection.train_test_split( +        X, Y, test_size=validation_size, random_state=seed +    ) +    scoring = "accuracy"      models = []      models.append(("LR", LogisticRegression()))      models.append(("LDA", LinearDiscriminantAnalysis())) @@ -62,7 +82,9 @@ def marrionette_type_1():      names = []      for name, model in models:          kfold = model_selection.KFold(n_splits=10, random_state=seed) -        cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring) +        cv_results = model_selection.cross_val_score( +            model, X_train, Y_train, cv=kfold, scoring=scoring +        )          results.append(cv_results)          names.append(name)          msg = "%s:%f(%f)" % (name, cv_results.mean(), cv_results.std()) @@ -82,12 +104,14 @@ def marrionette_type_1():      print(confusion_matrix(Y_validation, predictions))      print(classification_report(Y_validation, predictions)) +  # write code here  def premain(argparser):      signal.signal(signal.SIGINT, SigHandler_SIGINT) -    #here +    # here      marrionette_type_1() +  def main():      argparser = Argparser()      if argparser.args.dbg: @@ -101,5 +125,6 @@ def main():      else:          premain(argparser) +  if __name__ == "__main__":      main() @@ -19,61 +19,106 @@ import googleapiclient.http  import oauth2client.client  import io +  def SigHandler_SIGINT(signum, frame):      print()      sys.exit(0) +  class Argparser(object):      def __init__(self):          parser = argparse.ArgumentParser()          parser.add_argument("--which", type=str, help="which one to run") -        parser.add_argument("--download", type=str, help="file name to download") +        parser.add_argument( +            "--download", type=str, help="file name to download" +        )          parser.add_argument("--what", type=str, help="train or predict") -        parser.add_argument("--pysrcupdate", type=str, nargs="+", help="name of source files to update on the drive") -        parser.add_argument("--gpu", action="store_true", help="use gpu. 
if false will use cpu", default=False) -        parser.add_argument("--test1", action="store_true", help="test switch 1", default=False) -        parser.add_argument("--test2", action="store_true", help="test switch 2", default=False) -        parser.add_argument("--dbg", action="store_true", help="debug", default=False) +        parser.add_argument( +            "--pysrcupdate", +            type=str, +            nargs="+", +            help="name of source files to update on the drive", +        ) +        parser.add_argument( +            "--gpu", +            action="store_true", +            help="use gpu. if false will use cpu", +            default=False, +        ) +        parser.add_argument( +            "--test1", action="store_true", help="test switch 1", default=False +        ) +        parser.add_argument( +            "--test2", action="store_true", help="test switch 2", default=False +        ) +        parser.add_argument( +            "--dbg", action="store_true", help="debug", default=False +        )          self.args = parser.parse_args() +  def get_name_from_path(path):      path_pos = path.rfind("/")      if path_pos == -1:          return path      else: -        return path[path_pos+1:] +        return path[path_pos + 1 :] +  def authenticate_drive():      OAUTH2_SCOPE = "https://www.googleapis.com/auth/drive"      CLIENT_SECRETS = "./secret.json" -    flow = oauth2client.client.flow_from_clientsecrets(CLIENT_SECRETS, OAUTH2_SCOPE) +    flow = oauth2client.client.flow_from_clientsecrets( +        CLIENT_SECRETS, OAUTH2_SCOPE +    )      flow.redirect_uri = oauth2client.client.OOB_CALLBACK_URN      authorize_url = flow.step1_get_authorize_url() -    print('Go to the following link in your browser: ' + authorize_url) -    code = input('Enter verification code: ').strip() +    print("Go to the following link in your browser: " + authorize_url) +    code = input("Enter verification code: ").strip()      credentials = flow.step2_exchange(code)      http = httplib2.Http()      credentials.authorize(http) -    drive_service = build('drive', 'v3', http=http) +    drive_service = build("drive", "v3", http=http)      return drive_service +  def get_folder_id(folder_name, drive_service): -    parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+folder_name+"'", fields="files(id, name)", spaces="drive").execute() +    parent_dir = ( +        drive_service.files() +        .list( +            q="mimeType='application/vnd.google-apps.folder' and name='" +            + folder_name +            + "'", +            fields="files(id, name)", +            spaces="drive", +        ) +        .execute() +    )      folder_id = str()      for file in parent_dir.get("files", []):          print(file.get("name") + "---" + file.get("id"))          folder_id = file.get("id")      return folder_id +  def get_file_id(file_name, folder_name, drive_service):      folder_id = get_folder_id(folder_name, drive_service) -    download_to_be = drive_service.files().list(q="name='"+file_name+"' and '"+folder_id+"' in parents", fields="files(id, name)", spaces="drive").execute() +    download_to_be = ( +        drive_service.files() +        .list( +            q="name='" + file_name + "' and '" + folder_id + "' in parents", +            fields="files(id, name)", +            spaces="drive", +        ) +        .execute() +    )      file_id = str()      for file in download_to_be.get("files", []):          print(file.get("name") + "---" + 
file.get("id"))          file_id = file.get("id")      return file_id +  def g_drive_up(file_path, file_name, file_type, to_folder):      FILENAME = file_path      MIMETYPE = file_type @@ -81,64 +126,113 @@ def g_drive_up(file_path, file_name, file_type, to_folder):      DESCRIPTION = "a file"      drive_service = authenticate_drive() -    media_body = googleapiclient.http.MediaFileUpload(FILENAME, mimetype=MIMETYPE, resumable=True) -    parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+to_folder+"'", fields="files(id, name)", spaces="drive").execute() +    media_body = googleapiclient.http.MediaFileUpload( +        FILENAME, mimetype=MIMETYPE, resumable=True +    ) +    parent_dir = ( +        drive_service.files() +        .list( +            q="mimeType='application/vnd.google-apps.folder' and name='" +            + to_folder +            + "'", +            fields="files(id, name)", +            spaces="drive", +        ) +        .execute() +    )      folder_id = str()      for file in parent_dir.get("files", []):          print(file.get("name") + "---" + file.get("id"))          folder_id = file.get("id") -    body = {'name': TITLE, 'description': DESCRIPTION, 'parents': [folder_id]} -    new_file = drive_service.files().create(body=body, media_body=media_body, fields="id").execute() +    body = {"name": TITLE, "description": DESCRIPTION, "parents": [folder_id]} +    new_file = ( +        drive_service.files() +        .create(body=body, media_body=media_body, fields="id") +        .execute() +    )      print(new_file.get("id")) -    #pprint.pprint(new_file) +    # pprint.pprint(new_file) +  def g_drive_down(folder_name, file_name):      drive_service = authenticate_drive() -    #get folder id -    parent_dir = drive_service.files().list(q="mimeType='application/vnd.google-apps.folder' and name='"+folder_name+"'", fields="files(id, name)", spaces="drive").execute() +    # get folder id +    parent_dir = ( +        drive_service.files() +        .list( +            q="mimeType='application/vnd.google-apps.folder' and name='" +            + folder_name +            + "'", +            fields="files(id, name)", +            spaces="drive", +        ) +        .execute() +    )      folder_id = str()      for file in parent_dir.get("files", []):          print(file.get("name") + "---" + file.get("id"))          folder_id = file.get("id") -    #get file id -    download_to_be = drive_service.files().list(q="name='"+file_name+"' and '"+folder_id+"' in parents", fields="files(id, name)", spaces="drive").execute() +    # get file id +    download_to_be = ( +        drive_service.files() +        .list( +            q="name='" + file_name + "' and '" + folder_id + "' in parents", +            fields="files(id, name)", +            spaces="drive", +        ) +        .execute() +    )      file_id = str()      for file in download_to_be.get("files", []):          print(file.get("name") + "---" + file.get("id"))          file_id = file.get("id")      request = drive_service.files().get_media(fileId=file_id) -    #fh = io.BytesIO() +    # fh = io.BytesIO()      fh = io.FileIO(file_name, "w")      downloader = MediaIoBaseDownload(fh, request)      done = False      while done is False:          status, done = downloader.next_chunk()          print("Download %d%%." 
-    #print(downloader)
+    # print(downloader)
+
 def g_drive_update(folder_name, file_name):
     drive_service = authenticate_drive()
     file_id = get_file_id(file_name, folder_name, drive_service)
     u_file = drive_service.files().get(fileId=file_id).execute()
     media_body = MediaFileUpload(resumable=True)
-    updated_file = drive_service.files().update(fileId=file_id, body=u_file, media_body=media_body).execute()
+    updated_file = (
+        drive_service.files()
+        .update(fileId=file_id, body=u_file, media_body=media_body)
+        .execute()
+    )
+
 def launch_ais(which):
-    if which == "marionette": marrionette_type_1()
-    elif which == "lstm_type_1": lstm_type_1("ethereum", "ether")
-    elif which == "lstm_type_2": lstm_type_2("ethereum", "ether", 5, 20)
-    elif which == "lstm_type_3": lstm_type_3("ethereum", "ether", 5, 20)
-    elif which == "cnn_type_1": cnn_type_1()
-    elif which == "tfann_type_1": tfann_type_1()
-    else: pass
+    if which == "marionette":
+        marrionette_type_1()
+    elif which == "lstm_type_1":
+        lstm_type_1("ethereum", "ether")
+    elif which == "lstm_type_2":
+        lstm_type_2("ethereum", "ether", 5, 20)
+    elif which == "lstm_type_3":
+        lstm_type_3("ethereum", "ether", 5, 20)
+    elif which == "cnn_type_1":
+        cnn_type_1()
+    elif which == "tfann_type_1":
+        tfann_type_1()
+    else:
+        pass
+
 # write code here
 def premain(argparser):
     signal.signal(signal.SIGINT, SigHandler_SIGINT)
-    #here
+    # here
     if argparser.args.pysrcupdate:
         for src in argparser.args.pysrcupdate:
             g_drive_up(src, get_name_from_path(src), "text/python", "colab")
@@ -146,6 +240,7 @@ def premain(argparser):
         g_drive_down("colab", "main.py")
     launch_ais(argparser.args.which)
+
 def main():
     argparser = Argparser()
@@ -153,7 +248,8 @@ def main():
             premain(argparser)
         except Exception as e:
             print(e.__doc__)
-            if e.message: print(e.message)
+            if e.message:
+                print(e.message)
             variables = globals().copy()
             variables.update(locals())
             shell = code.InteractiveConsole(variables)
@@ -161,5 +257,6 @@ def main():
     else:
         premain(argparser)
+
 if __name__ == "__main__":
     main()
@@ -18,30 +18,41 @@ from keras.layers import LSTM
 from keras.layers import Dropout
 from keras.models import load_model
+
 def SigHandler_SIGINT(signum, frame):
     print()
     sys.exit(0)
+
 class Argparser(object):
     def __init__(self):
         parser = argparse.ArgumentParser()
         parser.add_argument("--string", type=str, help="string")
-        parser.add_argument("--bool", action="store_true", help="bool", default=False)
-        parser.add_argument("--dbg", action="store_true", help="debug", default=False)
+        parser.add_argument(
+            "--bool", action="store_true", help="bool", default=False
+        )
+        parser.add_argument(
+            "--dbg", action="store_true", help="debug", default=False
+        )
         self.args = parser.parse_args()
+
 def build_model(train_data):
     model = models.Sequential()
-    model.add(layers.Dense(64, activation="relu", input_shape=(train_data.shape[1],)))
+    model.add(
+        layers.Dense(64, activation="relu", input_shape=(train_data.shape[1],))
+    )
     model.add(layers.Dense(64, activation="relu"))
     model.add(layers.Dense(1))
     model.compile(optimizer="rmsprop", loss="mse", metrics=["acc"])
     return model
+
 # write code here
 def premain(argparser):
     signal.signal(signal.SIGINT, SigHandler_SIGINT)
-    #here
+    # here
+
 def main():
     argparser = Argparser()
@@ -50,7 +61,8 @@ def main():
             premain(argparser)
         except Exception as e:
             print(e.__doc__)
-            if e.message: print(e.message)
+            if e.message:
+                print(e.message)
             variables = globals().copy()
             variables.update(locals())
             shell = code.InteractiveConsole(variables)
@@ -58,5 +70,6 @@ def main():
     else:
         premain(argparser)
+
 if __name__ == "__main__":
     main()
@@ -2,7 +2,7 @@
 # _*_ coding=utf-8 _*_
 # original source-https://nicholastsmith.wordpress.com/2017/11/13/cryptocurrency-price-prediction-using-deep-learning-in-tensorflow/
-#@#!pip install TFANN
+# @#!pip install TFANN
 import code
 import readline
 import signal
@@ -14,120 +14,143 @@ import pandas as pd
 import urllib.request
 import matplotlib.pyplot as mpl
-def GetAPIUrl(cur, sts = 1420070400):
-    return 'https://poloniex.com/public?command=returnChartData&currencyPair=USDT_{:s}&start={:d}&end=9999999999&period=7200'.format(cur, sts)
+
+def GetAPIUrl(cur, sts=1420070400):
+    return "https://poloniex.com/public?command=returnChartData&currencyPair=USDT_{:s}&start={:d}&end=9999999999&period=7200".format(
+        cur, sts
+    )
+
 def GetCurDF(cur, fp):
     openUrl = urllib.request.urlopen(GetAPIUrl(cur))
     r = openUrl.read()
     openUrl.close()
     df = pd.read_json(r.decode())
-    df['date'] = df['date'].astype(np.int64) // 1000000000
+    df["date"] = df["date"].astype(np.int64) // 1000000000
     print(df.head())
     return df
+
 class PastSampler:
     def __init__(self, N, K):
         self.K = K
         self.N = N
-    def transform(self, A, Y = None):
-        M = self.N + self.K     #Number of samples per row (sample + target)
-        #Matrix of sample indices like: {{1, 2..., M}, {2, 3, ..., M + 1}}
+    def transform(self, A, Y=None):
+        M = self.N + self.K  # Number of samples per row (sample + target)
+        # Matrix of sample indices like: {{1, 2..., M}, {2, 3, ..., M + 1}}
         I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
         B = A[I].reshape(-1, M * A.shape[1], *A.shape[2:])
-        ci = self.N * A.shape[1]    #Number of features per sample
-        return B[:, :ci], B[:, ci:] #Sample matrix, Target matrix
+        ci = self.N * A.shape[1]  # Number of features per sample
+        return B[:, :ci], B[:, ci:]  # Sample matrix, Target matrix
+
 def tfann_type_1():
     #%%Path to store cached currency data
-    datPath = 'CurDat/'
+    datPath = "CurDat/"
     if not os.path.exists(datPath):
         os.mkdir(datPath)
-    #Different cryptocurrency types
-    cl = ['BTC', 'LTC', 'ETH', 'XMR']
-    #Columns of price data to use
-    CN = ['close', 'high', 'low', 'open', 'volume']
-    #Store data frames for each of above types
+    # Different cryptocurrency types
+    cl = ["BTC", "LTC", "ETH", "XMR"]
+    # Columns of price data to use
+    CN = ["close", "high", "low", "open", "volume"]
+    # Store data frames for each of above types
     D = []
     for ci in cl:
-        dfp = os.path.join(datPath, ci + '.csv')
+        dfp = os.path.join(datPath, ci + ".csv")
         try:
-            df = pd.read_csv(dfp, sep = ',')
+            df = pd.read_csv(dfp, sep=",")
         except FileNotFoundError:
             df = GetCurDF(ci, dfp)
         D.append(df)
     #%%Only keep range of data that is common to all currency types
     cr = min(Di.shape[0] for Di in D)
     for i in range(len(cl)):
-        D[i] = D[i][(D[i].shape[0] - cr):]
+        D[i] = D[i][(D[i].shape[0] - cr) :]
     #%%Features are channels
     C = np.hstack((Di[CN] for Di in D))[:, None, :]
-    HP = 16                 #Holdout period
+    HP = 16  # Holdout period
     A = C[0:-HP]
-    SV = A.mean(axis = 0)   #Scale vector
-    C /= SV                 #Basic scaling of data
+    SV = A.mean(axis=0)  # Scale vector
+    C /= SV  # Basic scaling of data
     #%%Make samples of temporal sequences of pricing data (channel)
-    NPS, NFS = 256, 16         #Number of past and future samples
+    NPS, NFS = 256, 16  # Number of past and future samples
     ps = PastSampler(NPS, NFS)
     B, Y = ps.transform(A)
     #%%Architecture of the neural network
     NC = B.shape[2]
-    #2 1-D conv layers with relu followed by 1-d conv output layer
-    ns = [('C1d', [8, NC, NC * 2], 4), ('AF', 'relu'),
-          ('C1d', [8, NC * 2, NC * 2], 2), ('AF', 'relu'),
-          ('C1d', [8, NC * 2, NC], 2)]
-    #Create the neural network in TensorFlow
-    cnnr = ANNR(B[0].shape, ns, batchSize = 32, learnRate = 2e-5,
-                maxIter = 64, reg = 1e-5, tol = 1e-2, verbose = True)
+    # 2 1-D conv layers with relu followed by 1-d conv output layer
+    ns = [
+        ("C1d", [8, NC, NC * 2], 4),
+        ("AF", "relu"),
+        ("C1d", [8, NC * 2, NC * 2], 2),
+        ("AF", "relu"),
+        ("C1d", [8, NC * 2, NC], 2),
+    ]
+    # Create the neural network in TensorFlow
+    cnnr = ANNR(
+        B[0].shape,
+        ns,
+        batchSize=32,
+        learnRate=2e-5,
+        maxIter=64,
+        reg=1e-5,
+        tol=1e-2,
+        verbose=True,
+    )
     cnnr.fit(B, Y)
-    PTS = []                        #Predicted time sequences
-    P, YH = B[[-1]], Y[[-1]]        #Most recent time sequence
-    for i in range(HP // NFS):  #Repeat prediction
-        P = np.concatenate([P[:, NFS:], YH], axis = 1)
+    PTS = []  # Predicted time sequences
+    P, YH = B[[-1]], Y[[-1]]  # Most recent time sequence
+    for i in range(HP // NFS):  # Repeat prediction
+        P = np.concatenate([P[:, NFS:], YH], axis=1)
         YH = cnnr.predict(P)
         PTS.append(YH)
     PTS = np.hstack(PTS).transpose((1, 0, 2))
-    A = np.vstack([A, PTS]) #Combine predictions with original data
-    A = np.squeeze(A) * SV  #Remove unittime dimension and rescale
+    A = np.vstack([A, PTS])  # Combine predictions with original data
+    A = np.squeeze(A) * SV  # Remove unittime dimension and rescale
     C = np.squeeze(C) * SV
     nt = 4
     PF = cnnr.PredictFull(B[:nt])
     for i in range(nt):
-        fig, ax = mpl.subplots(1, 4, figsize = (16 / 1.24, 10 / 1.25))
+        fig, ax = mpl.subplots(1, 4, figsize=(16 / 1.24, 10 / 1.25))
         ax[0].plot(PF[0][i])
-        ax[0].set_title('Input')
+        ax[0].set_title("Input")
         ax[1].plot(PF[2][i])
-        ax[1].set_title('Layer 1')
+        ax[1].set_title("Layer 1")
         ax[2].plot(PF[4][i])
-        ax[2].set_title('Layer 2')
+        ax[2].set_title("Layer 2")
         ax[3].plot(PF[5][i])
-        ax[3].set_title('Output')
-        fig.text(0.5, 0.06, 'Time', ha='center')
-        fig.text(0.06, 0.5, 'Activation', va='center', rotation='vertical')
+        ax[3].set_title("Output")
+        fig.text(0.5, 0.06, "Time", ha="center")
+        fig.text(0.06, 0.5, "Activation", va="center", rotation="vertical")
         mpl.show()
     CI = list(range(C.shape[0]))
     AI = list(range(C.shape[0] + PTS.shape[0] - HP))
-    NDP = PTS.shape[0] #Number of days predicted
+    NDP = PTS.shape[0]  # Number of days predicted
     for i, cli in enumerate(cl):
-        fig, ax = mpl.subplots(figsize = (16 / 1.5, 10 / 1.5))
-        hind = i * len(CN) + CN.index('high')
-        ax.plot(CI[-4 * HP:], C[-4 * HP:, hind], label = 'Actual')
-        ax.plot(AI[-(NDP + 1):], A[-(NDP + 1):, hind], '--', label = 'Prediction')
-        ax.legend(loc = 'upper left')
-        ax.set_title(cli + ' (High)')
-        ax.set_ylabel('USD')
-        ax.set_xlabel('Time')
+        fig, ax = mpl.subplots(figsize=(16 / 1.5, 10 / 1.5))
+        hind = i * len(CN) + CN.index("high")
+        ax.plot(CI[-4 * HP :], C[-4 * HP :, hind], label="Actual")
+        ax.plot(
+            AI[-(NDP + 1) :], A[-(NDP + 1) :, hind], "--", label="Prediction"
+        )
+        ax.legend(loc="upper left")
+        ax.set_title(cli + " (High)")
+        ax.set_ylabel("USD")
+        ax.set_xlabel("Time")
         ax.axes.xaxis.set_ticklabels([])
         mpl.show()
+
 # write code here
 def premain():
-    #here
+    # here
     tfann_type_1()
+
 def main():
     premain()
+
 if __name__ == "__main__":
     main()
|
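For readers skimming the tfann.py hunk above, the following is a minimal standalone numpy sketch (not part of this commit) of the sliding-window sampling that PastSampler.transform performs; the toy series and the window sizes N and K are made-up values for illustration only.

# Illustrative only -- mirrors the index construction in PastSampler.transform.
import numpy as np

N, K = 3, 2                       # assumed: past samples and future targets per row
A = np.arange(10).reshape(-1, 1)  # assumed: toy series, 10 time steps, 1 feature

M = N + K
# Each row of I holds M consecutive time indices: [0..4], [1..5], ...
I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
B = A[I].reshape(-1, M * A.shape[1])
ci = N * A.shape[1]               # number of past-feature columns per row
samples, targets = B[:, :ci], B[:, ci:]

print(samples[0], targets[0])     # -> [0 1 2] [3 4]

With the commit's actual settings (NPS=256 past steps, NFS=16 future steps, 20 feature columns) the same indexing yields one training row per window of 272 consecutive time steps, split into a 256x20 sample block and a 16x20 target block.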
