import os
import glob

import numpy as np
from sklearn.preprocessing import MinMaxScaler

training_files = glob.glob(os.path.join(train_dir, '*'))
validation_files = glob.glob(os.path.join(validation_dir, '*'))

min_max_scaler_train = MinMaxScaler()

# Constants. Note they are the same for both runs so the comparison stays meaningful.
BATCH_SIZE = 32
NO_OF_EPOCHS = 3

# Build a per-row descriptor for the training set: one
# (location, label, offset) entry per row of every training file.
train_data_descriptor = np.empty((1, 3))
for train_file_name in training_files:
    file_np = np.load(train_file_name)
    rows = file_np.shape[0]
    labels = np.repeat(0, rows)
    locations = np.repeat(train_file_name, rows)
    offsets = np.arange(0, rows, 1, dtype=int)
    # We have created labels, locations and offsets.
    # Combine them into a single descriptor with one row per data point.
    single_file_data = np.vstack((locations, labels, offsets)).T
    # Append this file's descriptor to the running array.
    train_data_descriptor = np.vstack((train_data_descriptor, single_file_data))
    # Fit the min-max scaler to the training data as we go.
    # partial_fit lets us handle data too large to hold in memory at once.
    min_max_scaler_train.partial_fit(file_np)

# Drop the first row: it is the empty placeholder, not real data.
train_data_descriptor = train_data_descriptor[1:, :].tolist()

# Repeat the same process for validation (without fitting the scaler:
# it must only see training data).
validation_data_descriptor = np.empty((1, 3))
for validation_file_name in validation_files:
    file_np = np.load(validation_file_name)
    rows = file_np.shape[0]
    labels = np.repeat(0, rows)
    locations = np.repeat(validation_file_name, rows)
    offsets = np.arange(0, rows, 1, dtype=int)
    single_file_data = np.vstack((locations, labels, offsets)).T
    validation_data_descriptor = np.vstack((validation_data_descriptor, single_file_data))

validation_data_descriptor = validation_data_descriptor[1:, :].tolist()
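The descriptor lists and the fitted scaler are the ingredients for lazy batch loading: each descriptor entry records which file to open and which row to take, so a generator can yield scaled batches of BATCH_SIZE without ever holding the full dataset in memory. Below is a minimal sketch of such a generator; the name `batch_generator` and the shuffling step are illustrative assumptions, not part of the code above.

import random

import numpy as np

def batch_generator(data_descriptor, scaler, batch_size):
    """Yield (features, labels) batches from (location, label, offset) rows.

    Hypothetical helper: the section above only builds the descriptor;
    this shows one way it could be consumed.
    """
    while True:
        # Shuffle once per epoch so batches differ between epochs.
        random.shuffle(data_descriptor)
        for start in range(0, len(data_descriptor), batch_size):
            chunk = data_descriptor[start:start + batch_size]
            features, labels = [], []
            for location, label, offset in chunk:
                # Descriptor fields come back as strings after vstack,
                # so cast offset and label before use.
                file_np = np.load(location)
                features.append(file_np[int(offset)])
                labels.append(int(label))
            # Scale with training-set statistics only.
            yield scaler.transform(np.asarray(features)), np.asarray(labels)

# Usage sketch:
# train_gen = batch_generator(train_data_descriptor, min_max_scaler_train, BATCH_SIZE)
# X_batch, y_batch = next(train_gen)

Reloading the file for every row is wasteful; passing `mmap_mode='r'` to `np.load`, or grouping descriptor entries by file before loading, would cut the I/O cost considerably.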