Created
August 1, 2020 19:18
-
-
Save abhinavagarwalla/ed8e6b6e83a21ed06e6fe326fb13a4f3 to your computer and use it in GitHub Desktop.
Convert ImageNet-Small (64x64) into the same format as ImageNet (as an ImageFolder)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| from torchvision.datasets import CIFAR10 | |
| from torchvision import transforms | |
| import os | |
| import shutil | |
| import pickle | |
| from PIL import Image | |
| from skimage.io import imsave | |
| import numpy as np | |
| from tqdm import tqdm | |
def convert_to_imagenet_folders(root, dest_dir, train=False,
                                class_map_path='map_clsloc.txt'):
    """Unpack pickled 64x64 ImageNet batches into an ImageFolder-style tree.

    Images are written as PNG files to
    ``<dest_dir>/<train|val>/<class name>/<index>.png``.

    Args:
        root: Directory that contains the ``imagenet_64x64`` folder holding
            the pickled batch files.
        dest_dir: Output directory; it is created if missing.
        train: When true, convert the ten ``train_data_batch_*`` files into
            the ``train`` split; otherwise convert ``val_data`` into ``val``.
        class_map_path: Text file whose lines start with
            ``<class name> <integer label>``; it maps the integer labels
            stored in the batches to output folder names.

    Raises:
        ValueError: If a batch has no ``'labels'`` entry, or its label count
            does not match its image count.
    """
    base_folder = 'imagenet_64x64'

    # The second element of each pair is the checksum shipped with the file
    # list; it is kept for reference only and is not verified here.
    train_list = [
        ["train_data_batch_1", "7d78180ed6d675199904d73e97363aa3"],
        ["train_data_batch_2", "62979cbd524679ea440f2eb998cf70ed"],
        ["train_data_batch_3", "022d13e31ebf76e3a3b4995f59d5898b"],
        ["train_data_batch_4", "b595889ec25e147ac6a807e55fa0a5a0"],
        ["train_data_batch_5", "d0ca147cd803ad8ad2ea89a82838ff61"],
        ["train_data_batch_6", "559bdb1c5dc35865d698de7e96067db5"],
        ["train_data_batch_7", "9799a53adf03a4f99599822ac8ee65ad"],
        ["train_data_batch_8", "97c910a5c1807fb7ff08f5722c1a6cb8"],
        ["train_data_batch_9", "fc1ef2b2a8667a72a046e165413ef006"],
        ["train_data_batch_10", "b31fa3cd87885f6161c6e12cb6fd6030"],
    ]
    test_list = [
        ['val_data', '68a29f115231937c359924d8af1b0922'],
    ]

    # Map integer label -> class folder name. Blank lines are skipped so a
    # trailing newline in the map file does not abort the conversion.
    classes = {}
    with open(class_map_path) as meta_file:
        for line in meta_file:
            fields = line.split()
            if len(fields) < 2:
                continue
            classes[int(fields[1])] = fields[0]

    split_dir = 'train' if train else 'val'
    split_path = os.path.join(dest_dir, split_dir)

    # makedirs(exist_ok=True) makes re-runs safe and does not depend on
    # dest_dir ending with a path separator.
    os.makedirs(split_path, exist_ok=True)
    for class_name in classes.values():
        os.makedirs(os.path.join(split_path, class_name), exist_ok=True)

    downloaded_list = train_list if train else test_list

    # Running index across all batches: numbering per batch would let a later
    # batch overwrite "<i>.png" written by an earlier one in the same class.
    next_index = 0

    # now load the pickled numpy arrays
    for file_name, _checksum in downloaded_list:
        file_path = os.path.join(root, base_folder, file_name)
        # NOTE: pickle.load executes arbitrary code from the file; only run
        # this on batch files obtained from a trusted source.
        with open(file_path, 'rb') as f:
            entry = pickle.load(f)

        if 'labels' not in entry:
            raise ValueError("no 'labels' entry in batch file " + file_path)
        targets = entry['labels']

        data = np.vstack(entry['data']).reshape(-1, 3, 64, 64)
        data = data.transpose((0, 2, 3, 1))  # convert to HWC
        if len(targets) != len(data):
            raise ValueError(
                "label/image count mismatch in batch file " + file_path)

        for i, image in enumerate(tqdm(data, desc=file_name)):
            out_name = str(next_index + i) + ".png"
            imsave(os.path.join(split_path, classes[targets[i]], out_name),
                   image)
        next_index += len(data)
if __name__ == "__main__":
    # Script entry point: convert the training split using the repository's
    # relative dataset layout.
    convert_to_imagenet_folders(
        root='../../datasets/',
        dest_dir='../../datasets/imagenet_folder_64/',
        train=True,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment