Skip to content

Instantly share code, notes, and snippets.

@abhinavagarwalla
Created August 1, 2020 19:18
Show Gist options
  • Select an option

  • Save abhinavagarwalla/ed8e6b6e83a21ed06e6fe326fb13a4f3 to your computer and use it in GitHub Desktop.

Select an option

Save abhinavagarwalla/ed8e6b6e83a21ed06e6fe326fb13a4f3 to your computer and use it in GitHub Desktop.
Convert ImageNet-Small (64x64) into the same format as ImageNet (as an ImageFolder)
import torch
from torchvision.datasets import CIFAR10
from torchvision import transforms
import os
import shutil
import pickle
from PIL import Image
from skimage.io import imsave
import numpy as np
from tqdm import tqdm
def convert_to_imagenet_folders(root, dest_dir, train=False,
                                class_map_path='map_clsloc.txt'):
    """Unpack pickled 64x64 ImageNet batches into an ImageFolder-style tree.

    Reads the batch files found under ``<root>/imagenet_64x64/`` and writes
    every image as a PNG to ``<dest_dir>/<split>/<class_folder>/<index>.png``,
    where ``<split>`` is ``'train'`` or ``'val'``.

    Args:
        root: Directory that contains the ``imagenet_64x64`` folder with the
            pickled batch files (``train_data_batch_1`` .. ``_10``,
            ``val_data``).
        dest_dir: Output directory; created (with parents) if missing.
        train: If True convert the training batches, otherwise ``val_data``.
        class_map_path: Text file mapping labels to folder names. Each
            non-blank line is split on single spaces; the first field is used
            as the class folder name and the second as the integer label.
            Defaults to ``map_clsloc.txt`` in the current working directory.

    Raises:
        KeyError: If a batch file has no ``'labels'`` entry, or a label is
            missing from the class map.
        OSError: If a batch file or the class map cannot be read, or the
            output folders cannot be created.
    """
    base_folder = 'imagenet_64x64'
    # The second element of each pair looks like an MD5 digest of the batch
    # file; it is carried along but not verified here.
    train_list = [
        ["train_data_batch_1", "7d78180ed6d675199904d73e97363aa3"],
        ["train_data_batch_2", "62979cbd524679ea440f2eb998cf70ed"],
        ["train_data_batch_3", "022d13e31ebf76e3a3b4995f59d5898b"],
        ["train_data_batch_4", "b595889ec25e147ac6a807e55fa0a5a0"],
        ["train_data_batch_5", "d0ca147cd803ad8ad2ea89a82838ff61"],
        ["train_data_batch_6", "559bdb1c5dc35865d698de7e96067db5"],
        ["train_data_batch_7", "9799a53adf03a4f99599822ac8ee65ad"],
        ["train_data_batch_8", "97c910a5c1807fb7ff08f5722c1a6cb8"],
        ["train_data_batch_9", "fc1ef2b2a8667a72a046e165413ef006"],
        ["train_data_batch_10", "b31fa3cd87885f6161c6e12cb6fd6030"],
    ]
    test_list = [
        ['val_data', '68a29f115231937c359924d8af1b0922'],
    ]

    # Build {integer label -> class folder name}; the context manager closes
    # the file, and blank/short lines are skipped instead of crashing.
    classes = {}
    with open(class_map_path) as meta:
        for line in meta:
            fields = line.strip().split(' ')
            if len(fields) < 2:
                continue
            classes[int(fields[1])] = fields[0]

    split_dir = 'train' if train else 'val'
    # Join path components properly so dest_dir works with or without a
    # trailing separator; exist_ok makes re-runs safe.
    split_path = os.path.join(dest_dir, split_dir)
    os.makedirs(split_path, exist_ok=True)
    for class_folder in classes.values():
        os.makedirs(os.path.join(split_path, class_folder), exist_ok=True)

    downloaded_list = train_list if train else test_list

    # Running image index across all batch files. Numbering per batch would
    # let a later batch overwrite an earlier image of the same class.
    next_index = 0

    # now load the pickled numpy arrays
    for file_name, _checksum in downloaded_list:
        file_path = os.path.join(root, base_folder, file_name)
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # batch files obtained from a trusted source.
        with open(file_path, 'rb') as f:
            entry = pickle.load(f)

        if 'labels' not in entry:
            # Without labels there is no class folder to write into; fail
            # loudly instead of reusing labels from a previous batch.
            raise KeyError("'labels' missing from batch file " + file_path)
        targets = entry['labels']

        data = np.vstack(entry['data']).reshape(-1, 3, 64, 64)
        data = data.transpose((0, 2, 3, 1))  # convert to HWC

        for i, d in enumerate(tqdm(data)):
            out_name = str(next_index + i) + ".png"
            imsave(os.path.join(split_path, classes[targets[i]], out_name), d)
        next_index += len(data)
if __name__ == "__main__":
    # Script entry point: convert the training split using paths relative to
    # the current working directory.
    convert_to_imagenet_folders(
        root='../../datasets/',
        dest_dir='../../datasets/imagenet_folder_64/',
        train=True,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment