# Data-pipeline setup: vocabulary, collate function, datasets and loaders.

# Forwarded to `collate` as its `max_len` argument.
max_len = 20

# Token inventory: the ten decimal digits followed by the Roman-numeral letters.
arabic_symbols = [str(digit) for digit in range(10)]
roman_symbols = list('IVXLCDM')

# NOTE(review): all three special symbols are the same empty string as written.
# They look like angle-bracket tokens that were lost somewhere upstream, and
# duplicate specials may be rejected by the vocab builder -- confirm the
# intended names and keep the `pad_idx` lookup below in sync with them.
special_symbols = ['', '', '']

vocab = build_vocab_from_iterator(
    arabic_symbols + roman_symbols,
    specials=special_symbols,
)
vocab_size = len(vocab)

# Index of the '' token; handed to `collate` as the padding index.
pad_idx = vocab['']
collate_fn = partial(collate, pad_idx=pad_idx, max_len=max_len)

# One processor, built from the vocabulary, is shared by both dataset splits.
proc = Processor(vocab)
train_ds, valid_ds = (
    NumberDataset.from_file(split, processor=proc)
    for split in ('train', 'valid')
)

# Both loaders use the same batch size and collate function.
make_loader = partial(DataLoader, batch_size=10, collate_fn=collate_fn)
train_dl = make_loader(train_ds)
valid_dl = make_loader(valid_ds)