# Environment setup and training commands for the MosaicML BERT benchmark
# (examples/benchmarks/bert).
#
# Every command must sit on its own line: when the whole file is a single line
# that starts with '#', the shell treats all of it as one comment and nothing
# is executed.
#
# NOTE(review): `conda activate` and `module load` are normally shell functions
# that only exist in an initialised interactive/login shell, so these steps are
# presumably meant to be pasted or sourced there -- confirm for your cluster.
#
# Optional environment:
#   APEX_TARGET  Directory passed to `pip install --target` for Apex.
#                Defaults to the site-packages ("purelib") directory of the
#                python that is active after `conda activate mosaic`.

# Setup conda
conda create -n mosaic python=3.10 -y
conda activate mosaic

# Load cuda
module load cuda/11.7.0

# Install torch
pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 \
  --extra-index-url https://download.pytorch.org/whl/cu117

# Setup mosaic
git clone https://github.com/mosaicml/examples.git
cd examples/benchmarks/bert
pip install -r requirements.txt

# Install Apex
# Got this from the mosaic/composer repo Dockerfile
# https://github.com/mosaicml/composer/blob/dev/docker/Dockerfile#L280-L287
git clone https://github.com/NVIDIA/apex apex_external

# Install location for Apex. Instead of a hard-coded per-user path, ask the
# active interpreter for its site-packages directory; set APEX_TARGET to
# override.
apex_target="${APEX_TARGET:-$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')}"

# Build inside a subshell so the working directory is restored even when a
# step fails; the && chain stops the build if the pinned commit cannot be
# checked out.
(
  cd apex_external &&
    git checkout 82ee367f3da74b4cd62a1fb47aa9806f0f47b58b &&
    pip install --no-cache-dir -r requirements.txt &&
    pip install --no-cache-dir -v \
      --global-option="--cpp_ext" \
      --global-option="--cuda_ext" \
      --target "${apex_target:?could not determine the Apex install target}" \
      ./
)

# Download small sample of C4
python src/convert_dataset.py --dataset c4 --data_subset en --out_root ./my-copy-c4 --splits train_small val

# NOTE: Make sure to modify the yaml file so that "train" is "train_small".
# This file: yamls/main/mosaic-bert-base-uncased.yaml

# Run training.
composer main.py yamls/main/mosaic-bert-base-uncased.yaml