# --- Setup -------------------------------------------------------------------
# NOTE(review): prefer `%pip install -q <pkg>==<version>` over `!pip install`
# so the install targets the running kernel's environment and is pinned.
import itertools

import torch
import gc
import numpy as np

# Enumerate every CUDA device visible to this process.
list_of_gpus = [torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())]
list_of_gpus

# speed_matrix[src, dst] will hold the measured transfer speed for each pair.
speed_matrix = np.zeros((len(list_of_gpus), len(list_of_gpus)))


def build_transfer_test_pattern(devices, shuffle=np.random.shuffle):
    """Return every ordered (src, dst) pair of distinct devices, shuffled so
    that adjacent pairs share no device (best effort).

    Keeping consecutive measurements on disjoint GPU pairs reduces the chance
    that residual activity from one transfer skews the next measurement.

    Parameters
    ----------
    devices : sequence
        The devices (any hashable objects) to pair up.
    shuffle : callable, optional
        In-place shuffler applied to the pair list.  Defaults to
        ``np.random.shuffle``; pass ``np.random.default_rng(SEED).shuffle``
        for a reproducible ordering.

    Returns
    -------
    list of tuple
        All ``len(devices) * (len(devices) - 1)`` ordered pairs, each exactly
        once.
    """
    # itertools.permutations(..., 2) yields exactly the ordered pairs of
    # distinct elements -- no nested loop with a `continue` needed.
    pattern = list(itertools.permutations(devices, 2))
    shuffle(pattern)

    # De-conflict adjacent pairs: if pair i and pair i+1 share a device, swap
    # pair i+1 with the first later pair that conflicts with neither device.
    i = 0
    while i < len(pattern) - 1:
        src, dst = pattern[i]
        if src in pattern[i + 1] or dst in pattern[i + 1]:
            for j in range(i + 2, len(pattern)):
                if src not in pattern[j] and dst not in pattern[j]:
                    pattern[i + 1], pattern[j] = pattern[j], pattern[i + 1]
                    break
            # If no non-conflicting pair exists further on, the conflict is
            # left in place (best effort -- same as the original behavior).
        i += 1
    return pattern


test_pattern = build_transfer_test_pattern(list_of_gpus)

for gpu1, gpu2 in test_pattern:
    print(f"{gpu1} -> {gpu2}")

# Verify every ordered GPU combination appears exactly once.
expected_combinations = set(itertools.permutations(list_of_gpus, 2))
assert len(test_pattern) == len(expected_combinations), "Some GPU combinations are missing"
assert set(test_pattern) == expected_combinations, "Test pattern doesn't match expected GPU combinations"

print(f"All {len(test_pattern)} GPU transfer combinations are present")
# --- Peer (P2P) access matrix ------------------------------------------------
# For every ordered GPU pair, report whether direct peer-to-peer access is
# available (i.e. transfers need not stage through host memory).
for gpu1 in list_of_gpus:
    print(f"GPU {gpu1} can access: ", end="")
    for gpu2 in list_of_gpus:
        if gpu1 == gpu2:
            continue
        can_access = torch.cuda.can_device_access_peer(gpu1, gpu2)
        print(f"GPU {gpu2}: {'✓' if can_access else '✗'}, ", end=" ")
    print()


# --- Heatmap of measured transfer speeds -------------------------------------
import matplotlib.pyplot as plt
import numpy as np

# Mask the diagonal: same-GPU "transfers" were never measured, so hide them
# instead of plotting the zero placeholder.
masked_matrix = speed_matrix.copy()
np.fill_diagonal(masked_matrix, np.nan)  # idiomatic replacement for the manual index loop

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(masked_matrix, cmap='viridis')

# One tick per GPU on both axes.
ax.set_xticks(np.arange(len(list_of_gpus)))
ax.set_yticks(np.arange(len(list_of_gpus)))
ax.set_xticklabels([f'GPU {i}' for i in range(len(list_of_gpus))])
ax.set_yticklabels([f'GPU {i}' for i in range(len(list_of_gpus))])

# Tie the colorbar explicitly to this figure/axes instead of relying on the
# pyplot "current axes" state machine.
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('GB/s')

ax.set_title('GPU to GPU Transfer Speeds')
ax.set_xlabel('Destination GPU')
ax.set_ylabel('Source GPU')

# Rotate x-axis labels for better readability
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

fig.tight_layout()
plt.show()
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.15" } }, "nbformat": 4, "nbformat_minor": 2 }