Last active
August 16, 2022 23:15
-
-
Save rmcgibbo/7178576 to your computer and use it in GitHub Desktop.
Revisions
-
rmcgibbo renamed this gist
Oct 27, 2013 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
rmcgibbo revised this gist
Oct 27, 2013 . 1 changed file with 58 additions and 74 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,108 +1,92 @@ /* * An efficient MPI parallel reduction without MPI_Scan or MPI_Reduce. (i.e. * only send/recv). * * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION * 0. You just DO WHAT THE FUCK YOU WANT TO. */ #include <mpi.h> #include <cstdio> #include <vector> static int fastlog2(uint32_t v); int MPI_ManualReduce(int value) { /* * Reduces values on all processes to a single value on rank 0. * * This function does the same thing as the function MPI_Reduce using * only MPI_Send and MPI_Recv. As shown, it operates with additions on * integers, so you could trivially use MPI_Reduce, but for operations * on variable size structs for which you cannot define an MPI_Datatype, * you can still use this method, by modifying it to use your op * and your datastructure. */ int recvbuffer; int tag = 0; const int size = MPI::COMM_WORLD.Get_size(); const int rank = MPI::COMM_WORLD.Get_rank(); const int lastpower = 1 << fastlog2(size); // each of the ranks greater than the last power of 2 less than size // need to downshift their data, since the binary tree reduction below // only works when N is a power of two. for (int i = lastpower; i < size; i++) if (rank == i) MPI::COMM_WORLD.Send(&value, 1, MPI_INT, i-lastpower, tag); for (int i = 0; i < size-lastpower; i++) if (rank == i) { MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, i+lastpower, tag); value += recvbuffer; // your operation } for (int d = 0; d < fastlog2(lastpower); d++) for (int k = 0; k < lastpower; k += 1 << (d + 1)) { const int receiver = k; const int sender = k + (1 << d); if (rank == receiver) { MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, sender, tag); value += recvbuffer; // your operation } else if (rank == sender) MPI::COMM_WORLD.Send(&value, 1, MPI_INT, receiver, tag); } return value; } static int fastlog2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html int r; static const int MultiplyDeBruijnBitPosition[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; v |= v >> 1; // first round down to one less than a power of 2 v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; r = MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; return r; } int main(int argc, char* argv[]) { MPI::Init(argc, argv); const int size = MPI::COMM_WORLD.Get_size(); const int rank = MPI::COMM_WORLD.Get_rank(); std::vector<int> data(size); for (int i = 0; i < size; i++) data[i] = i; // each rank only gets one entry, and // they need to sum them by sending messages int result = MPI_ManualReduce(data[rank]); MPI::COMM_WORLD.Barrier(); // Compute the correct result @@ -111,9 +95,9 @@ int main(int argc, char* argv[]) { sum += data[i]; if (rank == 0) { printf("MPI Result = %d\n", result); printf("Correct Result = %d\n", sum); } MPI::Finalize(); } -
rmcgibbo revised this gist
Oct 27, 2013 . 1 changed file with 43 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -27,24 +27,61 @@ int log2(int v) { int parallel_reduce(int mydata) { int recvbuffer; int tag = 0; const int size = MPI::COMM_WORLD.Get_size(); const int rank = MPI::COMM_WORLD.Get_rank(); for (int i = 0; i < size; i++) { MPI::COMM_WORLD.Barrier(); if (i == rank) printf("data[%d]=%d\n", rank, mydata); } MPI::COMM_WORLD.Barrier(); // each of the ranks greater than the last power of 2 less than size // need to downshift their data, since the binary tree reduction below // only works when N is a power of two. const int lastpower = 1<<log2(size); for (int i = lastpower; i < size; i++) if (rank == i) { printf("rank %d sending down to %d\n", i, i-lastpower); MPI::COMM_WORLD.Send(&mydata, 1, MPI_INT, i-lastpower, tag); mydata = 0; } for (int i = 0; i < size-lastpower; i++) if (rank == i) { printf("rank %d receiving up from %d\n", i, i+lastpower); MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, i+lastpower, tag); mydata += recvbuffer; } for (int i = 0; i < size; i++) { MPI::COMM_WORLD.Barrier(); if (i == rank) printf("data[%d]=%d\n", rank, mydata); } MPI::COMM_WORLD.Barrier(); for (int d = 0; d < log2(lastpower); d++) { tag += 1; for (int k = 0; k < lastpower; k += 1<<(d+1)) { int receiver = k + (1<<(d+1)) - 1; int sender = k + (1<<d) - 1; if (rank == 0) printf("x[%d] = x[%d] + x[%d]\n", receiver, sender, receiver); if (rank == receiver) { MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, sender, tag); mydata += recvbuffer; } else if (rank == sender) { MPI::COMM_WORLD.Send(&mydata, 1, MPI_INT, receiver, tag); } } MPI::COMM_WORLD.Barrier(); @@ -73,8 +110,8 @@ int main(int argc, char* argv[]) { for (int i = 0; i < size; i++) sum += data[i]; if (rank == (1<<log2(size))-1) { printf("Final Result on Rank%d = %d\n", rank, result); printf("Correct result = %d\n", sum); } -
rmcgibbo created this gist
Oct 27, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,82 @@ #include <mpi.h> #include <cstdio> #include <cmath> #include <vector> using std::vector; int log2(int v) { // http://graphics.stanford.edu/~seander/bithacks.html // i think this is 32 bit specific int r; static const int MultiplyDeBruijnBitPosition[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; v |= v >> 1; // first round down to one less than a power of 2 v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; r = MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; return r; } int parallel_reduce(int mydata) { int recvbuffer; const int size = MPI::COMM_WORLD.Get_size(); const int rank = MPI::COMM_WORLD.Get_rank(); for (int d = 0; d < log2(size); d++) { for (int k = 0; k < size; k += 1<<(d+1)) { int receiver = k + (1<<(d+1)) - 1; int sender = k + (1<<d) - 1; if (rank == 0) printf("x[%d] = x[%d] + x[%d]\n", receiver, sender, receiver); if (rank == receiver) { MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, sender, d); mydata += recvbuffer; } else if (rank == sender) { MPI::COMM_WORLD.Send(&mydata, 1, MPI_INT, receiver, d); } } MPI::COMM_WORLD.Barrier(); if (rank == 0) printf("\n"); } return mydata; } int main(int argc, char* argv[]) { MPI::Init(argc, argv); const int size = MPI::COMM_WORLD.Get_size(); const int rank = MPI::COMM_WORLD.Get_rank(); vector<int> data(size); for (int i = 0; i < size; i++) data[i] = i; // each rank only gets one entry, and // they need to sum them by sending messages int result = parallel_reduce(data[rank]); MPI::COMM_WORLD.Barrier(); // Compute the correct result int sum = 0; for (int i = 0; i < size; i++) sum += data[i]; if (rank == size-1) { printf("Final Result on Rank%d = %d\n", rank, result); printf("Correct result = %d\n", sum); } MPI::Finalize(); }