Last active
August 16, 2022 23:15
-
-
Save rmcgibbo/7178576 to your computer and use it in GitHub Desktop.
Efficient MPI Parallel Reduction Without MPI_Reduce or MPI_Scan (only Send/Recv)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <mpi.h> | |
| #include <cstdio> | |
| #include <cmath> | |
| #include <vector> | |
| using std::vector; | |
| int log2(int v) { | |
| // http://graphics.stanford.edu/~seander/bithacks.html | |
| // i think this is 32 bit specific | |
| int r; | |
| static const int MultiplyDeBruijnBitPosition[32] = | |
| { | |
| 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, | |
| 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 | |
| }; | |
| v |= v >> 1; // first round down to one less than a power of 2 | |
| v |= v >> 2; | |
| v |= v >> 4; | |
| v |= v >> 8; | |
| v |= v >> 16; | |
| r = MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; | |
| return r; | |
| } | |
| int parallel_reduce(int mydata) { | |
| int recvbuffer; | |
| const int size = MPI::COMM_WORLD.Get_size(); | |
| const int rank = MPI::COMM_WORLD.Get_rank(); | |
| for (int d = 0; d < log2(size); d++) { | |
| for (int k = 0; k < size; k += 1<<(d+1)) { | |
| int receiver = k + (1<<(d+1)) - 1; | |
| int sender = k + (1<<d) - 1; | |
| if (rank == 0) | |
| printf("x[%d] = x[%d] + x[%d]\n", receiver, sender, receiver); | |
| if (rank == receiver) { | |
| MPI::COMM_WORLD.Recv(&recvbuffer, 1, MPI_INT, sender, d); | |
| mydata += recvbuffer; | |
| } | |
| else if (rank == sender) { | |
| MPI::COMM_WORLD.Send(&mydata, 1, MPI_INT, receiver, d); | |
| } | |
| } | |
| MPI::COMM_WORLD.Barrier(); | |
| if (rank == 0) | |
| printf("\n"); | |
| } | |
| return mydata; | |
| } | |
| int main(int argc, char* argv[]) { | |
| MPI::Init(argc, argv); | |
| const int size = MPI::COMM_WORLD.Get_size(); | |
| const int rank = MPI::COMM_WORLD.Get_rank(); | |
| vector<int> data(size); | |
| for (int i = 0; i < size; i++) | |
| data[i] = i; | |
| // each rank only gets one entry, and | |
| // they need to sum them by sending messages | |
| int result = parallel_reduce(data[rank]); | |
| MPI::COMM_WORLD.Barrier(); | |
| // Compute the correct result | |
| int sum = 0; | |
| for (int i = 0; i < size; i++) | |
| sum += data[i]; | |
| if (rank == size-1) { | |
| printf("Final Result on Rank%d = %d\n", rank, result); | |
| printf("Correct result = %d\n", sum); | |
| } | |
| MPI::Finalize(); | |
| } |
Author
rmcgibbo
commented
Oct 27, 2013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment