-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/coll/allgather.c
-: 0:Graph:allgather.gcno
-: 0:Data:allgather.gcda
-: 0:Runs:4000
-: 0:Programs:1232
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:/*
-: 3: *
-: 4: * (C) 2001 by Argonne National Laboratory.
-: 5: * See COPYRIGHT in top-level directory.
-: 6: */
-: 7:
-: 8:#include "mpiimpl.h"
-: 9:
-: 10:/* -- Begin Profiling Symbol Block for routine MPI_Allgather */
-: 11:#if defined(HAVE_PRAGMA_WEAK)
-: 12:#pragma weak MPI_Allgather = PMPI_Allgather
-: 13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
-: 14:#pragma _HP_SECONDARY_DEF PMPI_Allgather MPI_Allgather
-: 15:#elif defined(HAVE_PRAGMA_CRI_DUP)
-: 16:#pragma _CRI duplicate MPI_Allgather as PMPI_Allgather
-: 17:#endif
-: 18:/* -- End Profiling Symbol Block */
-: 19:
-: 20:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
-: 21: the MPI routines */
-: 22:#ifndef MPICH_MPI_FROM_PMPI
-: 23:#undef MPI_Allgather
-: 24:#define MPI_Allgather PMPI_Allgather
-: 25:
-: 26:/* This is the default implementation of allgather. The algorithm is:
-: 27:
-: 28: Algorithm: MPI_Allgather
-: 29:
-: 30: For short messages and non-power-of-two no. of processes, we use
-: 31: the algorithm from the Jehoshua Bruck et al IEEE TPDS Nov 97
-: 32: paper. It is a variant of the dissemination algorithm for
-: 33: barrier. It takes ceiling(lg p) steps.
-: 34:
-: 35: Cost = lgp.alpha + n.((p-1)/p).beta
-: 36: where n is total size of data gathered on each process.
-: 37:
-: 38: For short or medium-size messages and power-of-two no. of
-: 39: processes, we use the recursive doubling algorithm.
-: 40:
-: 41: Cost = lgp.alpha + n.((p-1)/p).beta
-: 42:
-: 43: TODO: On TCP, we may want to use recursive doubling instead of the Bruck
-: 44: algorithm in all cases because of the pairwise-exchange property of
-: 45: recursive doubling (see Benson et al paper in Euro PVM/MPI
-: 46: 2003).
-: 47:
-: 48: It is interesting to note that either of the above algorithms for
-: 49: MPI_Allgather has the same cost as the tree algorithm for MPI_Gather!
-: 50:
-: 51: For long messages or medium-size messages and non-power-of-two
-: 52: no. of processes, we use a ring algorithm. In the first step, each
-: 53: process i sends its contribution to process i+1 and receives
-: 54: the contribution from process i-1 (with wrap-around). From the
-: 55: second step onwards, each process i forwards to process i+1 the
-: 56: data it received from process i-1 in the previous step. This takes
-: 57: a total of p-1 steps.
-: 58:
-: 59: Cost = (p-1).alpha + n.((p-1)/p).beta
-: 60:
-: 61: We use this algorithm instead of recursive doubling for long
-: 62: messages because we find that this communication pattern (nearest
-: 63: neighbor) performs twice as fast as recursive doubling for long
-: 64: messages (on Myrinet and IBM SP).
-: 65:
-: 66: Possible improvements:
-: 67:
-: 68: End Algorithm: MPI_Allgather
-: 69:*/
-: 70:/* begin:nested */
-: 71:/* Intracommunicator allgather; picks recursive doubling, Bruck, or ring from tot_bytes and comm_size. Not declared static because a
-: 72: machine-specific function may call this one in some cases. Returns MPI_SUCCESS or the mpi_errno set by a failing step. */
-: 73:int MPIR_Allgather (
-: 74: void *sendbuf,
-: 75: int sendcount,
-: 76: MPI_Datatype sendtype,
-: 77: void *recvbuf,
-: 78: int recvcount,
-: 79: MPI_Datatype recvtype,
-: 80: MPID_Comm *comm_ptr )
2571022: 81:{
-: 82: int comm_size, rank;
2571022: 83: int mpi_errno = MPI_SUCCESS;
-: 84: MPI_Aint recvtype_extent, tot_bytes;
-: 85: MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;
-: 86: int j, i, pof2, src, rem;
-: 87: static const char FCNAME[] = "MPIR_Allgather";
2571022: 88: void *tmp_buf = NULL;
-: 89: int curr_cnt, dst, type_size, left, right, jnext, comm_size_is_pof2;
-: 90: MPI_Comm comm;
-: 91: MPI_Status status;
-: 92: int mask, dst_tree_root, my_tree_root, is_homogeneous,
2571022: 93: send_offset, recv_offset, last_recv_cnt = 0, nprocs_completed, k,
-: 94: offset, tmp_mask, tree_root;
-: 95:#ifdef MPID_HAS_HETERO
-: 96: int position, tmp_buf_size, nbytes;
-: 97:#endif
-: 98:
2571022: 99: MPIU_CHKLMEM_DECL(1);
-: 100:
2571022: 101: if (((sendcount == 0) && (sendbuf != MPI_IN_PLACE)) || (recvcount == 0))
220: 102: return MPI_SUCCESS; /* zero-count call: returns before the MULTIPLE_THREADS_ENTER check below */
-: 103:
2570802: 104: comm = comm_ptr->handle;
2570802: 105: comm_size = comm_ptr->local_size;
2570802: 106: rank = comm_ptr->rank;
-: 107:
2570802: 108: MPID_Datatype_get_extent_macro( recvtype, recvtype_extent );
2570802: 109: MPID_Datatype_get_size_macro( recvtype, type_size );
-: 110:
-: 111: /* This is the largest offset we add to recvbuf */
-: 112: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
-: 113: (comm_size * recvcount * recvtype_extent));
-: 114:
-: 115: /* check if comm_size is a power of two */
2570802: 116: pof2 = 1;
10280982: 117: while (pof2 < comm_size)
5139378: 118: pof2 *= 2; /* ends at the smallest power of two >= comm_size */
2570802: 119: if (pof2 == comm_size)
1981625: 120: comm_size_is_pof2 = 1;
-: 121: else
589177: 122: comm_size_is_pof2 = 0;
-: 123:
-: 124: /* check if multiple threads are calling this collective function */
-: 125: MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-: 126:
2570802: 127: tot_bytes = (MPI_Aint)recvcount * comm_size * type_size; /* bytes gathered per process, from type size rather than extent */
2570802: 128: if ((tot_bytes < MPIR_ALLGATHER_LONG_MSG) && (comm_size_is_pof2 == 1)) {
-: 129:
-: 130: /* Short or medium size message and power-of-two no. of processes. Use
-: 131: * recursive doubling algorithm */
-: 132:
1981609: 133: is_homogeneous = 1;
-: 134:#ifdef MPID_HAS_HETERO
-: 135: if (comm_ptr->is_hetero)
-: 136: is_homogeneous = 0;
-: 137:#endif
-: 138:
1981609: 139: if (is_homogeneous) {
-: 140: /* homogeneous. no need to pack into tmp_buf on each node. copy
-: 141: local data into recvbuf */
1981609: 142: if (sendbuf != MPI_IN_PLACE) {
1553348: 143: mpi_errno = MPIR_Localcopy (sendbuf, sendcount, sendtype,
-: 144: ((char *)recvbuf +
-: 145: rank*recvcount*recvtype_extent),
-: 146: recvcount, recvtype);
1553348: 147: if (mpi_errno) {
|
#####: 148: MPIU_ERR_POP(mpi_errno);
-: 149: }
-: 150: }
-: 151:
|
1981609: 152: curr_cnt = recvcount; /* element count sent in the next exchange; grows by last_recv_cnt */
-: 153:
1981609: 154: mask = 0x1;
1981609: 155: i = 0;
7915852: 156: while (mask < comm_size) {
3952634: 157: dst = rank ^ mask;
-: 158:
-: 159: /* find offset into send and recv buffers. zero out
-: 160: the least significant "i" bits of rank and dst to
-: 161: find root of src and dst subtrees. Use ranks of
-: 162: roots as index to send from and recv into buffer */
-: 163:
3952634: 164: dst_tree_root = dst >> i;
3952634: 165: dst_tree_root <<= i;
-: 166:
3952634: 167: my_tree_root = rank >> i;
3952634: 168: my_tree_root <<= i;
-: 169:
-: 170: /* FIXME: saving an MPI_Aint into an int */
3952634: 171: send_offset = my_tree_root * recvcount * recvtype_extent;
3952634: 172: recv_offset = dst_tree_root * recvcount * recvtype_extent;
-: 173:
3952634: 174: if (dst < comm_size) {
3952634: 175: mpi_errno = MPIC_Sendrecv(((char *)recvbuf + send_offset),
-: 176: curr_cnt, recvtype, dst,
-: 177: MPIR_ALLGATHER_TAG,
-: 178: ((char *)recvbuf + recv_offset),
-: 179: (comm_size-dst_tree_root)*recvcount, /* receive bound: elements from block dst_tree_root up to block comm_size */
-: 180: recvtype, dst,
-: 181: MPIR_ALLGATHER_TAG, comm, &status);
3952634: 182: if (mpi_errno) {
|
#####: 183: MPIU_ERR_POP(mpi_errno);
-: 184: }
-: 185:
|
3952634: 186: NMPI_Get_count(&status, recvtype, &last_recv_cnt);
3952634: 187: curr_cnt += last_recv_cnt;
-: 188: }
-: 189:
-: 190: /* if some processes in this process's subtree in this step
-: 191: did not have any destination process to communicate with
-: 192: because of non-power-of-two, we need to send them the
-: 193: data that they would normally have received from those
-: 194: processes. That is, the haves in this subtree must send to
-: 195: the havenots. We use a logarithmic recursive-halving algorithm
-: 196: for this. */
-: 197:
-: 198: /* This part of the code will not currently be
-: 199: executed because we are not using recursive
-: 200: doubling for non power of two. Mark it as experimental
-: 201: so that it doesn't show up as red in the coverage
-: 202: tests. */
-: 203:
|
-: 204: /* --BEGIN EXPERIMENTAL-- */
3952634: 205: if (dst_tree_root + mask > comm_size) {
#####: 206: nprocs_completed = comm_size - my_tree_root - mask;
-: 207: /* nprocs_completed is the number of processes in this
-: 208: subtree that have all the data. Send data to others
-: 209: in a tree fashion. First find root of current tree
-: 210: that is being divided into two. k is the number of
-: 211: least-significant bits in this process's rank that
-: 212: must be zeroed out to find the rank of the root */
#####: 213: j = mask;
#####: 214: k = 0;
#####: 215: while (j) {
#####: 216: j >>= 1;
#####: 217: k++;
-: 218: }
#####: 219: k--;
-: 220:
-: 221: /* FIXME: saving an MPI_Aint into an int */
#####: 222: offset = recvcount * (my_tree_root + mask) * recvtype_extent;
#####: 223: tmp_mask = mask >> 1;
-: 224:
#####: 225: while (tmp_mask) {
#####: 226: dst = rank ^ tmp_mask;
-: 227:
#####: 228: tree_root = rank >> k;
#####: 229: tree_root <<= k;
-: 230:
-: 231: /* send only if this proc has data and destination
-: 232: doesn't have data. at any step, multiple processes
-: 233: can send if they have the data */
#####: 234: if ((dst > rank) &&
-: 235: (rank < tree_root + nprocs_completed)
-: 236: && (dst >= tree_root + nprocs_completed)) {
#####: 237: mpi_errno = MPIC_Send(((char *)recvbuf + offset),
-: 238: last_recv_cnt,
-: 239: recvtype, dst,
-: 240: MPIR_ALLGATHER_TAG, comm);
-: 241: /* last_recv_cnt was set in the previous
-: 242: receive. that's the amount of data to be
-: 243: sent now. */
#####: 244: if (mpi_errno) {
#####: 245: MPIU_ERR_POP(mpi_errno);
-: 246: }
-: 247: }
-: 248: /* recv only if this proc. doesn't have data and sender
-: 249: has data */
#####: 250: else if ((dst < rank) &&
-: 251: (dst < tree_root + nprocs_completed) &&
-: 252: (rank >= tree_root + nprocs_completed)) {
#####: 253: mpi_errno = MPIC_Recv(((char *)recvbuf + offset),
-: 254: (comm_size - (my_tree_root + mask))*recvcount,
-: 255: recvtype, dst,
-: 256: MPIR_ALLGATHER_TAG,
-: 257: comm, &status);
-: 258: /* nprocs_completed is also equal to the
-: 259: no. of processes whose data we don't have */
#####: 260: if (mpi_errno) {
#####: 261: MPIU_ERR_POP(mpi_errno);
-: 262: }
#####: 263: NMPI_Get_count(&status, recvtype, &last_recv_cnt);
#####: 264: curr_cnt += last_recv_cnt;
-: 265: }
#####: 266: tmp_mask >>= 1;
#####: 267: k--;
-: 268: }
-: 269: }
-: 270: /* --END EXPERIMENTAL-- */
-: 271:
|
3952634: 272: mask <<= 1;
3952634: 273: i++;
-: 274: }
-: 275: }
-: 276:
-: 277:#ifdef MPID_HAS_HETERO
-: 278: else {
-: 279: /* heterogeneous. need to use temp. buffer. */
-: 280:
-: 281: NMPI_Pack_size(recvcount*comm_size, recvtype, comm, &tmp_buf_size);
-: 282:
-: 283: MPIU_CHKLMEM_MALLOC(tmp_buf, void*, tmp_buf_size, mpi_errno, "tmp_buf");
-: 284:
-: 285: /* calculate the value of nbytes, the number of bytes in packed
-: 286: representation that each process contributes. We can't simply divide
-: 287: tmp_buf_size by comm_size because tmp_buf_size is an upper
-: 288: bound on the amount of memory required. (For example, for
-: 289: a single integer, MPICH-1 returns pack_size=12.) Therefore, we
-: 290: actually pack some data into tmp_buf and see by how much
-: 291: 'position' is incremented. */
-: 292:
-: 293: position = 0;
-: 294: NMPI_Pack(recvbuf, 1, recvtype, tmp_buf, tmp_buf_size,
-: 295: &position, comm);
-: 296: nbytes = position*recvcount;
-: 297:
-: 298: /* pack local data into right location in tmp_buf */
-: 299: position = rank * nbytes;
-: 300: if (sendbuf != MPI_IN_PLACE) {
-: 301: NMPI_Pack(sendbuf, sendcount, sendtype, tmp_buf, tmp_buf_size,
-: 302: &position, comm);
-: 303: }
-: 304: else {
-: 305: /* if in_place specified, local data is found in recvbuf. NOTE(review): the offset below omits recvcount, unlike the rank*recvcount*recvtype_extent used by the other in-place copies in this function - confirm */
-: 306: NMPI_Pack(((char *)recvbuf + recvtype_extent*rank), recvcount,
-: 307: recvtype, tmp_buf, tmp_buf_size,
-: 308: &position, comm);
-: 309: }
-: 310:
-: 311: curr_cnt = nbytes;
-: 312:
-: 313: mask = 0x1;
-: 314: i = 0;
-: 315: while (mask < comm_size) {
-: 316: dst = rank ^ mask;
-: 317:
-: 318: /* find offset into send and recv buffers. zero out
-: 319: the least significant "i" bits of rank and dst to
-: 320: find root of src and dst subtrees. Use ranks of
-: 321: roots as index to send from and recv into buffer. */
-: 322:
-: 323: dst_tree_root = dst >> i;
-: 324: dst_tree_root <<= i;
-: 325:
-: 326: my_tree_root = rank >> i;
-: 327: my_tree_root <<= i;
-: 328:
-: 329: send_offset = my_tree_root * nbytes;
-: 330: recv_offset = dst_tree_root * nbytes;
-: 331:
-: 332: if (dst < comm_size) {
-: 333: mpi_errno = MPIC_Sendrecv(((char *)tmp_buf + send_offset),
-: 334: curr_cnt, MPI_BYTE, dst,
-: 335: MPIR_ALLGATHER_TAG,
-: 336: ((char *)tmp_buf + recv_offset),
-: 337: tmp_buf_size - recv_offset,
-: 338: MPI_BYTE, dst,
-: 339: MPIR_ALLGATHER_TAG, comm, &status);
-: 340: if (mpi_errno) {
|
-: 341: MPIU_ERR_POP(mpi_errno);
-: 342: }
-: 343:
-: 344: NMPI_Get_count(&status, MPI_BYTE, &last_recv_cnt);
-: 345: curr_cnt += last_recv_cnt;
-: 346: }
-: 347:
-: 348: /* if some processes in this process's subtree in this step
-: 349: did not have any destination process to communicate with
-: 350: because of non-power-of-two, we need to send them the
-: 351: data that they would normally have received from those
-: 352: processes. That is, the haves in this subtree must send to
-: 353: the havenots. We use a logarithmic recursive-halving
-: 354: algorithm for this. */
-: 355:
-: 356: if (dst_tree_root + mask > comm_size) {
-: 357: nprocs_completed = comm_size - my_tree_root - mask;
-: 358: /* nprocs_completed is the number of processes in this
-: 359: subtree that have all the data. Send data to others
-: 360: in a tree fashion. First find root of current tree
-: 361: that is being divided into two. k is the number of
-: 362: least-significant bits in this process's rank that
-: 363: must be zeroed out to find the rank of the root */
-: 364: j = mask;
-: 365: k = 0;
-: 366: while (j) {
-: 367: j >>= 1;
-: 368: k++;
-: 369: }
-: 370: k--;
-: 371:
-: 372: offset = nbytes * (my_tree_root + mask);
-: 373: tmp_mask = mask >> 1;
-: 374:
-: 375: while (tmp_mask) {
-: 376: dst = rank ^ tmp_mask;
-: 377:
-: 378: tree_root = rank >> k;
-: 379: tree_root <<= k;
-: 380:
-: 381: /* send only if this proc has data and destination
-: 382: doesn't have data. at any step, multiple processes
-: 383: can send if they have the data */
-: 384: if ((dst > rank) &&
-: 385: (rank < tree_root + nprocs_completed)
-: 386: && (dst >= tree_root + nprocs_completed)) {
-: 387:
-: 388: mpi_errno = MPIC_Send(((char *)tmp_buf + offset),
-: 389: last_recv_cnt, MPI_BYTE,
-: 390: dst, MPIR_ALLGATHER_TAG,
-: 391: comm);
-: 392: /* last_recv_cnt was set in the previous
-: 393: receive. that's the amount of data to be
-: 394: sent now. */
-: 395: if (mpi_errno) {
-: 396: MPIU_ERR_POP(mpi_errno);
-: 397: }
-: 398: }
-: 399: /* recv only if this proc. doesn't have data and sender
-: 400: has data */
-: 401: else if ((dst < rank) &&
-: 402: (dst < tree_root + nprocs_completed) &&
-: 403: (rank >= tree_root + nprocs_completed)) {
-: 404: mpi_errno = MPIC_Recv(((char *)tmp_buf + offset),
-: 405: tmp_buf_size - offset,
-: 406: MPI_BYTE, dst,
-: 407: MPIR_ALLGATHER_TAG,
-: 408: comm, &status);
-: 409: /* nprocs_completed is also equal to the
-: 410: no. of processes whose data we don't have */
-: 411: if (mpi_errno) {
-: 412: MPIU_ERR_POP(mpi_errno);
-: 413: }
-: 414: NMPI_Get_count(&status, MPI_BYTE, &last_recv_cnt);
-: 415: curr_cnt += last_recv_cnt;
-: 416: }
-: 417: tmp_mask >>= 1;
-: 418: k--;
-: 419: }
-: 420: }
-: 421: mask <<= 1;
-: 422: i++;
-: 423: }
-: 424:
-: 425: position = 0;
-: 426: NMPI_Unpack(tmp_buf, tmp_buf_size, &position, recvbuf,
-: 427: recvcount*comm_size, recvtype, comm);
-: 428: }
-: 429:#endif /* MPID_HAS_HETERO */
-: 430: }
-: 431:
|
589193: 432: else if (tot_bytes < MPIR_ALLGATHER_SHORT_MSG) {
-: 433: /* Short message and non-power-of-two no. of processes. Use
-: 434: * Bruck algorithm (see description above). */
-: 435:
-: 436: /* allocate a temporary buffer of the same size as recvbuf. */
-: 437:
-: 438: /* get true extent of recvtype */
588775: 439: mpi_errno = NMPI_Type_get_true_extent(recvtype, &recvtype_true_lb,
-: 440: &recvtype_true_extent);
588775: 441: if (mpi_errno) {
|
#####: 442: MPIU_ERR_POP(mpi_errno);
-: 443: }
-: 444:
|
588775: 445: recvbuf_extent = recvcount * comm_size *
-: 446: (MPIR_MAX(recvtype_true_extent, recvtype_extent));
-: 447:
588775: 448: MPIU_CHKLMEM_MALLOC(tmp_buf, void*, recvbuf_extent, mpi_errno, "tmp_buf");
-: 449:
-: 450: /* adjust for potential negative lower bound in datatype */
588775: 451: tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
-: 452:
-: 453: /* copy local data to the top of tmp_buf */
588775: 454: if (sendbuf != MPI_IN_PLACE) {
582901: 455: mpi_errno = MPIR_Localcopy (sendbuf, sendcount, sendtype,
-: 456: tmp_buf, recvcount, recvtype);
582901: 457: if (mpi_errno) {
|
#####: 458: MPIU_ERR_POP(mpi_errno);
-: 459: }
-: 460: }
-: 461: else {
|
5874: 462: mpi_errno = MPIR_Localcopy (((char *)recvbuf +
-: 463: rank * recvcount * recvtype_extent),
-: 464: recvcount, recvtype, tmp_buf,
-: 465: recvcount, recvtype);
5874: 466: if (mpi_errno) {
|
#####: 467: MPIU_ERR_POP(mpi_errno);
-: 468: }
-: 469: }
-: 470:
-: 471: /* do the first \floor(\lg p) steps */
-: 472:
|
588775: 473: curr_cnt = recvcount;
588775: 474: pof2 = 1;
1773971: 475: while (pof2 <= comm_size/2) {
596421: 476: src = (rank + pof2) % comm_size;
596421: 477: dst = (rank - pof2 + comm_size) % comm_size;
-: 478:
596421: 479: mpi_errno = MPIC_Sendrecv(tmp_buf, curr_cnt, recvtype, dst,
-: 480: MPIR_ALLGATHER_TAG,
-: 481: ((char *)tmp_buf + curr_cnt*recvtype_extent),
-: 482: curr_cnt, recvtype,
-: 483: src, MPIR_ALLGATHER_TAG, comm,
-: 484: MPI_STATUS_IGNORE);
596421: 485: if (mpi_errno) {
|
#####: 486: MPIU_ERR_POP(mpi_errno);
-: 487: }
-: 488:
|
596421: 489: curr_cnt *= 2; /* each step doubles the element count held in tmp_buf */
596421: 490: pof2 *= 2;
-: 491: }
-: 492:
-: 493: /* if comm_size is not a power of two, one more step is needed */
-: 494:
588775: 495: rem = comm_size - pof2; /* blocks still missing after the doubling steps */
588775: 496: if (rem) {
588775: 497: src = (rank + pof2) % comm_size;
588775: 498: dst = (rank - pof2 + comm_size) % comm_size;
-: 499:
588775: 500: mpi_errno = MPIC_Sendrecv(tmp_buf, rem * recvcount, recvtype,
-: 501: dst, MPIR_ALLGATHER_TAG,
-: 502: ((char *)tmp_buf + curr_cnt*recvtype_extent),
-: 503: rem * recvcount, recvtype,
-: 504: src, MPIR_ALLGATHER_TAG, comm,
-: 505: MPI_STATUS_IGNORE);
588775: 506: if (mpi_errno) {
|
#####: 507: MPIU_ERR_POP(mpi_errno);
-: 508: }
-: 509: }
-: 510:
-: 511: /* Rotate blocks in tmp_buf down by (rank) blocks and store
-: 512: * result in recvbuf. */
-: 513:
|
588775: 514: mpi_errno = MPIR_Localcopy(tmp_buf, (comm_size-rank)*recvcount,
-: 515: recvtype, (char *) recvbuf + rank*recvcount*recvtype_extent,
-: 516: (comm_size-rank)*recvcount, recvtype);
588775: 517: if (mpi_errno) {
|
#####: 518: MPIU_ERR_POP(mpi_errno);
-: 519: }
-: 520:
|
588775: 521: if (rank) {
393602: 522: mpi_errno = MPIR_Localcopy((char *) tmp_buf +
-: 523: (comm_size-rank)*recvcount*recvtype_extent,
-: 524: rank*recvcount, recvtype, recvbuf,
-: 525: rank*recvcount, recvtype);
393602: 526: if (mpi_errno) {
|
#####: 527: MPIU_ERR_POP(mpi_errno);
-: 528: }
-: 529: }
-: 530: }
-: 531:
-: 532: else { /* long message or medium-size message and non-power-of-two
-: 533: * no. of processes. use ring algorithm. */
-: 534:
-: 535: /* First, load the "local" version in the recvbuf. */
|
418: 536: if (sendbuf != MPI_IN_PLACE) {
209: 537: mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
-: 538: ((char *)recvbuf +
-: 539: rank*recvcount*recvtype_extent),
-: 540: recvcount, recvtype);
209: 541: if (mpi_errno) {
|
#####: 542: MPIU_ERR_POP(mpi_errno);
-: 543: }
-: 544: }
-: 545:
-: 546: /*
-: 547: Now, send left to right. This fills in the receive area in
-: 548: reverse order.
-: 549: */
|
418: 550: left = (comm_size + rank - 1) % comm_size;
418: 551: right = (rank + 1) % comm_size;
-: 552:
418: 553: j = rank;
418: 554: jnext = left;
3674: 555: for (i=1; i<comm_size; i++) {
3256: 556: mpi_errno = MPIC_Sendrecv(((char *)recvbuf +
-: 557: j*recvcount*recvtype_extent),
-: 558: recvcount, recvtype, right,
-: 559: MPIR_ALLGATHER_TAG,
-: 560: ((char *)recvbuf +
-: 561: jnext*recvcount*recvtype_extent),
-: 562: recvcount, recvtype, left,
-: 563: MPIR_ALLGATHER_TAG, comm,
-: 564: MPI_STATUS_IGNORE);
3256: 565: if (mpi_errno) {
|
#####: 566: MPIU_ERR_POP(mpi_errno);
-: 567: }
|
3256: 568: j = jnext;
3256: 569: jnext = (comm_size + jnext - 1) % comm_size;
-: 570: }
-: 571: }
-: 572:
-: 573: /* check if multiple threads are calling this collective function */
-: 574: fn_exit:
588775: 575: MPIU_CHKLMEM_FREEALL(); /* NOTE(review): tmp_buf was shifted by -recvtype_true_lb above; presumably this frees the pointer recorded at MPIU_CHKLMEM_MALLOC time - confirm */
-: 576: MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
2570802: 577: return (mpi_errno);
-: 578:
|
-: 579: fn_fail:
-: 580: goto fn_exit; /* failures run the same cleanup as the success path */
-: 581:}
-: 582:/* end:nested */
-: 583:
-: 584:#undef FUNCNAME
-: 585:#define FUNCNAME MPIR_Allgather_inter
-: 586:#undef FCNAME
-: 587:#define FCNAME MPIU_QUOTE(FUNCNAME)
-: 588:/* begin:nested */
-: 589:/* not declared static because a machine-specific function may call this one
-: 590: in some cases */
-: 591:int MPIR_Allgather_inter (
-: 592: void *sendbuf,
-: 593: int sendcount,
-: 594: MPI_Datatype sendtype,
-: 595: void *recvbuf,
-: 596: int recvcount,
-: 597: MPI_Datatype recvtype,
-: 598: MPID_Comm *comm_ptr )
|
3145: 599:{
-: 600: /* Intercommunicator Allgather.
-: 601: Each group does a gather to local root with the local
-: 602: intracommunicator, and then does an intercommunicator broadcast.
-: 603: Returns MPI_SUCCESS or the mpi_errno set by a failing step. */
-: 604:
3145: 605: int rank, local_size, remote_size, mpi_errno = MPI_SUCCESS, root;
3145: 606: MPI_Aint true_extent, true_lb = 0, extent, send_extent;
3145: 607: void *tmp_buf=NULL;
3145: 608: MPID_Comm *newcomm_ptr = NULL;
-: 609:
3145: 610: MPIU_CHKLMEM_DECL(1);
-: 611:
3145: 612: local_size = comm_ptr->local_size;
3145: 613: remote_size = comm_ptr->remote_size;
3145: 614: rank = comm_ptr->rank;
-: 615:
3145: 616: if ((rank == 0) && (sendcount != 0)) {
-: 617: /* In each group, rank 0 allocates temp. buffer for local
-: 618: gather */
854: 619: mpi_errno = NMPI_Type_get_true_extent(sendtype, &true_lb, &true_extent);
854: 620: if (mpi_errno) {
|
#####: 621: MPIU_ERR_POP(mpi_errno);
-: 622: }
|
854: 623: MPID_Datatype_get_extent_macro( sendtype, send_extent );
854: 624: extent = MPIR_MAX(send_extent, true_extent);
-: 625:
-: 626: MPID_Ensure_Aint_fits_in_pointer(extent * sendcount * local_size);
854: 627: MPIU_CHKLMEM_MALLOC(tmp_buf, void*, extent*sendcount*local_size, mpi_errno, "tmp_buf");
-: 628:
-: 629: /* adjust for potential negative lower bound in datatype */
854: 630: tmp_buf = (void *)((char*)tmp_buf - true_lb);
-: 631: }
-: 632:
-: 633: /* Get the local intracommunicator */
3145: 634: if (!comm_ptr->local_comm)
90: 635: MPIR_Setup_intercomm_localcomm( comm_ptr ); /* only called while local_comm is still unset */
-: 636:
3145: 637: newcomm_ptr = comm_ptr->local_comm;
-: 638:
3145: 639: if (sendcount != 0) {
2649: 640: mpi_errno = MPIR_Gather(sendbuf, sendcount, sendtype, tmp_buf, sendcount,
-: 641: sendtype, 0, newcomm_ptr); /* root is local rank 0; tmp_buf stays NULL on every other rank */
2649: 642: if (mpi_errno) {
|
#####: 643: MPIU_ERR_POP(mpi_errno);
-: 644: }
-: 645: }
-: 646:
-: 647: /* first broadcast from left to right group, then from right to
-: 648: left group */
|
3145: 649: if (comm_ptr->is_low_group) {
-: 650: /* bcast to right*/
1107: 651: if (sendcount != 0) {
611: 652: root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL; /* only local rank 0 passes MPI_ROOT; the others pass MPI_PROC_NULL */
611: 653: mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
-: 654: sendtype, root, comm_ptr);
611: 655: if (mpi_errno) {
|
#####: 656: MPIU_ERR_POP(mpi_errno);
-: 657: }
-: 658: }
-: 659:
-: 660: /* receive bcast from right */
|
1107: 661: if (recvcount != 0) {
1107: 662: root = 0; /* presumably rank 0 of the remote group, per the intercommunicator bcast root convention - confirm */
1107: 663: mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
-: 664: recvtype, root, comm_ptr);
1107: 665: if (mpi_errno) {
|
#####: 666: MPIU_ERR_POP(mpi_errno);
-: 667: }
-: 668: }
-: 669: }
-: 670: else {
-: 671: /* receive bcast from left */
|
2038: 672: if (recvcount != 0) {
1094: 673: root = 0;
1094: 674: mpi_errno = MPIR_Bcast_inter(recvbuf, recvcount*remote_size,
-: 675: recvtype, root, comm_ptr);
1094: 676: if (mpi_errno) {
|
#####: 677: MPIU_ERR_POP(mpi_errno);
-: 678: }
-: 679: }
-: 680:
-: 681: /* bcast to left */
|
2038: 682: if (sendcount != 0) {
2038: 683: root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
2038: 684: mpi_errno = MPIR_Bcast_inter(tmp_buf, sendcount*local_size,
-: 685: sendtype, root, comm_ptr);
2038: 686: if (mpi_errno) {
|
#####: 687: MPIU_ERR_POP(mpi_errno);
-: 688: }
-: 689: }
-: 690: }
-: 691:
-: 692: fn_exit:
|
854: 693: MPIU_CHKLMEM_FREEALL(); /* NOTE(review): tmp_buf was shifted by -true_lb above; presumably this frees the pointer recorded at MPIU_CHKLMEM_MALLOC time - confirm */
3145: 694: return mpi_errno;
-: 695:
|
-: 696: fn_fail:
-: 697: goto fn_exit; /* failures run the same cleanup as the success path */
-: 698:}
-: 699:/* end:nested */
-: 700:#endif
-: 701:
-: 702:#undef FUNCNAME
-: 703:#define FUNCNAME MPI_Allgather
-: 704:#undef FCNAME
-: 705:#define FCNAME MPIU_QUOTE(FUNCNAME)
-: 706:/*@
-: 707:MPI_Allgather - Gathers data from all tasks and distributes the combined
-: 708: data to all tasks
-: 709:
-: 710:Input Parameters:
-: 711:+ sendbuf - starting address of send buffer (choice)
-: 712:. sendcount - number of elements in send buffer (integer)
-: 713:. sendtype - data type of send buffer elements (handle)
-: 714:. recvcount - number of elements received from any process (integer)
-: 715:. recvtype - data type of receive buffer elements (handle)
-: 716:- comm - communicator (handle)
-: 717:
-: 718:Output Parameter:
-: 719:. recvbuf - address of receive buffer (choice)
-: 720:
-: 721:Notes:
-: 722: The MPI standard (1.0 and 1.1) says that
-: 723:.n
-: 724:.n
-: 725: The jth block of data sent from each process is received by every process
-: 726: and placed in the jth block of the buffer 'recvbuf'.
-: 727:.n
-: 728:.n
-: 729: This is misleading; a better description is
-: 730:.n
-: 731:.n
-: 732: The block of data sent from the jth process is received by every
-: 733: process and placed in the jth block of the buffer 'recvbuf'.
-: 734:.n
-: 735:.n
-: 736: This text was suggested by Rajeev Thakur and has been adopted as a
-: 737: clarification by the MPI Forum.
-: 738:
-: 739:.N ThreadSafe
-: 740:
-: 741:.N Fortran
-: 742:
-: 743:.N Errors
-: 744:.N MPI_ERR_COMM
-: 745:.N MPI_ERR_COUNT
-: 746:.N MPI_ERR_TYPE
-: 747:.N MPI_ERR_BUFFER
-: 748:@*/
-: 749:int MPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
-: 750: void *recvbuf, int recvcount, MPI_Datatype recvtype,
-: 751: MPI_Comm comm)
|
2574179: 752:{
2574179: 753: int mpi_errno = MPI_SUCCESS;
2574179: 754: MPID_Comm *comm_ptr = NULL;
2574179: 755: MPIU_THREADPRIV_DECL;
-: 756: MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHER);
-: 757:
2574179: 758: MPIR_ERRTEST_INITIALIZED_ORDIE();
-: 759:
2574179: 760: MPIU_THREAD_CS_ENTER(ALLFUNC,);
-: 761: MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLGATHER);
-: 762:
-: 763: /* Validate parameters, especially handles needing to be converted */
|
-: 764:# ifdef HAVE_ERROR_CHECKING
-: 765: {
-: 766: MPID_BEGIN_ERROR_CHECKS;
-: 767: {
2574179: 768: MPIR_ERRTEST_COMM(comm, mpi_errno);
2574179: 769: if (mpi_errno != MPI_SUCCESS) goto fn_fail; /* comm_ptr is still NULL on this path */
-: 770: }
-: 771: MPID_END_ERROR_CHECKS;
-: 772: }
-: 773:# endif /* HAVE_ERROR_CHECKING */
-: 774:
-: 775: /* Convert MPI object handles to object pointers */
|
2574177: 776: MPID_Comm_get_ptr( comm, comm_ptr );
-: 777:
-: 778: /* Validate parameters and objects (post conversion) */
|
-: 779:# ifdef HAVE_ERROR_CHECKING
-: 780: {
-: 781: MPID_BEGIN_ERROR_CHECKS;
-: 782: {
2574177: 783: MPID_Datatype *recvtype_ptr=NULL, *sendtype_ptr=NULL;
-: 784:
2574177: 785: MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
2574177: 786: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 787:
2574171: 788: if (comm_ptr->comm_kind == MPID_INTERCOMM)
3145: 789: MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno); /* presumably rejects MPI_IN_PLACE as sendbuf on intercommunicators - confirm against the macro */
2574171: 790: if (sendbuf != MPI_IN_PLACE)
-: 791: {
2139817: 792: MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
2139817: 793: MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
2139817: 794: if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN)
-: 795: {
362: 796: MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
362: 797: MPID_Datatype_valid_ptr( sendtype_ptr, mpi_errno );
362: 798: MPID_Datatype_committed_ptr( sendtype_ptr, mpi_errno );
-: 799: }
2139817: 800: MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
-: 801: }
-: 802:
2574171: 803: MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
2574171: 804: MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
2574171: 805: MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
2574171: 806: if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN)
-: 807: {
362: 808: MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
362: 809: MPID_Datatype_valid_ptr( recvtype_ptr, mpi_errno );
362: 810: MPID_Datatype_committed_ptr( recvtype_ptr, mpi_errno );
-: 811: }
2574171: 812: MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
-: 813:
2574171: 814: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 815: }
-: 816: MPID_END_ERROR_CHECKS;
-: 817: }
-: 818:# endif /* HAVE_ERROR_CHECKING */
-: 819:
-: 820: /* ... body of routine ... */
-: 821:
|
2574167: 822: if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Allgather != NULL) /* a communicator-supplied Allgather replaces the default implementations */
-: 823: {
|
#####: 824: mpi_errno = comm_ptr->coll_fns->Allgather(sendbuf, sendcount,
-: 825: sendtype, recvbuf, recvcount,
-: 826: recvtype, comm_ptr);
-: 827: }
-: 828: else
-: 829: {
|
2574167: 830: MPIU_THREADPRIV_GET;
-: 831:
2574167: 832: MPIR_Nest_incr();
2574167: 833: if (comm_ptr->comm_kind == MPID_INTRACOMM)
-: 834: /* intracommunicator */
2571022: 835: mpi_errno = MPIR_Allgather(sendbuf, sendcount, sendtype,
-: 836: recvbuf, recvcount, recvtype,
-: 837: comm_ptr);
-: 838: else {
-: 839: /* intercommunicator */
3145: 840: mpi_errno = MPIR_Allgather_inter(sendbuf, sendcount, sendtype,
-: 841: recvbuf, recvcount, recvtype,
-: 842: comm_ptr);
-: 843: }
2574167: 844: MPIR_Nest_decr();
-: 845: }
-: 846:
|
2574167: 847: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 848:
-: 849: /* ... end of body of routine ... */
-: 850:
|
2574179: 851: fn_exit:
|
-: 852: MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLGATHER);
|
2574179: 853: MPIU_THREAD_CS_EXIT(ALLFUNC,);
2574179: 854: return mpi_errno;
-: 855:
|
12: 856: fn_fail:
-: 857: /* --BEGIN ERROR HANDLING-- */
-: 858:# ifdef HAVE_ERROR_CHECKING
-: 859: {
12: 860: mpi_errno = MPIR_Err_create_code(
-: 861: mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_allgather",
-: 862: "**mpi_allgather %p %d %D %p %d %D %C", sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
-: 863: }
-: 864:# endif
12: 865: mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno ); /* NOTE(review): comm_ptr is NULL when the failure came before MPID_Comm_get_ptr; presumably MPIR_Err_return_comm accepts that - confirm */
12: 866: goto fn_exit;
-: 867: /* --END ERROR HANDLING-- */
-: 868:}
|