-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/coll/alltoall.c
-: 0:Graph:alltoall.gcno
-: 0:Data:alltoall.gcda
-: 0:Runs:681
-: 0:Programs:186
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:/*
-: 3: *
-: 4: * (C) 2001 by Argonne National Laboratory.
-: 5: * See COPYRIGHT in top-level directory.
-: 6: */
-: 7:
-: 8:#include "mpiimpl.h"
-: 9:
-: 10:/* -- Begin Profiling Symbol Block for routine MPI_Alltoall */
-: 11:#if defined(HAVE_PRAGMA_WEAK)
-: 12:#pragma weak MPI_Alltoall = PMPI_Alltoall
-: 13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
-: 14:#pragma _HP_SECONDARY_DEF PMPI_Alltoall MPI_Alltoall
-: 15:#elif defined(HAVE_PRAGMA_CRI_DUP)
-: 16:#pragma _CRI duplicate MPI_Alltoall as PMPI_Alltoall
-: 17:#endif
-: 18:/* -- End Profiling Symbol Block */
-: 19:
-: 20:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
-: 21: the MPI routines */
-: 22:#ifndef MPICH_MPI_FROM_PMPI
-: 23:#undef MPI_Alltoall
-: 24:#define MPI_Alltoall PMPI_Alltoall
-: 25:
-: 26:/* This is the default implementation of alltoall. The algorithm is:
-: 27:
-: 28: Algorithm: MPI_Alltoall
-: 29:
-: 30: We use four algorithms for alltoall. For short messages and
-: 31: (comm_size >= 8), we use the algorithm by Jehoshua Bruck et al,
-: 32: IEEE TPDS, Nov. 1997. It is a store-and-forward algorithm that
-: 33: takes lgp steps. Because of the extra communication, the bandwidth
-: 34: requirement is (n/2).lgp.beta.
-: 35:
-: 36: Cost = lgp.alpha + (n/2).lgp.beta
-: 37:
-: 38: where n is the total amount of data a process needs to send to all
-: 39: other processes.
-: 40:
-: 41: For medium size messages and (short messages for comm_size < 8), we
-: 42: use an algorithm that posts all irecvs and isends and then does a
-: 43: waitall. We scatter the order of sources and destinations among the
-: 44: processes, so that all processes don't try to send/recv to/from the
-: 45: same process at the same time.
-: 46:
-: 47: *** Modification: We post only a small number of isends and irecvs
-: 48: at a time and wait on them as suggested by Tony Ladd. ***
-: 49:
-: 50: For long messages and power-of-two number of processes, we use a
-: 51: pairwise exchange algorithm, which takes p-1 steps. We
-: 52: calculate the pairs by using an exclusive-or algorithm:
-: 53: for (i=1; i<comm_size; i++)
-: 54: dest = rank ^ i;
-: 55: This algorithm doesn't work if the number of processes is not a power of
-: 56: two. For a non-power-of-two number of processes, we use an
-: 57: algorithm in which, in step i, each process receives from (rank-i)
-: 58: and sends to (rank+i).
-: 59:
-: 60: Cost = (p-1).alpha + n.beta
-: 61:
-: 62: where n is the total amount of data a process needs to send to all
-: 63: other processes.
-: 64:
-: 65: Possible improvements:
-: 66:
-: 67: End Algorithm: MPI_Alltoall
-: 68:*/
-: 69:
-: 70:/* begin:nested */
-: 71:/* not declared static because a machine-specific function may call this one in some cases */
-: 72:int MPIR_Alltoall(
-: 73: void *sendbuf,
-: 74: int sendcount,
-: 75: MPI_Datatype sendtype,
-: 76: void *recvbuf,
-: 77: int recvcount,
-: 78: MPI_Datatype recvtype,
-: 79: MPID_Comm *comm_ptr )
4460: 80:{
-: 81: static const char FCNAME[] = "MPIR_Alltoall";
-: 82: int comm_size, i, j, pof2;
-: 83: MPI_Aint sendtype_extent, recvtype_extent;
-: 84: MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;
4460: 85: int mpi_errno=MPI_SUCCESS, src, dst, rank, nbytes;
-: 86: MPI_Status status;
-: 87: int sendtype_size, pack_size, block, position, *displs, count;
-: 88: MPI_Datatype newtype;
-: 89: void *tmp_buf;
-: 90: MPI_Comm comm;
-: 91: MPI_Request *reqarray;
-: 92: MPI_Status *starray;
-: 93:#ifdef MPIR_OLD_SHORT_ALLTOALL_ALG
-: 94: MPI_Aint sendtype_true_extent, sendbuf_extent, sendtype_true_lb;
-: 95: int k, p, curr_cnt, dst_tree_root, my_tree_root;
-: 96: int last_recv_cnt, mask, tmp_mask, tree_root, nprocs_completed;
-: 97:#endif
-: 98:
4460: 99: if (recvcount == 0) return MPI_SUCCESS;
-: 100:
4250: 101: comm = comm_ptr->handle;
4250: 102: comm_size = comm_ptr->local_size;
4250: 103: rank = comm_ptr->rank;
-: 104:
-: 105: /* Get extent of send and recv types */
4250: 106: MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
4250: 107: MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
-: 108:
4250: 109: MPID_Datatype_get_size_macro(sendtype, sendtype_size);
4250: 110: nbytes = sendtype_size * sendcount;
-: 111:
-: 112: /* check if multiple threads are calling this collective function */
-: 113: MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-: 114:
4250: 115: if (sendbuf == MPI_IN_PLACE) {
-: 116: /* We use pair-wise sendrecv_replace in order to conserve memory usage,
-: 117: * which is keeping with the spirit of the MPI-2.2 Standard. But
-: 118: * because of this approach all processes must agree on the global
-: 119: * schedule of sendrecv_replace operations to avoid deadlock.
-: 120: *
-: 121: * Note that this is not an especially efficient algorithm in terms of
-: 122: * time and there will be multiple repeated malloc/free's rather than
-: 123: * maintaining a single buffer across the whole loop. Something like
-: 124: * MADRE is probably the best solution for the MPI_IN_PLACE scenario. */
6144: 125: for (i = 0; i < comm_size; ++i) {
-: 126: /* start inner loop at i to avoid re-exchanging data */
26800: 127: for (j = i; j < comm_size; ++j) {
21456: 128: if (rank == i) {
-: 129: /* also covers the (rank == i && rank == j) case */
3072: 130: mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + j*recvcount*recvtype_extent),
-: 131: recvcount, recvtype,
-: 132: j, MPIR_ALLTOALL_TAG,
-: 133: j, MPIR_ALLTOALL_TAG,
-: 134: comm, &status);
|
3072: 135: if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-: 136: }
|
18384: 137: else if (rank == j) {
-: 138: /* same as above with i/j args reversed */
2272: 139: mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + i*recvcount*recvtype_extent),
-: 140: recvcount, recvtype,
-: 141: i, MPIR_ALLTOALL_TAG,
-: 142: i, MPIR_ALLTOALL_TAG,
-: 143: comm, &status);
|
2272: 144: if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-: 145: }
-: 146: }
-: 147: }
-: 148: }
|
3450: 149: else if ((nbytes <= MPIR_ALLTOALL_SHORT_MSG) && (comm_size >= 8)) {
-: 150:
-: 151: /* use the indexing algorithm by Jehoshua Bruck et al,
-: 152: * IEEE TPDS, Nov. 97 */
-: 153:
-: 154: /* allocate temporary buffer */
168: 155: NMPI_Pack_size(recvcount*comm_size, recvtype, comm, &pack_size);
168: 156: tmp_buf = MPIU_Malloc(pack_size);
|
-: 157: /* --BEGIN ERROR HANDLING-- */
168: 158: if (!tmp_buf) {
#####: 159: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
#####: 160: return mpi_errno;
-: 161: }
-: 162: /* --END ERROR HANDLING-- */
-: 163:
-: 164: /* Do Phase 1 of the algorithim. Shift the data blocks on process i
-: 165: * upwards by a distance of i blocks. Store the result in recvbuf. */
|
168: 166: mpi_errno = MPIR_Localcopy((char *) sendbuf +
-: 167: rank*sendcount*sendtype_extent,
-: 168: (comm_size - rank)*sendcount, sendtype, recvbuf,
-: 169: (comm_size - rank)*recvcount, recvtype);
|
168: 170: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
|
168: 171: mpi_errno = MPIR_Localcopy(sendbuf, rank*sendcount, sendtype,
-: 172: (char *) recvbuf +
-: 173: (comm_size-rank)*recvcount*recvtype_extent,
-: 174: rank*recvcount, recvtype);
|
168: 175: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 176: /* Input data is now stored in recvbuf with datatype recvtype */
-: 177:
-: 178: /* Now do Phase 2, the communication phase. It takes
-: 179: ceiling(lg p) steps. In each step i, each process sends to rank+2^i
-: 180: and receives from rank-2^i, and exchanges all data blocks
-: 181: whose ith bit is 1. */
-: 182:
-: 183: /* allocate displacements array for indexed datatype used in
-: 184: communication */
-: 185:
|
168: 186: displs = MPIU_Malloc(comm_size * sizeof(int));
|
-: 187: /* --BEGIN ERROR HANDLING-- */
168: 188: if (!displs) {
#####: 189: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
#####: 190: return mpi_errno;
-: 191: }
-: 192: /* --END ERROR HANDLING-- */
-: 193:
|
168: 194: pof2 = 1;
840: 195: while (pof2 < comm_size) {
504: 196: dst = (rank + pof2) % comm_size;
504: 197: src = (rank - pof2 + comm_size) % comm_size;
-: 198:
-: 199: /* Exchange all data blocks whose ith bit is 1 */
-: 200: /* Create an indexed datatype for the purpose */
-: 201:
504: 202: count = 0;
4032: 203: for (block=1; block<comm_size; block++) {
3528: 204: if (block & pof2) {
2016: 205: displs[count] = block * recvcount;
2016: 206: count++;
-: 207: }
-: 208: }
-: 209:
504: 210: mpi_errno = NMPI_Type_create_indexed_block(count, recvcount,
-: 211: displs, recvtype, &newtype);
|
504: 212: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 213:
|
504: 214: mpi_errno = NMPI_Type_commit(&newtype);
|
504: 215: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 216:
|
504: 217: position = 0;
504: 218: mpi_errno = NMPI_Pack(recvbuf, 1, newtype, tmp_buf, pack_size,
-: 219: &position, comm);
-: 220:
504: 221: mpi_errno = MPIC_Sendrecv(tmp_buf, position, MPI_PACKED, dst,
-: 222: MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
-: 223: src, MPIR_ALLTOALL_TAG, comm,
-: 224: MPI_STATUS_IGNORE);
|
504: 225: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 226:
|
504: 227: mpi_errno = NMPI_Type_free(&newtype);
|
504: 228: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 229:
|
504: 230: pof2 *= 2;
-: 231: }
-: 232:
168: 233: MPIU_Free(displs);
168: 234: MPIU_Free(tmp_buf);
-: 235:
-: 236: /* Rotate blocks in recvbuf upwards by (rank + 1) blocks. Need
-: 237: * a temporary buffer of the same size as recvbuf. */
-: 238:
-: 239: /* get true extent of recvtype */
168: 240: mpi_errno = NMPI_Type_get_true_extent(recvtype, &recvtype_true_lb,
-: 241: &recvtype_true_extent);
|
168: 242: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 243:
|
168: 244: recvbuf_extent = recvcount * comm_size *
-: 245: (MPIR_MAX(recvtype_true_extent, recvtype_extent));
168: 246: tmp_buf = MPIU_Malloc(recvbuf_extent);
|
-: 247: /* --BEGIN ERROR HANDLING-- */
168: 248: if (!tmp_buf) {
#####: 249: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
#####: 250: return mpi_errno;
-: 251: }
-: 252: /* --END ERROR HANDLING-- */
-: 253: /* adjust for potential negative lower bound in datatype */
|
168: 254: tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
-: 255:
168: 256: mpi_errno = MPIR_Localcopy((char *) recvbuf + (rank+1)*recvcount*recvtype_extent,
-: 257: (comm_size - rank - 1)*recvcount, recvtype, tmp_buf,
-: 258: (comm_size - rank - 1)*recvcount, recvtype);
|
168: 259: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
|
168: 260: mpi_errno = MPIR_Localcopy(recvbuf, (rank+1)*recvcount, recvtype,
-: 261: (char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent,
-: 262: (rank+1)*recvcount, recvtype);
|
168: 263: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 264:
-: 265: /* Blocks are in the reverse order now (comm_size-1 to 0).
-: 266: * Reorder them to (0 to comm_size-1) and store them in recvbuf. */
-: 267:
|
1512: 268: for (i=0; i<comm_size; i++)
1344: 269: MPIR_Localcopy((char *) tmp_buf + i*recvcount*recvtype_extent,
-: 270: recvcount, recvtype,
-: 271: (char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent,
-: 272: recvcount, recvtype);
-: 273:
168: 274: MPIU_Free((char*)tmp_buf + recvtype_true_lb);
-: 275:
-: 276:
-: 277:
-: 278:#ifdef MPIR_OLD_SHORT_ALLTOALL_ALG
-: 279: /* Short message. Use recursive doubling. Each process sends all
-: 280: its data at each step along with all data it received in
-: 281: previous steps. */
-: 282:
-: 283: /* need to allocate temporary buffer of size
-: 284: sendbuf_extent*comm_size */
-: 285:
-: 286: /* get true extent of sendtype */
-: 287: mpi_errno = NMPI_Type_get_true_extent(sendtype, &sendtype_true_lb,
-: 288: &sendtype_true_extent);
|
-: 289: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 290:
-: 291: sendbuf_extent = sendcount * comm_size *
-: 292: (MPIR_MAX(sendtype_true_extent, sendtype_extent));
-: 293: tmp_buf = MPIU_Malloc(sendbuf_extent*comm_size);
-: 294: /* --BEGIN ERROR HANDLING-- */
-: 295: if (!tmp_buf) {
-: 296: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
-: 297: return mpi_errno;
-: 298: }
-: 299: /* --END ERROR HANDLING-- */
-: 300:
-: 301: /* adjust for potential negative lower bound in datatype */
-: 302: tmp_buf = (void *)((char*)tmp_buf - sendtype_true_lb);
-: 303:
-: 304: /* copy local sendbuf into tmp_buf at location indexed by rank */
-: 305: curr_cnt = sendcount*comm_size;
-: 306: mpi_errno = MPIR_Localcopy(sendbuf, curr_cnt, sendtype,
-: 307: ((char *)tmp_buf + rank*sendbuf_extent),
-: 308: curr_cnt, sendtype);
-: 309: if (mpi_errno) { MPIU_ERR_POP(mpi_errno);}
-: 310:
-: 311: mask = 0x1;
-: 312: i = 0;
-: 313: while (mask < comm_size) {
-: 314: dst = rank ^ mask;
-: 315:
-: 316: dst_tree_root = dst >> i;
-: 317: dst_tree_root <<= i;
-: 318:
-: 319: my_tree_root = rank >> i;
-: 320: my_tree_root <<= i;
-: 321:
-: 322: if (dst < comm_size) {
-: 323: mpi_errno = MPIC_Sendrecv(((char *)tmp_buf +
-: 324: my_tree_root*sendbuf_extent),
-: 325: curr_cnt, sendtype,
-: 326: dst, MPIR_ALLTOALL_TAG,
-: 327: ((char *)tmp_buf +
-: 328: dst_tree_root*sendbuf_extent),
-: 329: sendbuf_extent*(comm_size-dst_tree_root),
-: 330: sendtype, dst, MPIR_ALLTOALL_TAG,
-: 331: comm, &status);
-: 332: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 333:
-: 334: /* in case of non-power-of-two nodes, less data may be
-: 335: received than specified */
-: 336: NMPI_Get_count(&status, sendtype, &last_recv_cnt);
-: 337: curr_cnt += last_recv_cnt;
-: 338: }
-: 339:
-: 340: /* if some processes in this process's subtree in this step
-: 341: did not have any destination process to communicate with
-: 342: because of non-power-of-two, we need to send them the
-: 343: result. We use a logarithmic recursive-halfing algorithm
-: 344: for this. */
-: 345:
-: 346: if (dst_tree_root + mask > comm_size) {
-: 347: nprocs_completed = comm_size - my_tree_root - mask;
-: 348: /* nprocs_completed is the number of processes in this
-: 349: subtree that have all the data. Send data to others
-: 350: in a tree fashion. First find root of current tree
-: 351: that is being divided into two. k is the number of
-: 352: least-significant bits in this process's rank that
-: 353: must be zeroed out to find the rank of the root */
-: 354: j = mask;
-: 355: k = 0;
-: 356: while (j) {
-: 357: j >>= 1;
-: 358: k++;
-: 359: }
-: 360: k--;
-: 361:
-: 362: tmp_mask = mask >> 1;
-: 363: while (tmp_mask) {
-: 364: dst = rank ^ tmp_mask;
-: 365:
-: 366: tree_root = rank >> k;
-: 367: tree_root <<= k;
-: 368:
-: 369: /* send only if this proc has data and destination
-: 370: doesn't have data. at any step, multiple processes
-: 371: can send if they have the data */
-: 372: if ((dst > rank) &&
-: 373: (rank < tree_root + nprocs_completed)
-: 374: && (dst >= tree_root + nprocs_completed)) {
-: 375: /* send the data received in this step above */
-: 376: mpi_errno = MPIC_Send(((char *)tmp_buf +
-: 377: dst_tree_root*sendbuf_extent),
-: 378: last_recv_cnt, sendtype,
-: 379: dst, MPIR_ALLTOALL_TAG,
-: 380: comm);
-: 381: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 382: }
-: 383: /* recv only if this proc. doesn't have data and sender
-: 384: has data */
-: 385: else if ((dst < rank) &&
-: 386: (dst < tree_root + nprocs_completed) &&
-: 387: (rank >= tree_root + nprocs_completed)) {
-: 388: mpi_errno = MPIC_Recv(((char *)tmp_buf +
-: 389: dst_tree_root*sendbuf_extent),
-: 390: sendbuf_extent*(comm_size-dst_tree_root),
-: 391: sendtype,
-: 392: dst, MPIR_ALLTOALL_TAG,
-: 393: comm, &status);
-: 394: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 395: NMPI_Get_count(&status, sendtype, &last_recv_cnt);
-: 396: curr_cnt += last_recv_cnt;
-: 397: }
-: 398: tmp_mask >>= 1;
-: 399: k--;
-: 400: }
-: 401: }
-: 402:
-: 403: mask <<= 1;
-: 404: i++;
-: 405: }
-: 406:
-: 407: /* now copy everyone's contribution from tmp_buf to recvbuf */
-: 408: for (p=0; p<comm_size; p++) {
-: 409: mpi_errno = MPIR_Localcopy(((char *)tmp_buf +
-: 410: p*sendbuf_extent +
-: 411: rank*sendcount*sendtype_extent),
-: 412: sendcount, sendtype,
-: 413: ((char*)recvbuf +
-: 414: p*recvcount*recvtype_extent),
-: 415: recvcount, recvtype);
-: 416: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 417: }
-: 418:
-: 419: MPIU_Free((char *)tmp_buf+sendtype_true_lb);
-: 420:#endif
-: 421:
-: 422: }
-: 423:
|
3282: 424: else if (nbytes <= MPIR_ALLTOALL_MEDIUM_MSG) {
-: 425: /* Medium-size message. Use isend/irecv with scattered
-: 426: destinations. Use Tony Ladd's modification to post only
-: 427: a small number of isends/irecvs at a time. */
-: 428: int ii, ss, bblock;
-: 429:
3182: 430: bblock = MPIR_ALLTOALL_THROTTLE;
3182: 431: if (bblock == 0) bblock = comm_size;
-: 432:
3182: 433: reqarray = (MPI_Request *) MPIU_Malloc(2*bblock*sizeof(MPI_Request));
|
-: 434: /* --BEGIN ERROR HANDLING-- */
3182: 435: if (!reqarray) {
#####: 436: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
#####: 437: return mpi_errno;
-: 438: }
-: 439: /* --END ERROR HANDLING-- */
-: 440:
|
3182: 441: starray = (MPI_Status *) MPIU_Malloc(2*bblock*sizeof(MPI_Status));
|
-: 442: /* --BEGIN ERROR HANDLING-- */
3182: 443: if (!starray) {
#####: 444: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
#####: 445: return mpi_errno;
-: 446: }
-: 447: /* --END ERROR HANDLING-- */
-: 448:
|
6790: 449: for (ii=0; ii<comm_size; ii+=bblock) {
3608: 450: ss = comm_size-ii < bblock ? comm_size-ii : bblock;
-: 451: /* do the communication -- post ss sends and receives: */
11948: 452: for ( i=0; i<ss; i++ ) {
8340: 453: dst = (rank+i+ii) % comm_size;
8340: 454: mpi_errno = MPIC_Irecv((char *)recvbuf +
-: 455: dst*recvcount*recvtype_extent,
-: 456: recvcount, recvtype, dst,
-: 457: MPIR_ALLTOALL_TAG, comm,
-: 458: &reqarray[i]);
|
8340: 459: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 460: }
-: 461:
|
11948: 462: for ( i=0; i<ss; i++ ) {
8340: 463: dst = (rank-i-ii+comm_size) % comm_size;
8340: 464: mpi_errno = MPIC_Isend((char *)sendbuf +
-: 465: dst*sendcount*sendtype_extent,
-: 466: sendcount, sendtype, dst,
-: 467: MPIR_ALLTOALL_TAG, comm,
-: 468: &reqarray[i+ss]);
|
8340: 469: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 470: }
-: 471:
-: 472: /* ... then wait for them to finish: */
|
3608: 473: mpi_errno = NMPI_Waitall(2*ss,reqarray,starray);
|
-: 474: /* --BEGIN ERROR HANDLING-- */
3608: 475: if (mpi_errno == MPI_ERR_IN_STATUS) {
#####: 476: for (j=0; j<2*ss; j++) {
#####: 477: if (starray[j].MPI_ERROR != MPI_SUCCESS)
#####: 478: mpi_errno = starray[j].MPI_ERROR;
-: 479: }
-: 480: }
-: 481: /* --END ERROR HANDLING-- */
-: 482: }
|
3182: 483: MPIU_Free(starray);
3182: 484: MPIU_Free(reqarray);
-: 485: }
-: 486:
-: 487: else {
-: 488: /* Long message. If comm_size is a power-of-two, do a pairwise
-: 489: exchange using exclusive-or to create pairs. Else send to
-: 490: rank+i, receive from rank-i. */
-: 491:
-: 492: /* Make local copy first */
100: 493: mpi_errno = MPIR_Localcopy(((char *)sendbuf +
-: 494: rank*sendcount*sendtype_extent),
-: 495: sendcount, sendtype,
-: 496: ((char *)recvbuf +
-: 497: rank*recvcount*recvtype_extent),
-: 498: recvcount, recvtype);
|
100: 499: if (mpi_errno) { MPIU_ERR_POP(mpi_errno ); }
-: 500:
-: 501: /* Is comm_size a power-of-two? */
|
100: 502: i = 1;
484: 503: while (i < comm_size)
284: 504: i *= 2;
100: 505: if (i == comm_size)
64: 506: pof2 = 1;
-: 507: else
36: 508: pof2 = 0;
-: 509:
-: 510: /* Do the pairwise exchanges */
668: 511: for (i=1; i<comm_size; i++) {
568: 512: if (pof2 == 1) {
-: 513: /* use exclusive-or algorithm */
384: 514: src = dst = rank ^ i;
-: 515: }
-: 516: else {
184: 517: src = (rank - i + comm_size) % comm_size;
184: 518: dst = (rank + i) % comm_size;
-: 519: }
-: 520:
568: 521: mpi_errno = MPIC_Sendrecv(((char *)sendbuf +
-: 522: dst*sendcount*sendtype_extent),
-: 523: sendcount, sendtype, dst,
-: 524: MPIR_ALLTOALL_TAG,
-: 525: ((char *)recvbuf +
-: 526: src*recvcount*recvtype_extent),
-: 527: recvcount, recvtype, src,
-: 528: MPIR_ALLTOALL_TAG, comm, &status);
|
568: 529: if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-: 530: }
-: 531: }
-: 532:
4250: 533: fn_fail:
-: 534: /* check if multiple threads are calling this collective function */
-: 535: MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-: 536:
|
4250: 537: return (mpi_errno);
-: 538:}
-: 539:/* end:nested */
-: 540:
-: 541:/* begin:nested */
-: 542:/* not declared static because a machine-specific function may call this one in some cases */
-: 543:int MPIR_Alltoall_inter(
-: 544: void *sendbuf,
-: 545: int sendcount,
-: 546: MPI_Datatype sendtype,
-: 547: void *recvbuf,
-: 548: int recvcount,
-: 549: MPI_Datatype recvtype,
-: 550: MPID_Comm *comm_ptr )
1530: 551:{
-: 552:/* Intercommunicator alltoall. We use a pairwise exchange algorithm
-: 553: similar to the one used in intracommunicator alltoall for long
-: 554: messages. Since the local and remote groups can be of different
-: 555: sizes, we first compute the max of local_group_size,
-: 556: remote_group_size. At step i, 0 <= i < max_size, each process
-: 557: receives from src = (rank - i + max_size) % max_size if src <
-: 558: remote_size, and sends to dst = (rank + i) % max_size if dst <
-: 559: remote_size.
-: 560:*/
-: 561: static const char FCNAME[] = "MPIR_Alltoall_inter";
-: 562: int local_size, remote_size, max_size, i;
-: 563: MPI_Aint sendtype_extent, recvtype_extent;
1530: 564: int mpi_errno = MPI_SUCCESS;
-: 565: MPI_Status status;
-: 566: int src, dst, rank;
-: 567: char *sendaddr, *recvaddr;
-: 568: MPI_Comm comm;
-: 569:
1530: 570: local_size = comm_ptr->local_size;
1530: 571: remote_size = comm_ptr->remote_size;
1530: 572: rank = comm_ptr->rank;
1530: 573: comm = comm_ptr->handle;
-: 574:
-: 575: /* Get extent of send and recv types */
1530: 576: MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
1530: 577: MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
-: 578:
-: 579: /* check if multiple threads are calling this collective function */
-: 580: MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-: 581:
-: 582: /* Do the pairwise exchanges */
1530: 583: max_size = MPIR_MAX(local_size, remote_size);
-: 584: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
-: 585: max_size*recvcount*recvtype_extent);
-: 586: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
-: 587: max_size*sendcount*sendtype_extent);
7344: 588: for (i=0; i<max_size; i++) {
5814: 589: src = (rank - i + max_size) % max_size;
5814: 590: dst = (rank + i) % max_size;
5814: 591: if (src >= remote_size) {
1972: 592: src = MPI_PROC_NULL;
1972: 593: recvaddr = NULL;
-: 594: }
-: 595: else {
3842: 596: recvaddr = (char *)recvbuf + src*recvcount*recvtype_extent;
-: 597: }
5814: 598: if (dst >= remote_size) {
1972: 599: dst = MPI_PROC_NULL;
1972: 600: sendaddr = NULL;
-: 601: }
-: 602: else {
3842: 603: sendaddr = (char *)sendbuf + dst*sendcount*sendtype_extent;
-: 604: }
-: 605:
5814: 606: mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, dst,
-: 607: MPIR_ALLTOALL_TAG, recvaddr,
-: 608: recvcount, recvtype, src,
-: 609: MPIR_ALLTOALL_TAG, comm, &status);
|
-: 610: /* --BEGIN ERROR HANDLING-- */
5814: 611: if (mpi_errno)
-: 612: {
#####: 613: mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
#####: 614: return mpi_errno;
-: 615: }
-: 616: /* --END ERROR HANDLING-- */
-: 617: }
-: 618:
-: 619: /* check if multiple threads are calling this collective function */
-: 620: MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-: 621:
|
1530: 622: return (mpi_errno);
-: 623:}
-: 624:/* end:nested */
-: 625:#endif
-: 626:
-: 627:#undef FUNCNAME
-: 628:#define FUNCNAME MPI_Alltoall
-: 629:
-: 630:/*@
-: 631:MPI_Alltoall - Sends data from all to all processes
-: 632:
-: 633:Input Parameters:
-: 634:+ sendbuf - starting address of send buffer (choice)
-: 635:. sendcount - number of elements to send to each process (integer)
-: 636:. sendtype - data type of send buffer elements (handle)
-: 637:. recvcount - number of elements received from any process (integer)
-: 638:. recvtype - data type of receive buffer elements (handle)
-: 639:- comm - communicator (handle)
-: 640:
-: 641:Output Parameter:
-: 642:. recvbuf - address of receive buffer (choice)
-: 643:
-: 644:.N ThreadSafe
-: 645:
-: 646:.N Fortran
-: 647:
-: 648:.N Errors
-: 649:.N MPI_ERR_COMM
-: 650:.N MPI_ERR_COUNT
-: 651:.N MPI_ERR_TYPE
-: 652:.N MPI_ERR_BUFFER
-: 653:@*/
-: 654:int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
-: 655: void *recvbuf, int recvcount, MPI_Datatype recvtype,
-: 656: MPI_Comm comm)
5998: 657:{
-: 658: static const char FCNAME[] = "MPI_Alltoall";
5998: 659: int mpi_errno = MPI_SUCCESS;
5998: 660: MPID_Comm *comm_ptr = NULL;
5998: 661: MPIU_THREADPRIV_DECL;
-: 662: MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALL);
-: 663:
5998: 664: MPIR_ERRTEST_INITIALIZED_ORDIE();
-: 665:
5998: 666: MPIU_THREAD_CS_ENTER(ALLFUNC,);
-: 667: MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLTOALL);
-: 668:
-: 669: /* Validate parameters, especially handles needing to be converted */
|
-: 670:# ifdef HAVE_ERROR_CHECKING
-: 671: {
-: 672: MPID_BEGIN_ERROR_CHECKS;
-: 673: {
5998: 674: MPIR_ERRTEST_COMM(comm, mpi_errno);
5998: 675: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 676: }
-: 677: MPID_END_ERROR_CHECKS;
-: 678: }
-: 679:# endif /* HAVE_ERROR_CHECKING */
-: 680:
-: 681: /* Convert MPI object handles to object pointers */
|
5996: 682: MPID_Comm_get_ptr( comm, comm_ptr );
-: 683:
-: 684: /* Validate parameters and objects (post conversion) */
|
-: 685:# ifdef HAVE_ERROR_CHECKING
-: 686: {
-: 687: MPID_BEGIN_ERROR_CHECKS;
-: 688: {
5996: 689: MPID_Datatype *sendtype_ptr=NULL, *recvtype_ptr=NULL;
-: 690:
5996: 691: MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
5996: 692: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
5994: 693: MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
5994: 694: MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
5994: 695: MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
5994: 696: MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
5994: 697: if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
362: 698: MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
362: 699: MPID_Datatype_valid_ptr( sendtype_ptr, mpi_errno );
362: 700: MPID_Datatype_committed_ptr( sendtype_ptr, mpi_errno );
-: 701: }
5994: 702: if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
362: 703: MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
362: 704: MPID_Datatype_valid_ptr( recvtype_ptr, mpi_errno );
362: 705: MPID_Datatype_committed_ptr( recvtype_ptr, mpi_errno );
-: 706: }
-: 707:
5994: 708: if (comm_ptr->comm_kind == MPID_INTERCOMM) {
1530: 709: MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
-: 710: }
5994: 711: MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
5994: 712: MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
5994: 713: MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
-: 714:
5994: 715: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 716: }
-: 717: MPID_END_ERROR_CHECKS;
-: 718: }
-: 719:# endif /* HAVE_ERROR_CHECKING */
-: 720:
-: 721: /* ... body of routine ... */
-: 722:
|
5990: 723: if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoall != NULL)
-: 724: {
|
#####: 725: mpi_errno = comm_ptr->coll_fns->Alltoall(sendbuf, sendcount,
-: 726: sendtype, recvbuf, recvcount,
-: 727: recvtype, comm_ptr);
-: 728: }
-: 729: else
-: 730: {
|
5990: 731: MPIU_THREADPRIV_GET;
-: 732:
5990: 733: MPIR_Nest_incr();
5990: 734: if (comm_ptr->comm_kind == MPID_INTRACOMM)
-: 735: /* intracommunicator */
4460: 736: mpi_errno = MPIR_Alltoall(sendbuf, sendcount, sendtype,
-: 737: recvbuf, recvcount, recvtype, comm_ptr);
-: 738: else {
-: 739: /* intercommunicator */
1530: 740: mpi_errno = MPIR_Alltoall_inter(sendbuf, sendcount,
-: 741: sendtype, recvbuf,
-: 742: recvcount, recvtype,
-: 743: comm_ptr);
-: 744: }
5990: 745: MPIR_Nest_decr();
-: 746: }
-: 747:
|
5990: 748: if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-: 749:
-: 750: /* ... end of body of routine ... */
-: 751:
|
5998: 752: fn_exit:
|
-: 753: MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLTOALL);
|
5998: 754: MPIU_THREAD_CS_EXIT(ALLFUNC,);
5998: 755: return mpi_errno;
-: 756:
|
8: 757: fn_fail:
-: 758: /* --BEGIN ERROR HANDLING-- */
-: 759:# ifdef HAVE_ERROR_CHECKING
-: 760: {
8: 761: mpi_errno = MPIR_Err_create_code(
-: 762: mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_alltoall",
-: 763: "**mpi_alltoall %p %d %D %p %d %D %C", sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
-: 764: }
-: 765:# endif
8: 766: mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
8: 767: goto fn_exit;
-: 768: /* --END ERROR HANDLING-- */
-: 769:}
|