-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/coll/alltoallw.c
        -:    0:Graph:alltoallw.gcno
        -:    0:Data:alltoallw.gcda
        -:    0:Runs:678
        -:    0:Programs:178
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:/*
        -:    3: *
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include "mpiimpl.h"
        -:    9:
        -:   10:/* -- Begin Profiling Symbol Block for routine MPI_Alltoallw */
        -:   11:#if defined(HAVE_PRAGMA_WEAK)
        -:   12:#pragma weak MPI_Alltoallw = PMPI_Alltoallw
        -:   13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
        -:   14:#pragma _HP_SECONDARY_DEF PMPI_Alltoallw  MPI_Alltoallw
        -:   15:#elif defined(HAVE_PRAGMA_CRI_DUP)
        -:   16:#pragma _CRI duplicate MPI_Alltoallw as PMPI_Alltoallw
        -:   17:#endif
        -:   18:/* -- End Profiling Symbol Block */
        -:   19:
        -:   20:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
        -:   21:   the MPI routines */
        -:   22:#ifndef MPICH_MPI_FROM_PMPI
        -:   23:#undef MPI_Alltoallw
        -:   24:#define MPI_Alltoallw PMPI_Alltoallw
        -:   25:/* This is the default implementation of alltoallw. The algorithm is:
        -:   26:   
        -:   27:   Algorithm: MPI_Alltoallw
        -:   28:
        -:   29:   Since each process sends/receives different amounts of data to
        -:   30:   every other process, we don't know the total message size for all
        -:   31:   processes without additional communication. Therefore we simply use
        -:   32:   the "middle of the road" isend/irecv algorithm that works
        -:   33:   reasonably well in all cases.
        -:   34:
        -:   35:   We post all irecvs and isends and then do a waitall. We scatter the
        -:   36:   order of sources and destinations among the processes, so that all
        -:   37:   processes don't try to send/recv to/from the same process at the
        -:   38:   same time. 
        -:   39:
        -:   40:   *** Modification: We post only a small number of isends and irecvs 
        -:   41:   at a time and wait on them as suggested by Tony Ladd. ***
        -:   42:
        -:   43:   Possible improvements: 
        -:   44:
        -:   45:   End Algorithm: MPI_Alltoallw
        -:   46:*/
        -:   47:/* begin:nested */
        -:   48:/* not declared static because a machine-specific function may call this one in some cases */
        -:   49:int MPIR_Alltoallw ( 
        -:   50:	void *sendbuf, 
        -:   51:	int *sendcnts, 
        -:   52:	int *sdispls, 
        -:   53:	MPI_Datatype *sendtypes, 
        -:   54:	void *recvbuf, 
        -:   55:	int *recvcnts, 
        -:   56:	int *rdispls, 
        -:   57:	MPI_Datatype *recvtypes, 
        -:   58:	MPID_Comm *comm_ptr )
      265:   59:{
        -:   60:    static const char FCNAME[] = "MPIR_Alltoallw";
        -:   61:    int        comm_size, i, j;
      265:   62:    int        mpi_errno = MPI_SUCCESS;
        -:   63:    MPI_Status status;
        -:   64:    MPI_Status *starray;
        -:   65:    MPI_Request *reqarray;
        -:   66:    int dst, rank;
        -:   67:    MPI_Comm comm;
        -:   68:    int outstanding_requests;
        -:   69:    int ii, ss, bblock;
        -:   70:    int type_size;
        -:   71:
      265:   72:    MPIU_CHKLMEM_DECL(2);
        -:   73:    
      265:   74:    comm = comm_ptr->handle;
      265:   75:    comm_size = comm_ptr->local_size;
      265:   76:    rank = comm_ptr->rank;
        -:   77:    
        -:   78:    /* check if multiple threads are calling this collective function */
        -:   79:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
        -:   80:
      265:   81:    if (sendbuf == MPI_IN_PLACE) {
        -:   82:        /* We use pair-wise sendrecv_replace in order to conserve memory usage,
        -:   83:         * which is keeping with the spirit of the MPI-2.2 Standard.  But
        -:   84:         * because of this approach all processes must agree on the global
        -:   85:         * schedule of sendrecv_replace operations to avoid deadlock.
        -:   86:         *
        -:   87:         * Note that this is not an especially efficient algorithm in terms of
        -:   88:         * time and there will be multiple repeated malloc/free's rather than
        -:   89:         * maintaining a single buffer across the whole loop.  Something like
        -:   90:         * MADRE is probably the best solution for the MPI_IN_PLACE scenario. */
      730:   91:        for (i = 0; i < comm_size; ++i) {
        -:   92:            /* start inner loop at i to avoid re-exchanging data */
     3759:   93:            for (j = i; j < comm_size; ++j) {
     3110:   94:                if (rank == i) {
        -:   95:                    /* also covers the (rank == i && rank == j) case */
      365:   96:                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[j]),
        -:   97:                                                      recvcnts[j], recvtypes[j],
        -:   98:                                                      j, MPIR_ALLTOALL_TAG,
        -:   99:                                                      j, MPIR_ALLTOALL_TAG,
        -:  100:                                                      comm, &status);
      365:  101:                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  102:                }
     2745:  103:                else if (rank == j) {
        -:  104:                    /* same as above with i/j args reversed */
      284:  105:                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + rdispls[i]),
        -:  106:                                                      recvcnts[i], recvtypes[i],
        -:  107:                                                      i, MPIR_ALLTOALL_TAG,
        -:  108:                                                      i, MPIR_ALLTOALL_TAG,
        -:  109:                                                      comm, &status);
      284:  110:                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  111:                }
        -:  112:            }
        -:  113:        }
        -:  114:    }
        -:  115:    else {
      184:  116:        bblock = MPIR_ALLTOALL_THROTTLE;
      184:  117:        if (bblock == 0) bblock = comm_size;
        -:  118:
      184:  119:        MPIU_CHKLMEM_MALLOC(starray,  MPI_Status*,  2*bblock*sizeof(MPI_Status),  mpi_errno, "starray");
      184:  120:        MPIU_CHKLMEM_MALLOC(reqarray, MPI_Request*, 2*bblock*sizeof(MPI_Request), mpi_errno, "reqarray");
        -:  121:
        -:  122:        /* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
      634:  123:        for (ii=0; ii<comm_size; ii+=bblock) {
      450:  124:            outstanding_requests = 0;
      450:  125:            ss = comm_size-ii < bblock ? comm_size-ii : bblock;
        -:  126:
        -:  127:            /* do the communication -- post ss sends and receives: */
     1922:  128:            for ( i=0; i<ss; i++ ) { 
     1472:  129:                dst = (rank+i+ii) % comm_size;
     1472:  130:                if (recvcnts[dst]) {
     1243:  131:                    MPID_Datatype_get_size_macro(recvtypes[dst], type_size);
     1243:  132:                    if (type_size) {
     1149:  133:                        mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst],
        -:  134:                                               recvcnts[dst], recvtypes[dst], dst,
        -:  135:                                               MPIR_ALLTOALLW_TAG, comm,
        -:  136:                                               &reqarray[outstanding_requests]);
     1149:  137:                        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        -:  138:
     1149:  139:                        outstanding_requests++;
        -:  140:                    }
        -:  141:                }
        -:  142:            }
        -:  143:
     1922:  144:            for ( i=0; i<ss; i++ ) { 
     1472:  145:                dst = (rank-i-ii+comm_size) % comm_size;
     1472:  146:                if (sendcnts[dst]) {
     1243:  147:                    MPID_Datatype_get_size_macro(sendtypes[dst], type_size);
     1243:  148:                    if (type_size) {
     1149:  149:                        mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst],
        -:  150:                                               sendcnts[dst], sendtypes[dst], dst,
        -:  151:                                               MPIR_ALLTOALLW_TAG, comm,
        -:  152:                                               &reqarray[outstanding_requests]);
     1149:  153:                        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        -:  154:
     1149:  155:                        outstanding_requests++;
        -:  156:                    }
        -:  157:                }
        -:  158:            }
        -:  159:
      450:  160:            mpi_errno = NMPI_Waitall(outstanding_requests, reqarray, starray);
        -:  161:
        -:  162:            /* --BEGIN ERROR HANDLING-- */
      450:  163:            if (mpi_errno == MPI_ERR_IN_STATUS) {
    #####:  164:                for (i=0; i<outstanding_requests; i++) {
    #####:  165:                    if (starray[i].MPI_ERROR != MPI_SUCCESS) 
    #####:  166:                        mpi_errno = starray[i].MPI_ERROR;
        -:  167:                }
        -:  168:            }
        -:  169:            /* --END ERROR HANDLING-- */   
        -:  170:        }
        -:  171:
        -:  172:#ifdef FOO
        -:  173:        /* Use pairwise exchange algorithm. */
        -:  174:        
        -:  175:        /* Make local copy first */
        -:  176:        mpi_errno = MPIR_Localcopy(((char *)sendbuf+sdispls[rank]), 
        -:  177:                                   sendcnts[rank], sendtypes[rank], 
        -:  178:                                   ((char *)recvbuf+rdispls[rank]), 
        -:  179:                                   recvcnts[rank], recvtypes[rank]);
        -:  180:        /* --BEGIN ERROR HANDLING-- */
        -:  181:        if (mpi_errno)
        -:  182:        {
        -:  183:            mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
        -:  184:            goto fn_fail;
        -:  185:        }
        -:  186:        /* --END ERROR HANDLING-- */
        -:  187:        /* Do the pairwise exchange. */
        -:  188:        for (i=1; i<comm_size; i++) {
        -:  189:            src = (rank - i + comm_size) % comm_size;
        -:  190:            dst = (rank + i) % comm_size;
        -:  191:            mpi_errno = MPIC_Sendrecv(((char *)sendbuf+sdispls[dst]), 
        -:  192:                                      sendcnts[dst], sendtypes[dst], dst,
        -:  193:                                      MPIR_ALLTOALLW_TAG, 
        -:  194:                                      ((char *)recvbuf+rdispls[src]), 
        -:  195:                                      recvcnts[src], recvtypes[dst], src,
        -:  196:                                      MPIR_ALLTOALLW_TAG, comm, &status);
        -:  197:            /* --BEGIN ERROR HANDLING-- */
        -:  198:            if (mpi_errno)
        -:  199:            {
        -:  200:                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
        -:  201:                goto fn_fail;
        -:  202:            }
        -:  203:            /* --END ERROR HANDLING-- */
        -:  204:        }
        -:  205:#endif
        -:  206:    }
        -:  207:
        -:  208:    /* check if multiple threads are calling this collective function */
        -:  209:  fn_exit:
        -:  210:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );  
      368:  211:    MPIU_CHKLMEM_FREEALL();
      265:  212:    return (mpi_errno);
        -:  213:
        -:  214:  fn_fail:
        -:  215:    goto fn_exit;
        -:  216:}
        -:  217:/* end:nested */
        -:  218:
        -:  219:/* not declared static because a machine-specific function may call this one in some cases */
        -:  220:int MPIR_Alltoallw_inter ( 
        -:  221:	void *sendbuf, 
        -:  222:	int *sendcnts, 
        -:  223:	int *sdispls, 
        -:  224:	MPI_Datatype *sendtypes, 
        -:  225:	void *recvbuf, 
        -:  226:	int *recvcnts, 
        -:  227:	int *rdispls, 
        -:  228:	MPI_Datatype *recvtypes, 
        -:  229:	MPID_Comm *comm_ptr )
       75:  230:{
        -:  231:/* Intercommunicator alltoallw. We use a pairwise exchange algorithm
        -:  232:   similar to the one used in intracommunicator alltoallw. Since the
        -:  233:   local and remote groups can be of different 
        -:  234:   sizes, we first compute the max of local_group_size,
        -:  235:   remote_group_size. At step i, 0 <= i < max_size, each process
        -:  236:   receives from src = (rank - i + max_size) % max_size if src <
        -:  237:   remote_size, and sends to dst = (rank + i) % max_size if dst <
        -:  238:   remote_size. 
        -:  239:
        -:  240:   FIXME: change algorithm to match intracommunicator alltoallv
        -:  241:*/
        -:  242:    static const char FCNAME[] = "MPIR_Alltoallw_inter";
        -:  243:    int local_size, remote_size, max_size, i;
       75:  244:    int mpi_errno = MPI_SUCCESS;
        -:  245:    MPI_Status status;
        -:  246:    int src, dst, rank, sendcount, recvcount;
        -:  247:    char *sendaddr, *recvaddr;
        -:  248:    MPI_Datatype sendtype, recvtype;
        -:  249:    MPI_Comm comm;
        -:  250:    
       75:  251:    local_size = comm_ptr->local_size; 
       75:  252:    remote_size = comm_ptr->remote_size;
       75:  253:    comm = comm_ptr->handle;
       75:  254:    rank = comm_ptr->rank;
        -:  255:
        -:  256:    /* check if multiple threads are calling this collective function */
        -:  257:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
        -:  258:
        -:  259:    /* Use pairwise exchange algorithm. */
       75:  260:    max_size = MPIR_MAX(local_size, remote_size);
      367:  261:    for (i=0; i<max_size; i++) {
      292:  262:        src = (rank - i + max_size) % max_size;
      292:  263:        dst = (rank + i) % max_size;
      292:  264:        if (src >= remote_size) {
       98:  265:            src = MPI_PROC_NULL;
       98:  266:            recvaddr = NULL;
       98:  267:            recvcount = 0;
       98:  268:            recvtype = MPI_DATATYPE_NULL;
        -:  269:        }
        -:  270:        else {
      194:  271:            recvaddr = (char *)recvbuf + rdispls[src];
      194:  272:            recvcount = recvcnts[src];
      194:  273:            recvtype = recvtypes[src];
        -:  274:        }
      292:  275:        if (dst >= remote_size) {
       98:  276:            dst = MPI_PROC_NULL;
       98:  277:            sendaddr = NULL;
       98:  278:            sendcount = 0;
       98:  279:            sendtype = MPI_DATATYPE_NULL;
        -:  280:        }
        -:  281:        else {
      194:  282:            sendaddr = (char *)sendbuf+sdispls[dst];
      194:  283:            sendcount = sendcnts[dst];
      194:  284:            sendtype = sendtypes[dst];
        -:  285:        }
        -:  286:
      292:  287:        mpi_errno = MPIC_Sendrecv(sendaddr, sendcount, sendtype, 
        -:  288:                                  dst, MPIR_ALLTOALLW_TAG, recvaddr, 
        -:  289:                                  recvcount, recvtype, src,
        -:  290:                                  MPIR_ALLTOALLW_TAG, comm, &status);
        -:  291:	/* --BEGIN ERROR HANDLING-- */
      292:  292:        if (mpi_errno)
        -:  293:	{
    #####:  294:	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  295:	    return mpi_errno;
        -:  296:	}
        -:  297:	/* --END ERROR HANDLING-- */
        -:  298:    }
        -:  299:    
        -:  300:    /* check if multiple threads are calling this collective function */
        -:  301:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
        -:  302:    
       75:  303:    return (mpi_errno);
        -:  304:}
        -:  305:
        -:  306:#endif
        -:  307:
        -:  308:#undef FUNCNAME
        -:  309:#define FUNCNAME MPI_Alltoallw
        -:  310:
        -:  311:/*@
        -:  312:   MPI_Alltoallw - Generalized all-to-all communication allowing different
        -:  313:   datatypes, counts, and displacements for each partner
        -:  314:
        -:  315:   Input Parameters:
        -:  316:+ sendbuf - starting address of send buffer (choice) 
        -:  317:. sendcounts - integer array (of length group size) specifying the number of 
        -:  318:  elements to send to each processor (integer) 
        -:  319:. sdispls - integer array (of length group size). Entry j specifies the 
        -:  320:  displacement in bytes (relative to sendbuf) from which to take the outgoing 
        -:  321:  data destined for process j 
        -:  322:. sendtypes - array of datatypes (of length group size). Entry j specifies the 
        -:  323:  type of data to send to process j (handle) 
        -:  324:. recvcounts - integer array (of length group size) specifying the number of
        -:  325:   elements that can be received from each processor (integer) 
        -:  326:. rdispls - integer array (of length group size). Entry i specifies the 
        -:  327:  displacement in bytes (relative to recvbuf) at which to place the incoming 
        -:  328:  data from process i 
        -:  329:. recvtypes - array of datatypes (of length group size). Entry i specifies 
        -:  330:  the type of data received from process i (handle) 
        -:  331:- comm - communicator (handle) 
        -:  332:
        -:  333: Output Parameter:
        -:  334:. recvbuf - address of receive buffer (choice) 
        -:  335:
        -:  336:.N ThreadSafe
        -:  337:
        -:  338:.N Fortran
        -:  339:
        -:  340:.N Errors
        -:  341:.N MPI_SUCCESS
        -:  342:.N MPI_ERR_COMM
        -:  343:.N MPI_ERR_ARG
        -:  344:.N MPI_ERR_COUNT
        -:  345:.N MPI_ERR_TYPE
        -:  346:@*/
        -:  347:int MPI_Alltoallw(void *sendbuf, int *sendcnts, int *sdispls, 
        -:  348:                  MPI_Datatype *sendtypes, void *recvbuf, int *recvcnts, 
        -:  349:                  int *rdispls, MPI_Datatype *recvtypes, MPI_Comm comm)
      340:  350:{
        -:  351:    static const char FCNAME[] = "MPI_Alltoallw";
      340:  352:    int mpi_errno = MPI_SUCCESS;
      340:  353:    MPID_Comm *comm_ptr = NULL;
      340:  354:    MPIU_THREADPRIV_DECL;
        -:  355:    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLW);
        -:  356:
      340:  357:    MPIR_ERRTEST_INITIALIZED_ORDIE();
        -:  358:    
      340:  359:    MPIU_THREAD_CS_ENTER(ALLFUNC,);
        -:  360:    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLTOALLW);
        -:  361:
        -:  362:    /* Validate parameters, especially handles needing to be converted */
        -:  363:#   ifdef HAVE_ERROR_CHECKING
        -:  364:    {
        -:  365:        MPID_BEGIN_ERROR_CHECKS;
        -:  366:        {
      340:  367:	    MPIR_ERRTEST_COMM(comm, mpi_errno);
      340:  368:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  369:	}
        -:  370:        MPID_END_ERROR_CHECKS;
        -:  371:    }
        -:  372:#   endif /* HAVE_ERROR_CHECKING */
        -:  373:
        -:  374:    /* Convert MPI object handles to object pointers */
      340:  375:    MPID_Comm_get_ptr( comm, comm_ptr );
        -:  376:
        -:  377:    /* Validate parameters and objects (post conversion) */
        -:  378:#   ifdef HAVE_ERROR_CHECKING
        -:  379:    {
        -:  380:        MPID_BEGIN_ERROR_CHECKS;
        -:  381:        {
      340:  382:	    MPID_Datatype *sendtype_ptr=NULL, *recvtype_ptr=NULL;
        -:  383:            int i, comm_size;
        -:  384:            int check_send;
        -:  385:
      340:  386:            MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
      340:  387:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  388:
      340:  389:            check_send = (comm_ptr->comm_kind == MPID_INTRACOMM && sendbuf != MPI_IN_PLACE);
        -:  390:
      340:  391:            if (comm_ptr->comm_kind == MPID_INTERCOMM && sendbuf == MPI_IN_PLACE) {
    #####:  392:                MPIU_ERR_SETANDJUMP(mpi_errno, MPIR_ERR_RECOVERABLE, "**sendbuf_inplace");
        -:  393:            }
        -:  394:
      340:  395:            if (comm_ptr->comm_kind == MPID_INTRACOMM)
      265:  396:                comm_size = comm_ptr->local_size;
        -:  397:            else
       75:  398:                comm_size = comm_ptr->remote_size;
        -:  399:
     2655:  400:            for (i=0; i<comm_size; i++) {
     2315:  401:                if (check_send) {
     1472:  402:                    MPIR_ERRTEST_COUNT(sendcnts[i], mpi_errno);
     1472:  403:                    if (sendcnts[i] > 0) {
     1243:  404:                        MPIR_ERRTEST_DATATYPE(sendtypes[i], "sendtype[i]", mpi_errno);
        -:  405:                    }
     1472:  406:                    if ((sendcnts[i] > 0) && (HANDLE_GET_KIND(sendtypes[i]) != HANDLE_KIND_BUILTIN)) {
      194:  407:                        MPID_Datatype_get_ptr(sendtypes[i], sendtype_ptr);
      194:  408:                        MPID_Datatype_valid_ptr( sendtype_ptr, mpi_errno );
      194:  409:                        MPID_Datatype_committed_ptr( sendtype_ptr, mpi_errno );
        -:  410:                    }
        -:  411:                }
        -:  412:
     2315:  413:                MPIR_ERRTEST_COUNT(recvcnts[i], mpi_errno);
     2315:  414:                if (recvcnts[i] > 0) {
     1959:  415:                    MPIR_ERRTEST_DATATYPE(recvtypes[i], "recvtype[i]", mpi_errno);
        -:  416:                }
     2315:  417:                if ((recvcnts[i] > 0) && (HANDLE_GET_KIND(recvtypes[i]) != HANDLE_KIND_BUILTIN)) {
      244:  418:                    MPID_Datatype_get_ptr(recvtypes[i], recvtype_ptr);
      244:  419:                    MPID_Datatype_valid_ptr( recvtype_ptr, mpi_errno );
      244:  420:                    MPID_Datatype_committed_ptr( recvtype_ptr, mpi_errno );
        -:  421:                }
        -:  422:            }
        -:  423:
      569:  424:            for (i=0; i<comm_size && check_send; i++) {
      397:  425:                if (sendcnts[i] > 0) {
      168:  426:                    MPIR_ERRTEST_USERBUFFER(sendbuf,sendcnts[i],sendtypes[i],mpi_errno); 
        -:  427:                    break;
        -:  428:                }
        -:  429:            }
      696:  430:            for (i=0; i<comm_size; i++) {
      630:  431:                if (recvcnts[i] > 0) {
      274:  432:                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcnts[i], mpi_errno);
      274:  433:                    MPIR_ERRTEST_USERBUFFER(recvbuf,recvcnts[i],recvtypes[i],mpi_errno); 
        -:  434:                    break;
        -:  435:                }
        -:  436:            }
        -:  437:
      340:  438:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  439:        }
        -:  440:        MPID_END_ERROR_CHECKS;
        -:  441:    }
        -:  442:#   endif /* HAVE_ERROR_CHECKING */
        -:  443:
        -:  444:    /* ... body of routine ...  */
        -:  445:
      340:  446:    if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Alltoallw != NULL)
        -:  447:    {
    #####:  448:	mpi_errno = comm_ptr->coll_fns->Alltoallw(sendbuf, sendcnts, sdispls,
        -:  449:                                                 sendtypes, recvbuf, recvcnts,
        -:  450:                                                 rdispls, recvtypes, comm_ptr);
        -:  451:    }
        -:  452:    else
        -:  453:    {
      340:  454:	MPIU_THREADPRIV_GET;
        -:  455:
      340:  456:	MPIR_Nest_incr();
      340:  457:        if (comm_ptr->comm_kind == MPID_INTRACOMM) 
        -:  458:            /* intracommunicator */
      265:  459:            mpi_errno = MPIR_Alltoallw(sendbuf, sendcnts, sdispls,
        -:  460:                                       sendtypes, recvbuf, recvcnts,
        -:  461:                                       rdispls, recvtypes, comm_ptr);
        -:  462:        else {
        -:  463:            /* intercommunicator */
       75:  464:            mpi_errno = MPIR_Alltoallw_inter(sendbuf, sendcnts, sdispls,
        -:  465:                                       sendtypes, recvbuf, recvcnts,
        -:  466:                                       rdispls, recvtypes, comm_ptr);
        -:  467:        }
      340:  468:	MPIR_Nest_decr();
        -:  469:    }
        -:  470:
        -:  471:    /* ... end of body of routine ... */
        -:  472:    
      340:  473:    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  474:
      340:  475:  fn_exit:
        -:  476:    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLTOALLW);
      340:  477:    MPIU_THREAD_CS_EXIT(ALLFUNC,);
      340:  478:    return mpi_errno;
        -:  479:
    #####:  480:  fn_fail:
        -:  481:    /* --BEGIN ERROR HANDLING-- */
        -:  482:#   ifdef HAVE_ERROR_CHECKING
        -:  483:    {
    #####:  484:	mpi_errno = MPIR_Err_create_code(
        -:  485:	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_alltoallw",
        -:  486:	    "**mpi_alltoallw %p %p %p %p %p %p %p %p %C", sendbuf, sendcnts, sdispls, sendtypes,
        -:  487:	    recvbuf, recvcnts, rdispls, recvtypes, comm);
        -:  488:    }
        -:  489:#   endif
    #####:  490:    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    #####:  491:    goto fn_exit;
        -:  492:    /* --END ERROR HANDLING-- */
        -:  493:}