-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/coll/scan.c
        -:    0:Graph:scan.gcno
        -:    0:Data:scan.gcda
        -:    0:Runs:563
        -:    0:Programs:153
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:/*
        -:    3: *
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include "mpiimpl.h"
        -:    9:
        -:   10:/* -- Begin Profiling Symbol Block for routine MPI_Scan */
        -:   11:#if defined(HAVE_PRAGMA_WEAK)
        -:   12:#pragma weak MPI_Scan = PMPI_Scan
        -:   13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
        -:   14:#pragma _HP_SECONDARY_DEF PMPI_Scan  MPI_Scan
        -:   15:#elif defined(HAVE_PRAGMA_CRI_DUP)
        -:   16:#pragma _CRI duplicate MPI_Scan as PMPI_Scan
        -:   17:#endif
        -:   18:/* -- End Profiling Symbol Block */
        -:   19:
        -:   20:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
        -:   21:   the MPI routines */
        -:   22:#ifndef MPICH_MPI_FROM_PMPI
        -:   23:#undef MPI_Scan
        -:   24:#define MPI_Scan PMPI_Scan
        -:   25:
        -:   26:/* This is the default implementation of scan. The algorithm is:
        -:   27:   
        -:   28:   Algorithm: MPI_Scan
        -:   29:
        -:   30:   We use a recursive doubling algorithm that takes ceil(lg p) steps for p processes. The basic algorithm is
        -:   31:   given below. (You can replace "+" with any other scan operator.)
        -:   32:   The result is stored in recvbuf.
        -:   33:
        -:   34: .vb
        -:   35:   recvbuf = sendbuf;
        -:   36:   partial_scan = sendbuf;
        -:   37:   mask = 0x1;
        -:   38:   while (mask < size) {
        -:   39:      dst = rank^mask;
        -:   40:      if (dst < size) {
        -:   41:         send partial_scan to dst;
        -:   42:         recv from dst into tmp_buf;
        -:   43:         if (rank > dst) {
        -:   44:            partial_scan = tmp_buf + partial_scan;
        -:   45:            recvbuf = tmp_buf + recvbuf;
        -:   46:         }
        -:   47:         else {
        -:   48:            if (op is commutative)
        -:   49:               partial_scan = tmp_buf + partial_scan;
        -:   50:            else {
        -:   51:               tmp_buf = partial_scan + tmp_buf;
        -:   52:               partial_scan = tmp_buf;
        -:   53:            }
        -:   54:         }
        -:   55:      }
        -:   56:      mask <<= 1;
        -:   57:   }  
        -:   58: .ve
        -:   59:
        -:   60:   End Algorithm: MPI_Scan
        -:   61:*/
        -:   62:
        -:   63:/* begin:nested */
        -:   64:/* MPIR_Scan: default inclusive scan over comm_ptr using recursive doubling; the result is left in recvbuf. Not declared static because a machine-specific function may call this one in some cases */
        -:   65:int MPIR_Scan ( 
        -:   66:    void *sendbuf, /* local contribution, or MPI_IN_PLACE to take it from recvbuf */
        -:   67:    void *recvbuf, /* receives the scan result */
        -:   68:    int count, /* number of elements; 0 returns MPI_SUCCESS immediately */
        -:   69:    MPI_Datatype datatype, 
        -:   70:    MPI_Op op, /* builtin or user-defined reduction operation */
        -:   71:    MPID_Comm *comm_ptr )
    23675:   72:{
        -:   73:    static const char FCNAME[] = "MPIR_Scan";
        -:   74:    MPI_Status status;
        -:   75:    int        rank, comm_size;
    23675:   76:    int        mpi_errno = MPI_SUCCESS;
        -:   77:    int mask, dst, is_commutative; 
        -:   78:    MPI_Aint true_extent, true_lb, extent;
        -:   79:    void *partial_scan, *tmp_buf;
        -:   80:    MPI_User_function *uop;
        -:   81:    MPID_Op *op_ptr;
        -:   82:    MPI_Comm comm;
    23675:   83:    MPIU_THREADPRIV_DECL;
        -:   84:#ifdef HAVE_CXX_BINDING
    23675:   85:    int is_cxx_uop = 0;
        -:   86:#endif
        -:   87:    
    23675:   88:    if (count == 0) return MPI_SUCCESS;
        -:   89:
    23675:   90:    comm = comm_ptr->handle;
    23675:   91:    comm_size = comm_ptr->local_size;
    23675:   92:    rank = comm_ptr->rank;
        -:   93:
    23675:   94:    MPIU_THREADPRIV_GET;
        -:   95:    /* reset the per-thread op_errno; it is checked again after the loop and returned if set */
    23675:   96:    MPIU_THREADPRIV_FIELD(op_errno) = 0;
        -:   97:
    23675:   98:    if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) {
    18611:   99:        is_commutative = 1;
        -:  100:        /* builtin ops are treated as commutative; get the function by indexing into the op table */
    18611:  101:        uop = MPIR_Op_table[op%16 - 1];
        -:  102:    }
        -:  103:    else {
     5064:  104:        MPID_Op_get_ptr(op, op_ptr);
     5064:  105:        if (op_ptr->kind == MPID_OP_USER_NONCOMMUTE)
     2460:  106:            is_commutative = 0;
        -:  107:        else
     2604:  108:            is_commutative = 1;
        -:  109:
        -:  110:#ifdef HAVE_CXX_BINDING            
     5064:  111:	if (op_ptr->language == MPID_LANG_CXX) {
      102:  112:	    uop = (MPI_User_function *) op_ptr->function.c_function;
      102:  113:	    is_cxx_uop = 1; /* C++ ops are invoked through MPIR_Process.cxx_call_op_fn below */
        -:  114:	}
        -:  115:	else
        -:  116:#endif
     4962:  117:	if ((op_ptr->language == MPID_LANG_C))
     4962:  118:            uop = (MPI_User_function *) op_ptr->function.c_function;
        -:  119:        else
    #####:  120:            uop = (MPI_User_function *) op_ptr->function.f77_function;
        -:  121:    }
        -:  122:    
        -:  123:    /* allocate a temporary buffer of count*max(extent,true_extent) bytes to store the partial scan */
    23675:  124:    mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb,
        -:  125:                                          &true_extent);
        -:  126:    /* --BEGIN ERROR HANDLING-- */
    23675:  127:    if (mpi_errno)
        -:  128:    {
    #####:  129:	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  130:	return mpi_errno;
        -:  131:    }
        -:  132:    /* --END ERROR HANDLING-- */
        -:  133:
    23675:  134:    MPID_Datatype_get_extent_macro(datatype, extent);
    23675:  135:    partial_scan = MPIU_Malloc(count*(MPIR_MAX(extent,true_extent)));
        -:  136:
        -:  137:    /* This size is malloc()ed as a temp buffer, not added to
        -:  138:     * any user buffers. NOTE(review): this check runs after the MPIU_Malloc above has already used the product. */
        -:  139:    MPID_Ensure_Aint_fits_in_pointer(count * MPIR_MAX(extent, true_extent));
        -:  140:
        -:  141:    /* --BEGIN ERROR HANDLING-- */
    23675:  142:    if (!partial_scan) {
    #####:  143:        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
    #####:  144:        return mpi_errno;
        -:  145:    }
        -:  146:    /* --END ERROR HANDLING-- */
        -:  147:    /* adjust for potential negative lower bound in datatype */
    23675:  148:    partial_scan = (void *)((char*)partial_scan - true_lb);
        -:  149:    
        -:  150:    /* allocate a second temporary buffer of the same size to store incoming data */
    23675:  151:    tmp_buf = MPIU_Malloc(count*(MPIR_MAX(extent,true_extent)));
        -:  152:    /* --BEGIN ERROR HANDLING-- */
    23675:  153:    if (!tmp_buf) {
    #####:  154:        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
    #####:  155:        return mpi_errno; /* NOTE(review): partial_scan is not freed on this path */
        -:  156:    }
        -:  157:    /* --END ERROR HANDLING-- */
        -:  158:    /* adjust for potential negative lower bound in datatype */
    23675:  159:    tmp_buf = (void *)((char*)tmp_buf - true_lb);
        -:  160:    
        -:  161:    /* Since this is an inclusive scan, copy the local contribution into
        -:  162:       recvbuf (skipped for MPI_IN_PLACE, where recvbuf already holds it). */
    23675:  163:    if (sendbuf != MPI_IN_PLACE) {
    23675:  164:        mpi_errno = MPIR_Localcopy(sendbuf, count, datatype,
        -:  165:                                   recvbuf, count, datatype);
        -:  166:	/* --BEGIN ERROR HANDLING-- */
    23675:  167:        if (mpi_errno)
        -:  168:	{
    #####:  169:	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  170:	    return mpi_errno; /* NOTE(review): partial_scan and tmp_buf are not freed on this path */
        -:  171:	}
        -:  172:	/* --END ERROR HANDLING-- */
        -:  173:    }
        -:  174:    
    23675:  175:    if (sendbuf != MPI_IN_PLACE) /* seed partial_scan with the local contribution */
    23675:  176:        mpi_errno = MPIR_Localcopy(sendbuf, count, datatype,
        -:  177:                                   partial_scan, count, datatype);
        -:  178:    else 
    #####:  179:        mpi_errno = MPIR_Localcopy(recvbuf, count, datatype,
        -:  180:                                   partial_scan, count, datatype);
        -:  181:    /* --BEGIN ERROR HANDLING-- */
    23675:  182:    if (mpi_errno)
        -:  183:    {
    #####:  184:	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  185:	return mpi_errno; /* NOTE(review): partial_scan and tmp_buf are not freed on this path */
        -:  186:    }
        -:  187:    /* --END ERROR HANDLING-- */
        -:  188:    
        -:  189:    /* check if multiple threads are calling this collective function */
        -:  190:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
        -:  191:
    23675:  192:    mask = 0x1;
    58735:  193:    while (mask < comm_size) {
    11385:  194:        dst = rank ^ mask;
    11385:  195:        if (dst < comm_size) {
        -:  196:            /* Send partial_scan to dst. Recv into tmp_buf */
    11270:  197:            mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype,
        -:  198:                                      dst, MPIR_SCAN_TAG, tmp_buf,
        -:  199:                                      count, datatype, dst,
        -:  200:                                      MPIR_SCAN_TAG, comm,
        -:  201:                                      &status);
        -:  202:	    /* --BEGIN ERROR HANDLING-- */
    11270:  203:            if (mpi_errno)
        -:  204:	    {
    #####:  205:		mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  206:		return mpi_errno; /* NOTE(review): partial_scan and tmp_buf are not freed, and MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT is skipped, on this path */
        -:  207:	    }
        -:  208:	    /* --END ERROR HANDLING-- */
        -:  209:            
    11270:  210:            if (rank > dst) { /* partner has the lower rank: fold its data into both partial_scan and recvbuf */
        -:  211:#ifdef HAVE_CXX_BINDING
     5635:  212:		if (is_cxx_uop) {
       85:  213:		    (*MPIR_Process.cxx_call_op_fn)( tmp_buf, partial_scan, 
        -:  214:				     count, datatype, uop );
       85:  215:		    (*MPIR_Process.cxx_call_op_fn)( tmp_buf, recvbuf, 
        -:  216:				     count, datatype, uop );
        -:  217:		}
        -:  218:		else 
        -:  219:#endif
        -:  220:                {		    
     5550:  221:		    (*uop)(tmp_buf, partial_scan, &count, &datatype);
     5550:  222:		    (*uop)(tmp_buf, recvbuf, &count, &datatype);
        -:  223:		}
        -:  224:            }
        -:  225:            else { /* partner has the higher rank: only partial_scan is updated, recvbuf is left alone */
     5635:  226:                if (is_commutative) {
        -:  227:#ifdef HAVE_CXX_BINDING
     5051:  228:		    if (is_cxx_uop) {
       85:  229:			(*MPIR_Process.cxx_call_op_fn)( tmp_buf, partial_scan, 
        -:  230:					 count, datatype, uop );
        -:  231:		    }
        -:  232:		    else 
        -:  233:#endif
     4966:  234:                    (*uop)(tmp_buf, partial_scan, &count, &datatype);
        -:  235:		}
        -:  236:                else { /* non-commutative: apply the op with the operands swapped so the result lands in tmp_buf, then copy it back into partial_scan */
        -:  237:#ifdef HAVE_CXX_BINDING
      584:  238:		    if (is_cxx_uop) {
    #####:  239:			(*MPIR_Process.cxx_call_op_fn)( partial_scan, tmp_buf,
        -:  240:					 count, datatype, uop );
        -:  241:		    }
        -:  242:		    else 
        -:  243:#endif
      584:  244:                    (*uop)(partial_scan, tmp_buf, &count, &datatype);
      584:  245:                    mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype,
        -:  246:                                               partial_scan,
        -:  247:                                               count, datatype);
        -:  248:		    /* --BEGIN ERROR HANDLING-- */
      584:  249:                    if (mpi_errno)
        -:  250:		    {
    #####:  251:			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
    #####:  252:			return mpi_errno; /* NOTE(review): partial_scan and tmp_buf are not freed, and MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT is skipped, on this path */
        -:  253:		    }
        -:  254:		    /* --END ERROR HANDLING-- */
        -:  255:                }
        -:  256:            }
        -:  257:        }
    11385:  258:        mask <<= 1;
        -:  259:    }
        -:  260:    
    23675:  261:    MPIU_Free((char *)partial_scan+true_lb); /* undo the true_lb adjustment to recover the allocated address */
    23675:  262:    MPIU_Free((char *)tmp_buf+true_lb); 
        -:  263:    
        -:  264:    /* check if multiple threads are calling this collective function */
        -:  265:    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
        -:  266:    
    23675:  267:    if (MPIU_THREADPRIV_FIELD(op_errno)) /* a nonzero per-thread op_errno replaces the return code */
    #####:  268:	mpi_errno = MPIU_THREADPRIV_FIELD(op_errno);
        -:  269:
    23675:  270:    return (mpi_errno);
        -:  271:}
        -:  272:/* end:nested */
        -:  273:#endif
        -:  274:
        -:  275:/* A simple utility function that calls the comm_ptr->coll_fns->Scan
        -:  276:override if it exists; otherwise it calls MPIR_Scan with the same arguments. */
        -:  277:#undef FUNCNAME
        -:  278:#define FUNCNAME MPIR_Scan_or_coll_fn
        -:  279:#undef FCNAME
        -:  280:#define FCNAME MPIU_QUOTE(FUNCNAME)
        -:  281:static int MPIR_Scan_or_coll_fn(
        -:  282:    void *sendbuf, 
        -:  283:    void *recvbuf, 
        -:  284:    int count, 
        -:  285:    MPI_Datatype datatype, 
        -:  286:    MPI_Op op, 
        -:  287:    MPID_Comm *comm_ptr )
    16502:  288:{
    16502:  289:    int mpi_errno = MPI_SUCCESS;
        -:  290:
    16502:  291:    if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scan != NULL) /* an override is installed on this communicator */
        -:  292:    {
        -:  293:        /* --BEGIN USEREXTENSION-- */
    #####:  294:        mpi_errno = comm_ptr->coll_fns->Scan(sendbuf, recvbuf, count,
        -:  295:                                             datatype, op, comm_ptr);
        -:  296:        /* --END USEREXTENSION-- */
        -:  297:    }
        -:  298:    else {
    16502:  299:        mpi_errno = MPIR_Scan(sendbuf, recvbuf, count, 
        -:  300:                              datatype, op, comm_ptr);
        -:  301:    }
        -:  302:
    16502:  303:    return mpi_errno; /* the callee's error code is passed through unchanged */
        -:  304:}
        -:  305:
        -:  306:/* Helper that performs the node-aware (shmcoll) scan. The "op" may be either 
        -:  307:   commutative or non-commutative. 
        -:  308:   Restriction: we require a communicator in which every node contains 
        -:  309:   processes with consecutive ranks. NOTE(review): FUNCNAME below is MPIR_Scan_sub_shmcoll although the function is named MPIR_SMP_Scan. */
        -:  310:#undef FUNCNAME
        -:  311:#define FUNCNAME MPIR_Scan_sub_shmcoll
        -:  312:#undef FCNAME
        -:  313:#define FCNAME MPIU_QUOTE(FUNCNAME)
        -:  314:static int MPIR_SMP_Scan(
        -:  315:    void *sendbuf,
        -:  316:    void *recvbuf,
        -:  317:    int count,
        -:  318:    MPI_Datatype datatype,
        -:  319:    MPI_Op op,
        -:  320:    MPID_Comm *comm_ptr )
    11044:  321:{
    11044:  322:    int mpi_errno = MPI_SUCCESS;
    11044:  323:    MPIU_CHKLMEM_DECL(3);
        -:  324:
    11044:  325:    int rank = comm_ptr->rank;
        -:  326:    MPI_Status status;
    11044:  327:    void *tempbuf = NULL, *localfulldata = NULL, *prefulldata = NULL;
        -:  328:    MPI_Aint  true_lb, true_extent, extent; 
        -:  329:    MPI_User_function *uop;
        -:  330:    MPID_Op *op_ptr;
    11044:  331:    int noneed = 1; /* noneed=1 means no need to bcast tempbuf and 
        -:  332:                       reduce tempbuf & recvbuf; it is cleared only after tempbuf is received from the previous node root */
        -:  333:
    11044:  334:    mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb, &true_extent);
    11044:  335:    if (mpi_errno) MPIU_ERR_POP(mpi_errno); 
        -:  336:
    11044:  337:    MPID_Datatype_get_extent_macro(datatype, extent);
        -:  338:
        -:  339:    MPID_Ensure_Aint_fits_in_pointer(count * MPIR_MAX(extent, true_extent));
        -:  340:
    11044:  341:    MPIU_CHKLMEM_MALLOC(tempbuf, void *, count*(MPIR_MAX(extent, true_extent)),
        -:  342:                        mpi_errno, "temporary buffer");
    11044:  343:    tempbuf = (void *)((char*)tempbuf - true_lb); /* offset by the datatype's true lower bound */
        -:  344:
        -:  345:    /* Create prefulldata and localfulldata on local roots of all nodes */
    11044:  346:    if (comm_ptr->node_roots_comm != NULL) {
     5458:  347:        MPIU_CHKLMEM_MALLOC(prefulldata, void *, count*(MPIR_MAX(extent, true_extent)),
        -:  348:                            mpi_errno, "prefulldata for scan");
     5458:  349:        prefulldata = (void *)((char*)prefulldata - true_lb);
        -:  350:
     5458:  351:        if (comm_ptr->node_comm != NULL) {
     5458:  352:            MPIU_CHKLMEM_MALLOC(localfulldata, void *, count*(MPIR_MAX(extent, true_extent)),
        -:  353:                                mpi_errno, "localfulldata for scan");
     5458:  354:            localfulldata = (void *)((char*)localfulldata - true_lb);
        -:  355:        }
        -:  356:    }
        -:  357:  
        -:  358:    /* Perform an intranode scan to get a temporary result in recvbuf. If there is 
        -:  359:       no node_comm, just copy the raw data (unless sendbuf is MPI_IN_PLACE). */
    11044:  360:    if (comm_ptr->node_comm != NULL)
        -:  361:    {
    11044:  362:        mpi_errno = MPIR_Scan_or_coll_fn(sendbuf, recvbuf, count, datatype, 
        -:  363:                                         op, comm_ptr->node_comm);
    11044:  364:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  365:    }
    #####:  366:    else if (sendbuf != MPI_IN_PLACE)
        -:  367:    {
    #####:  368:        mpi_errno = MPIR_Localcopy(sendbuf, count, datatype,
        -:  369:                                   recvbuf, count, datatype);
    #####:  370:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  371:    }
        -:  372:
        -:  373:    /* The node root receives the result of the local node's last process, which 
        -:  374:       contains the reduce result of the whole node. Name it
        -:  375:       localfulldata. For example, localfulldata from node 1 contains
        -:  376:       the reduced data of ranks 1,2,3. */
    11044:  377:    if (comm_ptr->node_roots_comm != NULL && comm_ptr->node_comm != NULL)
        -:  378:    {
     5458:  379:        mpi_errno = MPIC_Recv(localfulldata, count, datatype, 
        -:  380:                              comm_ptr->node_comm->local_size - 1, MPIR_SCAN_TAG, 
        -:  381:                              comm_ptr->node_comm->handle, &status);
     5458:  382:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  383:    }
     5586:  384:    else if (comm_ptr->node_roots_comm == NULL && 
        -:  385:             comm_ptr->node_comm != NULL && 
        -:  386:             MPIU_Get_intranode_rank(comm_ptr, rank) == comm_ptr->node_comm->local_size - 1)
        -:  387:    {
     5458:  388:        mpi_errno = MPIC_Send(recvbuf, count, datatype,
        -:  389:                              0, MPIR_SCAN_TAG, comm_ptr->node_comm->handle);
     5458:  390:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  391:    }
      128:  392:    else if (comm_ptr->node_roots_comm != NULL)
        -:  393:    {
    #####:  394:        localfulldata = recvbuf; /* node_comm is NULL on this path, so recvbuf is used directly as localfulldata */
        -:  395:    }
        -:  396:
        -:  397:    /* Do a scan on localfulldata into prefulldata. For example, 
        -:  398:       prefulldata on rank 4 contains the reduce result of ranks 
        -:  399:       1,2,3,4,5,6. It will be sent to rank 7, which is the master 
        -:  400:       process of node 3. */
    11044:  401:    if (comm_ptr->node_roots_comm != NULL)
        -:  402:    {
     5458:  403:        mpi_errno = MPIR_Scan_or_coll_fn(localfulldata, prefulldata, count, datatype,
        -:  404:                                         op, comm_ptr->node_roots_comm);
     5458:  405:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  406:
     5458:  407:        if (MPIU_Get_internode_rank(comm_ptr, rank) != 
        -:  408:            comm_ptr->node_roots_comm->local_size-1)
        -:  409:        {
    #####:  410:            mpi_errno = MPIC_Send(prefulldata, count, datatype,
        -:  411:                                  MPIU_Get_internode_rank(comm_ptr, rank) + 1,
        -:  412:                                  MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle);
    #####:  413:            if(mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  414:        }
     5458:  415:        if (MPIU_Get_internode_rank(comm_ptr, rank) != 0)
        -:  416:        {
    #####:  417:            mpi_errno = MPIC_Recv(tempbuf, count, datatype,
        -:  418:                                  MPIU_Get_internode_rank(comm_ptr, rank) - 1, 
        -:  419:                                  MPIR_SCAN_TAG, comm_ptr->node_roots_comm->handle, 
        -:  420:                                  &status);
    #####:  421:            noneed = 0;
    #####:  422:            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  423:        }
        -:  424:    }
        -:  425:
        -:  426:    /* Now tempbuf contains all the data needed to get the correct 
        -:  427:       scan result. For example, on node 3 it will hold the reduce result 
        -:  428:       of ranks 1,2,3,4,5,6 in tempbuf. 
        -:  429:       Then we should broadcast this result in the local node, and
        -:  430:       reduce it with recvbuf to get the final result if necessary. */
        -:  431:
    11044:  432:    if (comm_ptr->node_comm != NULL) {
    11044:  433:        mpi_errno = MPIR_Bcast_or_coll_fn(&noneed, 1, MPI_INT, 0, comm_ptr->node_comm);
    11044:  434:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  435:    }
        -:  436:
    11044:  437:    if (noneed == 0) {
        -:  438:#ifdef HAVE_CXX_BINDING
    #####:  439:        int is_cxx_uop = 0;
        -:  440:#endif
    #####:  441:        if (comm_ptr->node_comm != NULL) {
    #####:  442:            mpi_errno = MPIR_Bcast_or_coll_fn(tempbuf, count, datatype, 0, comm_ptr->node_comm);
    #####:  443:            if(mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  444:        }
        -:  445:
        -:  446:        /* do reduce on tempbuf and recvbuf, finish scan. */
    #####:  447:        if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) {
        -:  448:            /* get the function by indexing into the op table */
    #####:  449:            uop = MPIR_Op_table[op%16 - 1];
        -:  450:        }
        -:  451:        else {
    #####:  452:            MPID_Op_get_ptr(op, op_ptr);
        -:  453:
        -:  454:#ifdef HAVE_CXX_BINDING
    #####:  455:            if (op_ptr->language == MPID_LANG_CXX) {
    #####:  456:                 uop = (MPI_User_function *) op_ptr->function.c_function;
    #####:  457:                 is_cxx_uop = 1;
        -:  458:            }
        -:  459:            else
        -:  460:#endif
        -:  461:            {
    #####:  462:                if ((op_ptr->language == MPID_LANG_C))
    #####:  463:                    uop = (MPI_User_function *) op_ptr->function.c_function;
        -:  464:                else
    #####:  465:                    uop = (MPI_User_function *) op_ptr->function.f77_function;
        -:  466:            }
        -:  467:        }
        -:  468:
        -:  469:#ifdef HAVE_CXX_BINDING
    #####:  470:        if (is_cxx_uop) {
    #####:  471:            (*MPIR_Process.cxx_call_op_fn)( tempbuf, recvbuf, count, 
        -:  472:                                            datatype, uop );
        -:  473:        }
        -:  474:        else
        -:  475:#endif
    #####:  476:            (*uop)(tempbuf, recvbuf, &count, &datatype);
        -:  477:    }
        -:  478:
        -:  479:  fn_exit:
    21960:  480:    MPIU_CHKLMEM_FREEALL();
    11044:  481:    return mpi_errno;
        -:  482:
        -:  483:  fn_fail:
        -:  484:    goto fn_exit; /* the failure path shares the fn_exit cleanup */
        -:  485:}
        -:  486:
        -:  487:#undef FUNCNAME
        -:  488:#define FUNCNAME MPI_Scan
        -:  489:#undef FCNAME
        -:  490:#define FCNAME MPIU_QUOTE(FUNCNAME)
        -:  491:/*@
        -:  492:
        -:  493:MPI_Scan - Computes the scan (partial reductions) of data on a collection of
        -:  494:           processes
        -:  495:
        -:  496:Input Parameters:
        -:  497:+ sendbuf - starting address of send buffer (choice) 
        -:  498:. count - number of elements in input buffer (integer) 
        -:  499:. datatype - data type of elements of input buffer (handle) 
        -:  500:. op - operation (handle) 
        -:  501:- comm - communicator (handle) 
        -:  502:
        -:  503:Output Parameter:
        -:  504:. recvbuf - starting address of receive buffer (choice) 
        -:  505:
        -:  506:.N ThreadSafe
        -:  507:
        -:  508:.N Fortran
        -:  509:
        -:  510:.N collops
        -:  511:
        -:  512:.N Errors
        -:  513:.N MPI_SUCCESS
        -:  514:.N MPI_ERR_COMM
        -:  515:.N MPI_ERR_COUNT
        -:  516:.N MPI_ERR_TYPE
        -:  517:.N MPI_ERR_BUFFER
        -:  518:.N MPI_ERR_BUFFER_ALIAS
        -:  519:@*/
        -:  520:int MPI_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, 
        -:  521:	     MPI_Op op, MPI_Comm comm)
    18275:  522:{
    18275:  523:    int mpi_errno = MPI_SUCCESS;
    18275:  524:    MPID_Comm *comm_ptr = NULL;
    18275:  525:    MPIU_THREADPRIV_DECL;
        -:  526:    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCAN);
        -:  527:
    18275:  528:    MPIR_ERRTEST_INITIALIZED_ORDIE();
        -:  529:    
    18275:  530:    MPIU_THREAD_CS_ENTER(ALLFUNC,);
        -:  531:    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_SCAN);
        -:  532:
        -:  533:    /* Validate parameters, especially handles needing to be converted */
        -:  534:#   ifdef HAVE_ERROR_CHECKING
        -:  535:    {
        -:  536:        MPID_BEGIN_ERROR_CHECKS;
        -:  537:        {
    18275:  538:	    MPIR_ERRTEST_COMM(comm, mpi_errno);
    18275:  539:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  540:	}
        -:  541:        MPID_END_ERROR_CHECKS;
        -:  542:    }
        -:  543:#   endif /* HAVE_ERROR_CHECKING */
        -:  544:
        -:  545:    /* Convert MPI object handles to object pointers */
    18273:  546:    MPID_Comm_get_ptr( comm, comm_ptr );
        -:  547:
        -:  548:    /* Validate parameters and objects (post conversion) */
        -:  549:#   ifdef HAVE_ERROR_CHECKING
        -:  550:    {
        -:  551:        MPID_BEGIN_ERROR_CHECKS;
        -:  552:        {
    18273:  553:	    MPID_Datatype *datatype_ptr = NULL;
    18273:  554:            MPID_Op *op_ptr = NULL;
        -:  555:	    
    18273:  556:            MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
    18273:  557:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  558:
    18271:  559:            MPIR_ERRTEST_COMM_INTRA(comm_ptr, mpi_errno);
    18271:  560:	    MPIR_ERRTEST_COUNT(count, mpi_errno);
    18271:  561:	    MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
    18271:  562:	    MPIR_ERRTEST_OP(op, mpi_errno);
        -:  563:	    
    18271:  564:            if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
     1684:  565:                MPID_Datatype_get_ptr(datatype, datatype_ptr);
     1684:  566:                MPID_Datatype_valid_ptr( datatype_ptr, mpi_errno );
     1684:  567:                MPID_Datatype_committed_ptr( datatype_ptr, mpi_errno );
        -:  568:            }
        -:  569:
        -:  570:            /* in_place option allowed. no error check */
    18271:  571:            MPIR_ERRTEST_USERBUFFER(sendbuf,count,datatype,mpi_errno);
        -:  572:
    18271:  573:            MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
    18271:  574:            MPIR_ERRTEST_USERBUFFER(recvbuf,count,datatype,mpi_errno);
        -:  575:
    18271:  576:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
    18243:  577:            if (HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN) {
     3901:  578:                MPID_Op_get_ptr(op, op_ptr);
     3901:  579:                MPID_Op_valid_ptr( op_ptr, mpi_errno );
        -:  580:            }
    18243:  581:            if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { /* builtin ops: check the op/datatype combination via the per-op table */
    14342:  582:                mpi_errno = 
        -:  583:                    ( * MPIR_Op_check_dtype_table[op%16 - 1] )(datatype); 
        -:  584:            }
    18243:  585:            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  586:        }
        -:  587:        MPID_END_ERROR_CHECKS;
        -:  588:    }
        -:  589:#   endif /* HAVE_ERROR_CHECKING */
        -:  590:
        -:  591:    /* ... body of routine ...  */
        -:  592:
    18217:  593:    if (comm_ptr->coll_fns != NULL && comm_ptr->coll_fns->Scan != NULL) /* a Scan override installed on the communicator takes precedence */
        -:  594:    {
    #####:  595:	mpi_errno = comm_ptr->coll_fns->Scan(sendbuf, recvbuf, count,
        -:  596:                                             datatype, op, comm_ptr);
        -:  597:    }
        -:  598:    else
        -:  599:    {
    18217:  600:	MPIU_THREADPRIV_GET;
    18217:  601:	MPIR_Nest_incr();
        -:  602:#if defined(USE_SMP_COLLECTIVES)
        -:  603:
        -:  604:        /* The current algorithm assumes the ranks of processes in the 
        -:  605:           same node are consecutive. For example, node 1 contains ranks
        -:  606:           1, 2, 3; node 2 has 4, 5, 6; and node 3 has 7, 8, 9 */
    18217:  607:        if (MPIR_Comm_is_node_consecutive(comm_ptr)) {
    11044:  608:            mpi_errno = MPIR_SMP_Scan(sendbuf, recvbuf, count,
        -:  609:                                      datatype, op, comm_ptr);
        -:  610:        }
        -:  611:        else {
     7173:  612:            mpi_errno = MPIR_Scan(sendbuf, recvbuf, count, datatype, 
        -:  613:                                  op, comm_ptr);
        -:  614:        }
        -:  615:#else
        -:  616:        mpi_errno = MPIR_Scan(sendbuf, recvbuf, count, datatype,
        -:  617:                              op, comm_ptr); 
        -:  618:#endif
    18217:  619:	MPIR_Nest_decr();
        -:  620:    }
        -:  621:    
    18217:  622:    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        -:  623:
        -:  624:    /* ... end of body of routine ... */
        -:  625:    
    18275:  626:  fn_exit:
        -:  627:    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_SCAN);
    18275:  628:    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    18275:  629:    return mpi_errno;
        -:  630:
       58:  631:  fn_fail:
        -:  632:    /* --BEGIN ERROR HANDLING-- */
        -:  633:#   ifdef HAVE_ERROR_CHECKING
        -:  634:    {
       58:  635:	mpi_errno = MPIR_Err_create_code(
        -:  636:	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_scan",
        -:  637:	    "**mpi_scan %p %p %d %D %O %C", sendbuf, recvbuf, count, datatype, op, comm);
        -:  638:    }
        -:  639:#   endif
       58:  640:    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno ); /* NOTE(review): comm_ptr is still NULL here when the failure happened before MPID_Comm_get_ptr -- confirm MPIR_Err_return_comm accepts NULL */
       58:  641:    goto fn_exit;
        -:  642:    /* --END ERROR HANDLING-- */
        -:  643:}
        -:  639:#   endif
       58:  640:    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
       58:  641:    goto fn_exit;
        -:  642:    /* --END ERROR HANDLING-- */
        -:  643:}