-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/ch3/src/mpid_vc.c
        -:    0:Graph:mpid_vc.gcno
        -:    0:Data:mpid_vc.gcda
        -:    0:Runs:3459
        -:    0:Programs:899
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:/*
        -:    3: *  (C) 2001 by Argonne National Laboratory.
        -:    4: *      See COPYRIGHT in top-level directory.
        -:    5: */
        -:    6:
        -:    7:#include "mpidimpl.h"
        -:    8:
        -:    9:#ifdef USE_PMI2_API
        -:   10:#include "pmi2.h"
        -:   11:#else
        -:   12:#include "pmi.h"
        -:   13:#endif
        -:   14:#if defined(HAVE_LIMITS_H)
        -:   15:#include <limits.h>
        -:   16:#endif
        -:   17:#if defined(HAVE_UNISTD_H)
        -:   18:#include <unistd.h>
        -:   19:#endif
        -:   20:#if defined(HAVE_ERRNO_H)
        -:   21:#include <errno.h>
        -:   22:#endif
        -:   23:#include <ctype.h>
        -:   24:
        -:   25:
        -:   26:/*S
        -:   27: * MPIDI_VCRT - virtual connection reference table
        -:   28: *
        -:   29: * handle - this element is not used, but exists so that we may use the 
        -:   30: * MPIU_Object routines for reference counting
        -:   31: *
        -:   32: * ref_count - number of references to this table
        -:   33: * size - number of entries in vcr_table
        -:   34: * vcr_table - array of virtual connection references
        -:   35: S*/
        -:   36:typedef struct MPIDI_VCRT
        -:   37:{
        -:   38:    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
        -:   39:    int size; /* set by MPID_VCRT_Create to the requested entry count */
        -:   40:    MPIDI_VC_t * vcr_table[1]; /* declared with one entry; MPID_VCRT_Create allocates room for (size - 1) more */
        -:   41:}
        -:   42:MPIDI_VCRT_t;
        -:   43:
        -:   44:/* What is the arrangement of VCRT and VCR and VC? 
        -:   45:   
        -:   46:   Each VC (the virtual connection itself) is referred to by a reference 
        -:   47:   (pointer) or VCR.  
        -:   48:   Each communicator has a VCRT, which is nothing more than a 
        -:   49:   structure containing a count (size) and an array of pointers to 
        -:   50:   virtual connections (as an abstraction, this could be a sparse
        -:   51:   array, allowing a more scalable representation on massively 
        -:   52:   parallel systems).
        -:   53:
        -:   54: */
        -:   55:
        -:   56:static int MPIDI_CH3U_VC_FinishPending( MPIDI_VCRT_t *vcrt );
        -:   57:
        -:   58:/*@
        -:   59:  MPID_VCRT_Create - Create a table of VC references
        -:   60:  size is the number of VC reference slots; the new table is stored in *vcrt_ptr.
        -:   61:  Notes:
        -:   62:  This routine only provides space for the VC references.  Those should
        -:   63:  be added by assigning to elements of the vc array within the 
        -:   64:  'MPID_VCRT' object.  The table is created with a reference count of 1.
        -:   65:  @*/
        -:   66:#undef FUNCNAME
        -:   67:#define FUNCNAME MPID_VCRT_Create
        -:   68:#undef FCNAME
        -:   69:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:   70:int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
  1120393:   71:{
        -:   72:    MPIDI_VCRT_t * vcrt;
  1120393:   73:    int mpi_errno = MPI_SUCCESS;
  1120393:   74:    MPIU_CHKPMEM_DECL(1);
        -:   75:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCRT_CREATE);
        -:   76:
        -:   77:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCRT_CREATE);
        -:   78:    /* MPIDI_VCRT_t already declares one vcr_table entry, so room for (size - 1) more is added */
  1120393:   79:    MPIU_CHKPMEM_MALLOC(vcrt, MPIDI_VCRT_t *, sizeof(MPIDI_VCRT_t) + (size - 1) * sizeof(MPIDI_VC_t *),	mpi_errno, "**nomem");
  1120393:   80:    vcrt->handle = HANDLE_SET_KIND(0, HANDLE_KIND_INVALID); /* the handle field is not used; mark it invalid */
  1120393:   81:    MPIU_Object_set_ref(vcrt, 1);
  1120393:   82:    vcrt->size = size;
  1120393:   83:    *vcrt_ptr = vcrt;
        -:   84:
  1120393:   85: fn_exit:
  1120393:   86:    MPIU_CHKPMEM_COMMIT();
        -:   87:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCRT_CREATE);
  1120393:   88:    return mpi_errno; /* MPI_SUCCESS on the normal path */
        -:   89: fn_fail:
    #####:   90:    MPIU_CHKPMEM_REAP(); /* presumably frees memory obtained through the CHKPMEM macros -- confirm */
        -:   91:    goto fn_exit;
        -:   92:}
        -:   93:
        -:   94:/*@
        -:   95:  MPID_VCRT_Add_ref - Add a reference to a VC reference table
        -:   96:  vcrt is the table whose reference count is incremented; always returns MPI_SUCCESS.
        -:   97:  Notes:
        -:   98:  This is called when a communicator duplicates its group of processes.
        -:   99:  It is used in 'commutil.c' and in routines to create communicators from
        -:  100:  dynamic process operations.  It does not change the state of any of the
        -:  101:  virtual connections (VCs).
        -:  102:  @*/
        -:  103:#undef FUNCNAME
        -:  104:#define FUNCNAME MPID_VCRT_Add_ref
        -:  105:#undef FCNAME
        -:  106:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  107:int MPID_VCRT_Add_ref(MPID_VCRT vcrt)
   539216:  108:{
        -:  109:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCRT_ADD_REF);
        -:  110:
        -:  111:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCRT_ADD_REF);
   539216:  112:    MPIU_Object_add_ref(vcrt);
        -:  113:    MPIU_DBG_MSG_FMT(REFCOUNT,TYPICAL,(MPIU_DBG_FDEST, "Incr VCRT %p ref count",vcrt));
        -:  114:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCRT_ADD_REF);
   539216:  115:    return MPI_SUCCESS;
        -:  116:}
        -:  117:
        -:  118:/* FIXME: What should this do?  See proc group and vc discussion */
        -:  119:
        -:  120:/*@
        -:  121:  MPID_VCRT_Release - Release a reference to a VC reference table
        -:  122:  vcrt is the table to release; a nonzero isDisconnect drops one extra reference from any VC whose count has reached 1.
        -:  123:  Notes:
        -:  124:  The VC references and the table itself are only released when the table's own reference count drops to zero.
        -:  125:  @*/
        -:  126:#undef FUNCNAME
        -:  127:#define FUNCNAME MPID_VCRT_Release
        -:  128:#undef FCNAME
        -:  129:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  130:int MPID_VCRT_Release(MPID_VCRT vcrt, int isDisconnect )
  1659354:  131:{
        -:  132:    int in_use;
  1659354:  133:    int mpi_errno = MPI_SUCCESS;
        -:  134:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCRT_RELEASE);
        -:  135:
        -:  136:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCRT_RELEASE);
        -:  137:
  1659354:  138:    MPIU_Object_release_ref(vcrt, &in_use);
        -:  139:    MPIU_DBG_MSG_FMT(REFCOUNT,TYPICAL,(MPIU_DBG_FDEST, "Decr VCRT %p ref count",vcrt));
        -:  140:    
        -:  141:    /* If this VC reference table is no longer in use, we can
        -:  142:       decrement the reference count of each of the VCs.  If the
        -:  143:       count on the VCs goes to zero, then we can decrement the
        -:  144:       ref count on the process group and so on. 
        -:  145:    */
  1659354:  146:    if (!in_use) {
        -:  147:	int i, inuse;
        -:  148:
        -:  149:	/* FIXME: Need a better way to define how vc's are closed that 
        -:  150:	 takes into account pending operations on vcs, including 
        -:  151:	 close events received from other processes. */
        -:  152:	/* mpi_errno = MPIDI_CH3U_VC_FinishPending( vcrt ); */
  1120146:  153:        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        -:  154:	/* mpi_errno is still MPI_SUCCESS here while the call above stays commented out */
  3451587:  155:	for (i = 0; i < vcrt->size; i++)
        -:  156:	{
  2331441:  157:	    MPIDI_VC_t * const vc = vcrt->vcr_table[i];
        -:  158:	    
  2331441:  159:	    MPIDI_VC_release_ref(vc, &in_use); /* reuses the outer in_use, which is not read again for the table */
        -:  160:
        -:  161:            /* Dynamic connections start with a refcount of 2 instead of 1.
        -:  162:             * That way we can distinguish between an MPI_Free and an
        -:  163:             * MPI_Comm_disconnect. */
        -:  164:            /* XXX DJG FIXME-MT should we be checking this? */
        -:  165:            /* probably not, need to do something like the following instead: */
        -:  166:#if 0
        -:  167:            if (isDisconnect) {
        -:  168:                MPIU_Assert(in_use);
        -:  169:                /* FIXME this is still bogus, the VCRT may contain a mix of
        -:  170:                 * dynamic and non-dynamic VCs, so the ref_count isn't
        -:  171:                 * guaranteed to have started at 2.  The best thing to do might
        -:  172:                 * be to avoid overloading the reference counting this way and
        -:  173:                 * use a separate check for dynamic VCs (another flag? compare
        -:  174:                 * PGs?) */
        -:  175:                MPIU_Object_release_ref(vc, &in_use);
        -:  176:            }
        -:  177:#endif
  2331441:  178:	    if (isDisconnect && MPIU_Object_get_ref(vc) == 1) {
     5500:  179:		MPIDI_VC_release_ref(vc, &in_use);
        -:  180:	    }
        -:  181:
  2331441:  182:	    if (!in_use)
        -:  183:	    {
        -:  184:		/* If the VC is myself then skip the close message */
     6836:  185:		if (vc->pg == MPIDI_Process.my_pg && 
        -:  186:		    vc->pg_rank == MPIDI_Process.my_pg_rank)
        -:  187:		{
    #####:  188:                    MPIDI_PG_release_ref(vc->pg, &inuse);
    #####:  189:                    if (inuse == 0)
        -:  190:                    {
    #####:  191:                        MPIDI_PG_Destroy(vc->pg);
        -:  192:                    }
        -:  193:		    continue;
        -:  194:		}
        -:  195:		
        -:  196:		/* FIXME: the correct test is ACTIVE or REMOTE_CLOSE */
        -:  197:		/*if (vc->state != MPIDI_VC_STATE_INACTIVE) { */
     6836:  198:		if (vc->state == MPIDI_VC_STATE_ACTIVE ||
        -:  199:		    vc->state == MPIDI_VC_STATE_REMOTE_CLOSE)
        -:  200:		{
     3144:  201:		    MPIDI_CH3U_VC_SendClose( vc, i );
        -:  202:		}
        -:  203:		else
        -:  204:		{
     3692:  205:                    MPIDI_PG_release_ref(vc->pg, &inuse);
     3692:  206:                    if (inuse == 0)
        -:  207:                    {
      632:  208:                        MPIDI_PG_Destroy(vc->pg);
        -:  209:                    }
        -:  210:
        -:  211:		    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
        -:  212:                            "vc=%p: not sending a close to %d, vc in state %s",
        -:  213:			     vc, i, MPIDI_VC_GetStateString(vc->state)));
        -:  214:		}
        -:  215:
        -:  216:                /* NOTE: we used to * MPIU_CALL(MPIDI_CH3,VC_Destroy(&(pg->vct[i])))
        -:  217:                   here but that is incorrect.  According to the standard, it's
        -:  218:                   entirely possible (likely even) that this VC might still be
        -:  219:                   connected.  VCs are now destroyed when the PG that "owns"
        -:  220:                   them is destroyed (see MPIDI_PG_Destroy). [goodell@ 2008-06-13] */
        -:  221:	    }
        -:  222:	}
        -:  223:
  1120146:  224:	MPIU_Free(vcrt);
        -:  225:    }
        -:  226:
  1659354:  227: fn_exit:    
        -:  228:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCRT_RELEASE);
  1659354:  229:    return mpi_errno;
        -:  230: fn_fail:
        -:  231:    goto fn_exit;
        -:  232:}
        -:  233:
        -:  234:/*@
        -:  235:  MPID_VCRT_Get_ptr - Return a pointer to the array of VCs for this 
        -:  236:  reference table
        -:  237:  Stores the address of vcrt's vcr_table array in *vc_pptr and returns MPI_SUCCESS.
        -:  238:  Notes:
        -:  239:  This routine is always used with MPID_VCRT_Create and should be 
        -:  240:  combined with it.
        -:  241:
        -:  242:  @*/
        -:  243:#undef FUNCNAME
        -:  244:#define FUNCNAME MPID_VCRT_Get_ptr
        -:  245:#undef FCNAME
        -:  246:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  247:int MPID_VCRT_Get_ptr(MPID_VCRT vcrt, MPID_VCR **vc_pptr)
  1120393:  248:{
        -:  249:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCRT_GET_PTR);
        -:  250:
        -:  251:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCRT_GET_PTR);
  1120393:  252:    *vc_pptr = vcrt->vcr_table; /* points into the table itself; no copy is made */
        -:  253:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCRT_GET_PTR);
  1120393:  254:    return MPI_SUCCESS;
        -:  255:}
        -:  256:
        -:  257:/*@
        -:  258:  MPID_VCR_Dup - Duplicate a virtual connection reference 
        -:  259:  orig_vcr is the reference to duplicate; *new_vcr receives the same pointer.  Always returns MPI_SUCCESS.
        -:  260:  Notes:
        -:  261:  If the VC is being used for the first time in a VC reference
        -:  262:  table, the reference count is set to two, not one, in order to
        -:  263:  distinguish between freeing a communicator with 'MPI_Comm_free' and
        -:  264:  'MPI_Comm_disconnect', and the reference count on the process group
        -:  265:  is incremented (to indicate that the process group is in use).
        -:  266:  While this has no effect on the process group of 'MPI_COMM_WORLD',
        -:  267:  it is important for process groups accessed through 'MPI_Comm_spawn'
        -:  268:  or 'MPI_Comm_connect/MPI_Comm_accept'.
        -:  269:  
        -:  270:  @*/
        -:  271:#undef FUNCNAME
        -:  272:#define FUNCNAME MPID_VCR_Dup
        -:  273:#undef FCNAME
        -:  274:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  275:int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr)
  2324797:  276:{
        -:  277:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_DUP);
        -:  278:
        -:  279:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCR_DUP);
        -:  280:
        -:  281:    /* We are allowed to create a vc that belongs to no process group 
        -:  282:     as part of the initial connect/accept action, so in that case,
        -:  283:     ignore the pg ref count update */
        -:  284:    /* XXX DJG FIXME-MT should we be checking this? */
        -:  285:    /* we probably need a test-and-incr operation or equivalent to avoid races */
  2324797:  286:    if (MPIU_Object_get_ref(orig_vcr) == 0 && orig_vcr->pg) {
    16132:  287:	MPIDI_VC_add_ref( orig_vcr ); /* first use: two increments take the count from 0 to 2 */
    16132:  288:	MPIDI_VC_add_ref( orig_vcr );
    16132:  289:	MPIDI_PG_add_ref( orig_vcr->pg );
        -:  290:    }
        -:  291:    else {
  2308665:  292:	MPIDI_VC_add_ref(orig_vcr);
        -:  293:    }
        -:  294:    MPIU_DBG_MSG_FMT(REFCOUNT,TYPICAL,(MPIU_DBG_FDEST,"Incr VCR %p ref count",orig_vcr));
  2324797:  295:    *new_vcr = orig_vcr;
        -:  296:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCR_DUP);
  2324797:  297:    return MPI_SUCCESS;
        -:  298:}
        -:  299:
        -:  300:/*@
        -:  301:  MPID_VCR_Get_lpid - Get the local process ID for a given VC reference
        -:  302:  @*/
        -:  303:#undef FUNCNAME
        -:  304:#define FUNCNAME MPID_VCR_Get_lpid
        -:  305:#undef FCNAME
        -:  306:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  307:int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr)
   185899:  308:{
        -:  309:    MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_GET_LPID);
        -:  310:    /* Copies vcr->lpid into *lpid_ptr; always returns MPI_SUCCESS. */
        -:  311:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCR_GET_LPID);
   185899:  312:    *lpid_ptr = vcr->lpid;
        -:  313:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCR_GET_LPID);
   185899:  314:    return MPI_SUCCESS;
        -:  315:}
        -:  316:
        -:  317:/* 
        -:  318: * The following routines convert to/from the global pids, which are 
        -:  319: * represented as pairs of ints (process group id, rank in that process group)
        -:  320: */
        -:  321:
        -:  322:/* FIXME: These routines belong in a different place */
        -:  323:#undef FUNCNAME
        -:  324:#define FUNCNAME MPID_GPID_GetAllInComm
        -:  325:#undef FCNAME
        -:  326:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  327:int MPID_GPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size, 
        -:  328:			    int local_gpids[], int *singlePG )
      988:  329:{
      988:  330:    int mpi_errno = MPI_SUCCESS;
        -:  331:    int i;
      988:  332:    int *gpid = local_gpids;
      988:  333:    int lastPGID = -1, pgid;
        -:  334:    MPID_VCR vc;
        -:  335:    MPIDI_STATE_DECL(MPID_STATE_MPID_GPID_GETALLINCOMM);
        -:  336:    /* Writes a (pgid, pg_rank) pair into local_gpids for each of the first local_size entries of comm_ptr->vcr. */
        -:  337:    MPIDI_FUNC_ENTER(MPID_STATE_MPID_GPID_GETALLINCOMM);
        -:  338:    /* Two ints are written per entry, so local_gpids needs room for 2 * local_size ints. */
      988:  339:    MPIU_Assert(comm_ptr->local_size == local_size);
        -:  340:    /* *singlePG stays 1 unless the pgid changes from one entry to the next. */
      988:  341:    *singlePG = 1;
     3047:  342:    for (i=0; i<comm_ptr->local_size; i++) {
     2059:  343:	vc = comm_ptr->vcr[i];
        -:  344:
        -:  345:	/* Get the process group id as an int */
     2059:  346:	MPIDI_PG_IdToNum( vc->pg, &pgid );
        -:  347:
     2059:  348:	*gpid++ = pgid;
     2059:  349:	if (lastPGID != pgid) { 
      988:  350:	    if (lastPGID != -1)
    #####:  351:		*singlePG = 0;
      988:  352:	    lastPGID = pgid;
        -:  353:	}
     2059:  354:	*gpid++ = vc->pg_rank;
        -:  355:
        -:  356:        MPIU_DBG_MSG_FMT(COMM,VERBOSE, (MPIU_DBG_FDEST,
        -:  357:                         "pgid=%d vc->pg_rank=%d",
        -:  358:                         pgid, vc->pg_rank));
        -:  359:    }
        -:  360:    
        -:  361:    MPIDI_FUNC_EXIT(MPID_STATE_MPID_GPID_GETALLINCOMM);
      988:  362:    return mpi_errno; /* never modified above, so always MPI_SUCCESS */
        -:  363:}
        -:  364:
        -:  365:#undef FUNCNAME
        -:  366:#define FUNCNAME MPID_GPID_Get
        -:  367:#undef FCNAME
        -:  368:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  369:int MPID_GPID_Get( MPID_Comm *comm_ptr, int rank, int gpid[] )
      256:  370:{
        -:  371:    int      pgid;
        -:  372:    MPID_VCR vc;
        -:  373:    /* Stores the (pgid, pg_rank) pair for comm_ptr->vcr[rank] in gpid[0] and gpid[1]; always returns 0. */
      256:  374:    vc = comm_ptr->vcr[rank]; /* NOTE: rank is used as an index without a range check */
        -:  375:
        -:  376:    /* Get the process group id as an int */
      256:  377:    MPIDI_PG_IdToNum( vc->pg, &pgid );
        -:  378:    
      256:  379:    gpid[0] = pgid;
      256:  380:    gpid[1] = vc->pg_rank;
        -:  381:    
      256:  382:    return 0;
        -:  383:}
        -:  384:
        -:  385:/* 
        -:  386: * MPID_GPID_ToLpidArray - convert 'size' (pgid, rank) pairs in gpid[] into lpid[].
        -:  387: * Each pair is looked up by walking the known process groups, so all processes
        -:  388: * must have information on the process groups.  For an unknown pgid or an out-of-range
        -:  389: * rank, lpid[i] is set to -1 and an MPI_ERR_INTERN error code is returned at once. */
        -:  390:#undef FUNCNAME
        -:  391:#define FUNCNAME MPID_GPID_ToLpidArray
        -:  392:#undef FCNAME
        -:  393:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  394:int MPID_GPID_ToLpidArray( int size, int gpid[], int lpid[] )
     2059:  395:{
     2059:  396:    int i, mpi_errno = MPI_SUCCESS;
        -:  397:    int pgid;
     2059:  398:    MPIDI_PG_t *pg = 0;
        -:  399:    MPIDI_PG_iterator iter;
        -:  400:
    13822:  401:    for (i=0; i<size; i++) {
     4852:  402:        MPIDI_PG_Get_iterator(&iter);
        -:  403:	do {
     4860:  404:	    MPIDI_PG_Get_next( &iter, &pg );
     4860:  405:	    if (!pg) {
        -:  406:		/* Internal error.  This gpid is unknown on this process; report gpid[0], the id searched for */
    #####:  407:		printf("No matching pg found for id = %d\n", gpid[0] );
    #####:  408:		lpid[i] = -1;
    #####:  409:		MPIU_ERR_SET2(mpi_errno,MPI_ERR_INTERN, "**unknowngpid",
        -:  410:			      "**unknowngpid %d %d", gpid[0], gpid[1] );
    #####:  411:		return mpi_errno;
        -:  412:	    }
     4860:  413:	    MPIDI_PG_IdToNum( pg, &pgid );
        -:  414:
     4860:  415:	    if (pgid == gpid[0]) {
        -:  416:		/* found the process group.  gpid[1] is the rank in 
        -:  417:		   this process group */
        -:  418:		/* Sanity check on the rank: it must index inside pg->vct */
     4852:  419:		if (gpid[1] >= 0 && pg->size > gpid[1]) {
     4852:  420:		    lpid[i] = pg->vct[gpid[1]].lpid;
        -:  421:		}
        -:  422:		else {
    #####:  423:		    lpid[i] = -1;
    #####:  424:		    MPIU_ERR_SET2(mpi_errno,MPI_ERR_INTERN, "**unknowngpid",
        -:  425:				  "**unknowngpid %d %d", gpid[0], gpid[1] );
    #####:  426:		    return mpi_errno;
        -:  427:		}
        -:  428:		/* printf( "lpid[%d] = %d for gpid = (%d)%d\n", i, lpid[i], 
        -:  429:		   gpid[0], gpid[1] ); */
        -:  430:		break;
        -:  431:	    }
        -:  432:	} while (1);
     4852:  433:	gpid += 2; /* advance to the next (pgid, rank) pair */
        -:  434:    }
        -:  435:
     2059:  436:    return mpi_errno;
        -:  437:}
        -:  438:
        -:  439:/*@
        -:  440:  MPID_VCR_CommFromLpids - Create a new communicator from a given set
        -:  441:  of lpids.  
        -:  442:  newcomm_ptr receives a new VC table of 'size' entries; lpids[i] selects the VC for rank i.  Returns an MPI error code.
        -:  443:  Notes:
        -:  444:  This is used to create a communicator that is not a subset of some
        -:  445:  existing communicator, for example, in a 'MPI_Comm_spawn' or 
        -:  446:  'MPI_Comm_connect/MPI_Comm_accept'.
        -:  447: @*/
        -:  448:#undef FUNCNAME
        -:  449:#define FUNCNAME MPID_VCR_CommFromLpids
        -:  450:#undef FCNAME
        -:  451:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  452:int MPID_VCR_CommFromLpids( MPID_Comm *newcomm_ptr, 
        -:  453:			    int size, const int lpids[] )
     2059:  454:{
     2059:  455:    int mpi_errno = MPI_SUCCESS;
        -:  456:    MPID_Comm *commworld_ptr;
        -:  457:    int i;
        -:  458:    MPIDI_PG_iterator iter;
        -:  459:
     2059:  460:    commworld_ptr = MPIR_Process.comm_world;
        -:  461:    mpi_errno = MPID_VCRT_Create( size, &newcomm_ptr->vcrt ); /* vc table for the remote group */
     2059:  462:    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } /* on failure vcrt was not set, so do not use it */
     2059:  463:    MPID_VCRT_Get_ptr( newcomm_ptr->vcrt, &newcomm_ptr->vcr );
     6911:  464:    for (i=0; i<size; i++) {
     4852:  465:	MPIDI_VC_t *vc = 0;
        -:  466:
        -:  467:	/* For rank i in the new communicator, find the corresponding
        -:  468:	   virtual connection.  For lpids less than the size of comm_world,
        -:  469:	   we can just take the corresponding entry from comm_world.
        -:  470:	   Otherwise, we need to search through the process groups.
        -:  471:	*/
        -:  472:	/* printf( "[%d] Remote rank %d has lpid %d\n", 
        -:  473:	   MPIR_Process.comm_world->rank, i, lpids[i] ); */
     4852:  474:	if (lpids[i] < commworld_ptr->remote_size) {
     4844:  475:	    vc = commworld_ptr->vcr[lpids[i]];
        -:  476:	}
        -:  477:	else {
        -:  478:	    /* We must find the corresponding vcr for a given lpid */	
        -:  479:	    /* For now, this means iterating through the process groups */
        8:  480:	    MPIDI_PG_t *pg = 0;
        -:  481:	    int j;
        -:  482:
        8:  483:	    MPIDI_PG_Get_iterator(&iter);
        -:  484:	    /* Skip comm_world */
        8:  485:	    MPIDI_PG_Get_next( &iter, &pg );
        -:  486:	    do {
        8:  487:		MPIDI_PG_Get_next( &iter, &pg );
        8:  488:                MPIU_ERR_CHKINTERNAL(!pg, mpi_errno, "no pg"); /* ran out of process groups without a match */
        -:  489:		/* FIXME: a quick check on the min/max values of the lpid
        -:  490:		   for this process group could help speed this search */
       12:  491:		for (j=0; j<pg->size; j++) {
        -:  492:		    /*printf( "Checking lpid %d against %d in pg %s\n",
        -:  493:			    lpids[i], pg->vct[j].lpid, (char *)pg->id );
        -:  494:			    fflush(stdout); */
       12:  495:		    if (pg->vct[j].lpid == lpids[i]) {
        8:  496:			vc = &pg->vct[j];
        -:  497:			/*printf( "found vc %x for lpid = %d in another pg\n", 
        -:  498:			  (int)vc, lpids[i] );*/
        8:  499:			break;
        -:  500:		    }
        -:  501:		}
        8:  502:	    } while (!vc);
        -:  503:	}
        -:  504:
        -:  505:	/* printf( "about to dup vc %x for lpid = %d in another pg\n", 
        -:  506:	   (int)vc, lpids[i] ); */
        -:  507:	/* Note that this will increment the ref count for the associated
        -:  508:	   PG if necessary.  */
     4852:  509:	MPID_VCR_Dup( vc, &newcomm_ptr->vcr[i] );
        -:  510:    }
     2059:  511:fn_exit:
     2059:  512:    return mpi_errno;
        -:  513:fn_fail:
        -:  514:    goto fn_exit;
        -:  515:}
        -:  516:
        -:  517:/* The following is a temporary hook to ensure that all processes in 
        -:  518:   a communicator have a set of process groups.
        -:  519: 
        -:  520:   All arguments are input (all processes in comm must have gpids)
        -:  521:
        -:  522:   First: all processes check to see if they have information on all
        -:  523:   of the process groups mentioned by id in the array of gpids.
        -:  524:
        -:  525:   The local result is LANDed with Allreduce.
        -:  526:   If any process is missing process group information, then the
        -:  527:   root process broadcasts the process group information as a string; 
        -:  528:   each process then uses this information to update to local process group
        -:  529:   information (in the KVS cache that contains information about 
        -:  530:   contacting any process in the process groups).
        -:  531:   Returns MPI_SUCCESS in all cases; the results of the Allreduce and BCast calls are not checked. */
        -:  532:#undef FUNCNAME
        -:  533:#define FUNCNAME MPID_PG_ForwardPGInfo
        -:  534:#undef FCNAME
        -:  535:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  536:int MPID_PG_ForwardPGInfo( MPID_Comm *peer_ptr, MPID_Comm *comm_ptr, 
        -:  537:			   int nPGids, const int gpids[], 
        -:  538:			   int root )
     2059:  539:{
     2059:  540:    int i, allfound = 1, pgid, pgidWorld;
     2059:  541:    MPIDI_PG_t *pg = 0;
        -:  542:    MPIDI_PG_iterator iter;
        -:  543:
        -:  544:    /* Get the pgid for CommWorld (always attached to the first process 
        -:  545:       group) */
     2059:  546:    MPIDI_PG_Get_iterator(&iter);
     2059:  547:    MPIDI_PG_Get_next( &iter, &pg );
     2059:  548:    MPIDI_PG_IdToNum( pg, &pgidWorld );
        -:  549:    
        -:  550:    /* Extract the unique process groups */
     6910:  551:    for (i=0; i<nPGids && allfound; i++) {
     4851:  552:	if (gpids[0] != pgidWorld) {
        -:  553:	    /* Add this gpid to the list of values to check */
        -:  554:	    /* FIXME: For testing, we just test in place */
        7:  555:            MPIDI_PG_Get_iterator(&iter);
        -:  556:	    do {
       14:  557:                MPIDI_PG_Get_next( &iter, &pg );
       14:  558:		if (!pg) {
        -:  559:		    /* We don't know this pgid */
        1:  560:		    allfound = 0;
        1:  561:		    break;
        -:  562:		}
       13:  563:		MPIDI_PG_IdToNum( pg, &pgid );
       13:  564:	    } while (pgid != gpids[0]);
        -:  565:	}
     4851:  566:	gpids += 2; /* each entry is a pair of ints; only the first (the pgid) is examined here */
        -:  567:    }
        -:  568:
        -:  569:    /* See if everyone is happy */
     2059:  570:    NMPI_Allreduce( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, 
        -:  571:		    comm_ptr->handle );
        -:  572:
     2059:  573:    if (allfound) return MPI_SUCCESS;
        -:  574:
        -:  575:    /* FIXME: We need a cleaner way to handle this case than using an ifdef.
        -:  576:       We could have an empty version of MPID_PG_BCast in ch3u_port.c, but
        -:  577:       that's a rather crude way of addressing this problem.  Better is to
        -:  578:       make the handling of local and remote PIDS for the dynamic process
        -:  579:       case part of the dynamic process "module"; devices that don't support
        -:  580:       dynamic processes (and hence have only COMM_WORLD) could optimize for 
        -:  581:       that case */
        -:  582:#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
        -:  583:    /* We need to share the process groups.  We use routines
        -:  584:       from ch3u_port.c */
        2:  585:    MPID_PG_BCast( peer_ptr, comm_ptr, root );
        -:  586:#endif
        2:  587:    return MPI_SUCCESS;
        -:  588:}
        -:  589:
        -:  590:#undef FUNCNAME
        -:  591:#define FUNCNAME MPIDI_CH3U_VC_FinishPending
        -:  592:#undef FCNAME
        -:  593:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  594:static int MPIDI_CH3U_VC_FinishPending( MPIDI_VCRT_t *vcrt )
    #####:  595:{
    #####:  596:    int mpi_errno = MPI_SUCCESS;
        -:  597:    MPIDI_VC_t **vc;
        -:  598:    int i, size, nPending;
        -:  599:    MPID_Progress_state progress_state; 
        -:  600:    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_FINISHPENDING);
        -:  601:    /* Counts the VCs in vcrt whose state is not INACTIVE and returns once none are pending. */
        -:  602:    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_VC_FINISHPENDING);
        -:  603:    /* NOTE(review): with any pending VC, the MPIU_Assert below is reached before the progress wait; if assertions are enabled the wait presumably never runs -- confirm */
        -:  604:    do {
        -:  605:	/* Compute the number of pending ops.
        -:  606:	   A virtual connection has pending operations if the state
        -:  607:	   is not INACTIVE or if the sendq is not null */
    #####:  608:	nPending = 0;
    #####:  609:	vc       = vcrt->vcr_table;
    #####:  610:	size     = vcrt->size;
        -:  611:	/* printf( "Size = %d\n", size ); fflush(stdout); */
    #####:  612:	for (i=0; i<size; i++) {
    #####:  613:	    if (vc[i]->state != MPIDI_VC_STATE_INACTIVE) {
        -:  614:		/* FIXME: Printf for debugging */
    #####:  615:		printf ("state for vc[%d] is %d\n",
    #####:  616:			i, vc[i]->state ); fflush(stdout);
    #####:  617:		nPending++;
        -:  618:	    }
        -:  619:#if 0
        -:  620:	    /* FIXME: We shouldn't have any references to the channel-specific
        -:  621:	       fields in this part of the code.  This case should actually
        -:  622:	       not be needed; if there is a pending send element, the
        -:  623:	       top-level state should not be inactive */
        -:  624:	    if (vc[i]->ch.sendq_head) {
        -:  625:		/* FIXME: Printf for debugging */
        -:  626:		printf( "Nonempty sendQ for vc[%d]\n", i ); fflush(stdout);
        -:  627:		nPending++;
        -:  628:	    }
        -:  629:#endif
        -:  630:	}
    #####:  631:	if (nPending > 0) {
    #####:  632:	    printf( "Panic! %d pending operations!\n", nPending );
    #####:  633:	    fflush(stdout);
    #####:  634:	    MPIU_Assert( nPending == 0 );
        -:  635:	}
        -:  636:	else {
    #####:  637:	    break;
        -:  638:	}
        -:  639:
    #####:  640:	MPID_Progress_start(&progress_state);
        -:  641:	MPIU_DBG_MSG_D(CH3_DISCONNECT,VERBOSE,
        -:  642:		       "Waiting for %d close operations",
        -:  643:		       nPending);
    #####:  644:	mpi_errno = MPID_Progress_wait(&progress_state);
        -:  645:	/* --BEGIN ERROR HANDLING-- */
    #####:  646:	if (mpi_errno != MPI_SUCCESS) {
        -:  647:	    MPID_Progress_end(&progress_state);
    #####:  648:	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
        -:  649:				"**ch3|close_progress");
        -:  650:	}
        -:  651:	/* --END ERROR HANDLING-- */
        -:  652:	MPID_Progress_end(&progress_state);
    #####:  653:    } while(nPending > 0);
        -:  654:
    #####:  655: fn_exit:
        -:  656:    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_VC_FINISHPENDING);
    #####:  657:    return mpi_errno;
        -:  658: fn_fail:
        -:  659:    goto fn_exit;
        -:  660:}
        -:  661:
        -:  662:/*
        -:  663: * MPIDI_CH3U_Comm_FinishPending - Complete any pending operations on the 
        -:  664: * communicator.  
        -:  665: * Checks comm_ptr->vcrt first and then, if that returned no error and it is non-null, comm_ptr->local_vcrt; returns the resulting mpi_errno.
        -:  666: * Notes: 
        -:  667: * This should be used before freeing or disconnecting a communicator.
        -:  668: *
        -:  669: * For better scalability, we might want to form a list of VC's with 
        -:  670: * pending operations.
        -:  671: */
        -:  672:#undef FUNCNAME
        -:  673:#define FUNCNAME MPIDI_CH3U_Comm_FinishPending
        -:  674:#undef FCNAME
        -:  675:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  676:int MPIDI_CH3U_Comm_FinishPending( MPID_Comm *comm_ptr )
    #####:  677:{
    #####:  678:    int mpi_errno = MPI_SUCCESS;
        -:  679:    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_FINISHPENDING);
        -:  680:
        -:  681:    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_FINISHPENDING);
        -:  682:
    #####:  683:    mpi_errno = MPIDI_CH3U_VC_FinishPending( comm_ptr->vcrt );
    #####:  684:    if (!mpi_errno && comm_ptr->local_vcrt) {
    #####:  685:	mpi_errno = MPIDI_CH3U_VC_FinishPending( comm_ptr->local_vcrt );
        -:  686:    }
        -:  687:
        -:  688:    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_FINISHPENDING);
    #####:  689:    return mpi_errno;
        -:  690:}
        -:  691:
        -:  692:/* ----------------------------------------------------------------------- */
        -:  693:/* Routines to initialize a VC */
        -:  694:
        -:  695:/*
        -:  696: * The lpid counter counts new processes that this process knows about; MPIDI_VC_Init stores its current value in vc->lpid and then increments it.
        -:  697: */
        -:  698:static int lpid_counter = 0;
        -:  699:
        -:  700:/* Fully initialize a VC: mark it MPIDI_VC_STATE_INACTIVE, set its reference count to 0, record pg and rank, take the next lpid, and install the default rendezvous, eager-limit and noncontiguous-send settings.
        -:  701:   This then invokes the channel-specific VC initialization routine MPIDI_CH3_VC_Init (its result is not captured here).  Always returns MPI_SUCCESS. */
        -:  702:int MPIDI_VC_Init( MPIDI_VC_t *vc, MPIDI_PG_t *pg, int rank )
    21989:  703:{
    21989:  704:    vc->state = MPIDI_VC_STATE_INACTIVE;
    21989:  705:    vc->handle  = HANDLE_SET_MPI_KIND(0, MPID_VCONN);
    21989:  706:    MPIU_Object_set_ref(vc, 0);
    21989:  707:    vc->pg      = pg;
    21989:  708:    vc->pg_rank = rank;
    21989:  709:    vc->lpid    = lpid_counter++;
    21989:  710:    vc->node_id = -1; /* placeholder until a node id is assigned */
        -:  711:    MPIDI_VC_Init_seqnum_send(vc);
        -:  712:    MPIDI_VC_Init_seqnum_recv(vc);
    21989:  713:    vc->rndvSend_fn      = MPIDI_CH3_RndvSend;
    21989:  714:    vc->rndvRecv_fn      = MPIDI_CH3_RecvRndv;
    21989:  715:    vc->eager_max_msg_sz = MPIDI_CH3_EAGER_MAX_MSG_SIZE;
    21989:  716:    vc->sendNoncontig_fn = MPIDI_CH3_SendNoncontig_iov;
        -:  717:#ifdef ENABLE_COMM_OVERRIDES
        -:  718:    vc->comm_ops         = NULL;
        -:  719:#endif
        -:  720:    /* FIXME: We need a better abstraction for initializing the thread state
        -:  721:       for an object.  NOTE(review): the mutex-create call below has no trailing semicolon -- confirm the macro expansion supplies one. */
        -:  722:#if MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_PER_OBJECT
        -:  723:    MPID_Thread_mutex_create(&vc->pobj_mutex,NULL)
        -:  724:#endif /* MPIU_THREAD_GRANULARITY */
    21989:  725:    MPIU_CALL(MPIDI_CH3,VC_Init( vc ));
        -:  726:    MPIU_DBG_PrintVCState(vc);
        -:  727:
    21989:  728:    return MPI_SUCCESS;
        -:  729:}
        -:  730:
        -:  731:/* ----------------------------------------------------------------------- */
        -:  732:/* Routines to vend topology information. */
        -:  733:
        -:  734:static MPID_Node_id_t g_num_nodes = 0; /* count of node ids handed out so far; MPID_Get_max_node_id reports this value minus one */
        -:  735:char MPIU_hostname[MAX_HOSTNAME_LEN] = "_UNKNOWN_"; /* '_' is an illegal char for a hostname so */
        -:  736:                                                    /* this will never match */
        -:  737:/* MPID_Get_node_id - store in *id_p the node_id of the VC at index rank of comm->vcr.  rank is not range-checked here.  Always returns MPI_SUCCESS. */
        -:  738:#undef FUNCNAME
        -:  739:#define FUNCNAME MPID_Get_node_id
        -:  740:#undef FCNAME
        -:  741:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  742:int MPID_Get_node_id(MPID_Comm *comm, int rank, MPID_Node_id_t *id_p)
  2258720:  743:{
  2258720:  744:    *id_p = comm->vcr[rank]->node_id;
  2258720:  745:    return MPI_SUCCESS;
        -:  746:}
        -:  747:
        -:  748:#undef FUNCNAME
        -:  749:#define FUNCNAME MPID_Get_max_node_id
        -:  750:#undef FCNAME
        -:  751:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  752:/* MPID_Get_max_node_id - store g_num_nodes - 1 in *max_id_p and return MPI_SUCCESS.  The comm argument is not used by this implementation:
        -:  753:   providing a comm argument permits optimization, but this function is always allowed to return the max for the universe. */
        -:  754:int MPID_Get_max_node_id(MPID_Comm *comm, MPID_Node_id_t *max_id_p)
   686092:  755:{
        -:  756:    /* easiest way to implement this is to track it at PG create/destroy time; the assert below only detects g_num_nodes == 0 if MPID_Node_id_t is a signed type -- TODO confirm */
   686092:  757:    *max_id_p = g_num_nodes - 1;
   686092:  758:    MPIU_Assert(*max_id_p >= 0);
   686092:  759:    return MPI_SUCCESS;
        -:  760:}
        -:  761:
        -:  762:#if !defined(USE_PMI2_API)
        -:  763:/* publish_node_id - store this process's hostname in MPIU_hostname and, when pg->size > 1, put it in the connection KVS under the key hostname[our_pg_rank], then commit the KVS and enter a PMI barrier.  Returns mpi_errno (MPI_SUCCESS unless a step failed).  This function is not used in pmi2. */
        -:  764:static int publish_node_id(MPIDI_PG_t *pg, int our_pg_rank)
       82:  765:{
       82:  766:    int mpi_errno = MPI_SUCCESS;
        -:  767:    int pmi_errno;
        -:  768:    int ret;
        -:  769:    char *key;
        -:  770:    int key_max_sz;
        -:  771:    char *kvs_name;
       82:  772:    MPIU_CHKLMEM_DECL(1);
        -:  773:
        -:  774:    /* set MPIU_hostname; the explicit terminator covers a name that fills the whole buffer */
       82:  775:    ret = gethostname(MPIU_hostname, MAX_HOSTNAME_LEN);
       82:  776:    MPIU_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost", "**sock_gethost %s %d", strerror(errno), errno);
       82:  777:    MPIU_hostname[MAX_HOSTNAME_LEN-1] = '\0';
        -:  778:
        -:  779:    /* Allocate space for pmi key (sized by the PMI-reported maximum key length) */
       82:  780:    pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
       82:  781:    MPIU_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
        -:  782:
       82:  783:    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
        -:  784:
       82:  785:    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
       82:  786:    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -:  787:
        -:  788:    /* Put my hostname id; nothing is published for a single-process PG, so no barrier is entered in that case */
       82:  789:    if (pg->size > 1)
        -:  790:    {
       82:  791:        memset(key, 0, key_max_sz);
       82:  792:        MPIU_Snprintf(key, key_max_sz, "hostname[%d]", our_pg_rank);
        -:  793:
       82:  794:        pmi_errno = PMI_KVS_Put(kvs_name, key, MPIU_hostname);
       82:  795:        MPIU_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_put", "**pmi_kvs_put %d", pmi_errno);
        -:  796:
       82:  797:        pmi_errno = PMI_KVS_Commit(kvs_name);
       82:  798:        MPIU_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_commit", "**pmi_kvs_commit %d", pmi_errno);
        -:  799:
       82:  800:        pmi_errno = PMI_Barrier();
       82:  801:        MPIU_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**pmi_barrier", "**pmi_barrier %d", pmi_errno);
        -:  802:    }
        -:  803:    
        -:  804:fn_exit:
       82:  805:    MPIU_CHKLMEM_FREEALL();
       82:  806:    return mpi_errno;
        -:  807:fn_fail:
        -:  808:    goto fn_exit;
        -:  809:}
        -:  810:#endif
        -:  811:
        -:  812:
        -:  813:#define parse_error() MPIU_ERR_INTERNALANDJUMP(mpi_errno, "parse error")
        -:  814:/* advance _c until we find a non whitespace character (the cast keeps the <ctype.h> argument in the unsigned char range) */
        -:  815:#define skip_space(_c) while (isspace((unsigned char)*(_c))) ++(_c)
        -:  816:/* return true iff the character _c is valid in an identifier, i.e., [-_a-zA-Z0-9] */
        -:  817:#define isident(_c) (isalnum((unsigned char)(_c)) || (_c) == '-' || (_c) == '_')
        -:  818:/* the expect_* statement macros below expand parse_error(), so they rely on an mpi_errno variable being in scope at the point of use */
        -:  819:/* give an error iff *_c != _e */
        -:  820:#define expect_c(_c, _e) do { if (*(_c) != (_e)) parse_error(); } while (0)
        -:  821:#define expect_and_skip_c(_c, _e) do { expect_c(_c, _e); ++(_c); } while (0)
        -:  822:/* true iff the string _s begins with _e and the character right after that prefix is not an identifier character (an expression; it never raises an error itself) */
        -:  823:#define expect_s(_s, _e) (strncmp(_s, _e, strlen(_e)) == 0 && !isident((_s)[strlen(_e)]))
        -:  824:/* Mapping kinds reported by parse_mapping: NULL_MAPPING for an empty mapping string, VECTOR_MAPPING for the "vector" format. */
        -:  825:typedef enum {
        -:  826:    NULL_MAPPING = 0,
        -:  827:    VECTOR_MAPPING
        -:  828:} mapping_type_t;
        -:  829:
        -:  830:#define VECTOR "vector" /* format keyword expected at the start of a mapping string */
        -:  831:/* One (X,Y,Z) tuple of a mapping string, stored by parse_mapping in the order start_id, count, size. */
        -:  832:typedef struct map_block
        -:  833:{
        -:  834:    int start_id; /* X - node id start value */
        -:  835:    int count;    /* Y - number of nodes with size Z */
        -:  836:    int size;     /* Z - number of processes assigned to each node */
        -:  837:} map_block_t;
        -:  838:/* parse_mapping - parse the process-mapping string map_str.  On success *type holds the mapping kind, *map a newly allocated array of *nblocks block descriptors (populate_ids_from_mapping releases it with MPIU_Free), and mpi_errno is returned unchanged as MPI_SUCCESS.  An empty string yields NULL_MAPPING, *map == NULL, *nblocks == 0 and is not an error.  Malformed input takes the parse_error() path. */
        -:  839:#undef FUNCNAME
        -:  840:#define FUNCNAME parse_mapping
        -:  841:#undef FCNAME
        -:  842:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -:  843:static int parse_mapping(char *map_str, mapping_type_t *type, map_block_t **map, int *nblocks)
     2854:  844:{
     2854:  845:    int mpi_errno = MPI_SUCCESS;
     2854:  846:    char *c = map_str, *d;
     2854:  847:    int num_blocks = 0;
        -:  848:    int i;
     2854:  849:    MPIU_CHKPMEM_DECL(1);
        -:  850:    MPIDI_STATE_DECL(MPID_STATE_PARSE_MAPPING);
        -:  851:
        -:  852:    MPIDI_FUNC_ENTER(MPID_STATE_PARSE_MAPPING);
        -:  853:
        -:  854:    /* parse string of the form:
        -:  855:       '(' <format> ',' '(' <num> ',' <num> ',' <num> ')' {',' '(' <num> ',' <num> ',' <num> ')'} ')'
        -:  856:
        -:  857:       the values of each 3-tuple have the following meaning (X,Y,Z):
        -:  858:         X - node id start value
        -:  859:         Y - number of nodes with size Z
        -:  860:         Z - number of processes assigned to each node
        -:  861:     */
        -:  862:    MPIU_DBG_MSG_S(CH3_OTHER,VERBOSE,"parsing mapping string '%s'", map_str);
        -:  863:
     2854:  864:    if (!strlen(map_str)) {
        -:  865:        /* An empty-string indicates an inability to determine or express the
        -:  866:         * process layout on the part of the process manager.  Consider this a
        -:  867:         * non-fatal error case. */
    #####:  868:        *type = NULL_MAPPING;
    #####:  869:        *map = NULL;
    #####:  870:        *nblocks = 0;
    #####:  871:        goto fn_exit;
        -:  872:    }
        -:  873:
    #####:  874:    skip_space(c);
     2854:  875:    expect_and_skip_c(c, '(');
     2854:  876:    skip_space(c);
        -:  877:    /* the only format keyword accepted is VECTOR, and it must not be followed by another identifier character */
     2854:  878:    d = c;
     2854:  879:    if (expect_s(d, VECTOR))
     2854:  880:        *type = VECTOR_MAPPING;
        -:  881:    else
    #####:  882:        parse_error();
     2854:  883:    c += strlen(VECTOR);
     2854:  884:    skip_space(c);
        -:  885:
        -:  886:    /* first count the number of block descriptors: every '(' in the rest of the string is counted as one */
     2854:  887:    d = c;
    31658:  888:    while (*d) {
    25950:  889:        if (*d == '(')
     2854:  890:            ++num_blocks;
    25950:  891:        ++d;
        -:  892:    }
        -:  893:    /* NOTE(review): if a later parse_error() leads MPIU_CHKPMEM_REAP to release this allocation, *map is not reset, and populate_ids_from_mapping passes it to MPIU_Free on its error path -- confirm whether that is a double free */
     2854:  894:    MPIU_CHKPMEM_MALLOC(*map, map_block_t *, sizeof(map_block_t) * num_blocks, mpi_errno, "map");
        -:  895:
        -:  896:    /* parse block descriptors; each number must begin with a digit (so no sign), and strtol with base 0 also accepts octal and hex prefixes */
     5708:  897:    for (i = 0; i < num_blocks; ++i) {
     2854:  898:        expect_and_skip_c(c, ',');
     2854:  899:        skip_space(c);
        -:  900:
     2854:  901:        expect_and_skip_c(c, '(');
     2854:  902:        skip_space(c);
        -:  903:
     2854:  904:        if (!isdigit(*c))
    #####:  905:            parse_error();
     2854:  906:        (*map)[i].start_id = strtol(c, &c, 0);
     2854:  907:        skip_space(c);
        -:  908:
     2854:  909:        expect_and_skip_c(c, ',');
     2854:  910:        skip_space(c);
        -:  911:
     2854:  912:        if (!isdigit(*c))
    #####:  913:            parse_error();
     2854:  914:        (*map)[i].count = strtol(c, &c, 0);
     2854:  915:        skip_space(c);
        -:  916:
     2854:  917:        expect_and_skip_c(c, ',');
     2854:  918:        skip_space(c);
        -:  919:
     2854:  920:        if (!isdigit(*c))
    #####:  921:            parse_error();
     2854:  922:        (*map)[i].size = strtol(c, &c, 0);
        -:  923:
     2854:  924:        expect_and_skip_c(c, ')');
     2854:  925:        skip_space(c);
        -:  926:    }
        -:  927:    /* closing ')' of the whole mapping; whatever follows it in the string is not examined */
     2854:  928:    expect_and_skip_c(c, ')');
        -:  929:
     2854:  930:    *nblocks = num_blocks;
     2854:  931:    MPIU_CHKPMEM_COMMIT();
     2854:  932:fn_exit:
        -:  933:    MPIDI_FUNC_EXIT(MPID_STATE_PARSE_MAPPING);
     2854:  934:    return mpi_errno;
        -:  935:fn_fail:
    #####:  936:    MPIU_CHKPMEM_REAP();
        -:  937:    goto fn_exit;
        -:  938:}
        -:  939:
        -:  940:#if 0 /* disabled ad-hoc test driver for parse_mapping; not compiled */
        -:  941:static void t(const char *s, int nprocs)
        -:  942:{
        -:  943:    int ret;
        -:  944:    map_block_t *mb;
        -:  945:    int nblocks=0;
        -:  946:    int i;
        -:  947:    mapping_type_t mt = -1;
        -:  948:    int rank;
        -:  949:    int block, block_node, node_proc;
        -:  950:    /* parse_mapping takes a non-const char *, hence the strdup; neither that copy nor mb is freed in this driver */
        -:  951:    ret = parse_mapping(strdup(s), &mt, &mb, &nblocks);
        -:  952:    printf("str=\"%s\" type=%d ret=%d\n", s, mt, ret);
        -:  953:    if (ret) return;
        -:  954:    for (i = 0; i < nblocks; ++i)
        -:  955:        printf("    %d: start=%d size=%d count=%d\n", i, mb[i].start_id, mb[i].size, mb[i].count);
        -:  956:    printf("\n");
        -:  957:
        -:  958:    /* print "rank  node_id" for the first nprocs ranks, using the same loop nesting as populate_ids_from_mapping */
        -:  959:    rank = 0;
        -:  960:    while (rank < nprocs) {
        -:  961:        int node_id;
        -:  962:        for (block = 0; block < nblocks; ++block) {
        -:  963:            node_id = mb[block].start_id;
        -:  964:            for (block_node = 0; block_node < mb[block].count; ++block_node) {
        -:  965:                for (node_proc = 0; node_proc < mb[block].size; ++node_proc) {
        -:  966:                    printf("    %d  %d\n", rank, node_id);
        -:  967:                    ++rank;
        -:  968:                    if (rank == nprocs)
        -:  969:                        goto done;
        -:  970:                }
        -:  971:                ++node_id;
        -:  972:            }
        -:  973:        }
        -:  974:    }
        -:  975:done:
        -:  976:    return;
        -:  977:
        -:  978:}
        -:  979:
        -:  980:/* runs t() on three well-formed mapping strings, then on malformed ones and on the empty string */
        -:  981: void test_parse_mapping(void)
        -:  982:{
        -:  983:    t("(vector, (0,1,1))", 5);
        -:  984:    t("(vector, (0,1,1), (1,5,3), (6,2, 5))", 100);
        -:  985:    t("(vector, (1,1,1), (0,2,2))", 5);
        -:  986:    /* malformed inputs (trailing comma, missing '(', non-integer field), then the empty string, which parse_mapping reports as NULL_MAPPING rather than an error */
        -:  987:    t("(vector, (1,1,1), (0,2,2),)", 5);
        -:  988:    t("XXX, (1,1))", 1);
        -:  989:    t("vector, (1,1))", 1);
        -:  990:    t("(vector, (1.11, 2,2))", 1);
        -:  991:    t("", 1);
        -:  992:
        -:  993:}
        -:  994:
        -:  995:
        -:  996:#endif
        -:  997:/* populate_ids_from_mapping - parse the mapping string and assign pg->vct[rank].node_id for ranks 0 .. pg->size-1.  *num_nodes is raised to the largest node id seen and incremented once on the way out (both callers in this file pass 0 in).  *did_map is left at 1 unless the fn_fail path is taken, which happens for an empty mapping and presumably for parse or validation errors.  Returns mpi_errno. */
        -:  998:#undef FUNCNAME
        -:  999:#define FUNCNAME populate_ids_from_mapping
        -: 1000:#undef FCNAME
        -: 1001:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -: 1002:static int populate_ids_from_mapping(char *mapping, int *num_nodes, MPIDI_PG_t *pg, int *did_map)
     2854: 1003:{
     2854: 1004:    int mpi_errno = MPI_SUCCESS;
        -: 1005:    /* PMI_process_mapping is available */
     2854: 1006:    mapping_type_t mt = -1;
     2854: 1007:    map_block_t *mb = NULL;
     2854: 1008:    int nblocks = 0;
        -: 1009:    int rank;
        -: 1010:    int block, block_node, node_proc;
        -: 1011:
     2854: 1012:    *did_map = 1; /* reset upon failure */
        -: 1013:
     2854: 1014:    mpi_errno = parse_mapping(mapping, &mt, &mb, &nblocks);
     2854: 1015:    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1016:    /* an empty mapping is not an error: did_map becomes 0 while mpi_errno stays MPI_SUCCESS */
     2854: 1017:    if (NULL_MAPPING == mt) goto fn_fail;
     2854: 1018:    MPIU_ERR_CHKINTERNAL(mt != VECTOR_MAPPING, mpi_errno, "unsupported mapping type");
        -: 1019:
     2854: 1020:    rank = 0;
        -: 1021:    /* for a representation like (block,N,(1,1)) this while loop causes us to
        -: 1022:     * re-use that sole map block over and over until we have assigned node
        -: 1023:     * ids to every process.  NOTE(review): rank only advances in the innermost loop, so if nblocks == 0 or no block has count > 0 and size > 0 this loop cannot terminate */
     5708: 1024:    while (rank < pg->size) {
     2854: 1025:        for (block = 0; block < nblocks; ++block) {
     2854: 1026:            int node_id = mb[block].start_id;
     2854: 1027:            for (block_node = 0; block_node < mb[block].count; ++block_node) {
     2854: 1028:                if (node_id > *num_nodes)
    #####: 1029:                    *num_nodes = node_id;
        -: 1030:                /* the next mb[block].size ranks all live on node_id */
    15383: 1031:                for (node_proc = 0; node_proc < mb[block].size; ++node_proc) {
    15383: 1032:                    pg->vct[rank].node_id = node_id;
    15383: 1033:                    ++rank;
    15383: 1034:                    if (rank == pg->size)
     2854: 1035:                        goto fn_exit;
        -: 1036:                }
    #####: 1037:                ++node_id;
        -: 1038:            }
        -: 1039:        }
        -: 1040:    }
        -: 1041:    /* NOTE(review): fn_fail jumps back here, so *num_nodes is incremented and mb is freed on the failure path as well */
     2854: 1042:fn_exit:
     2854: 1043:    ++(*num_nodes); /* add one to get the num instead of the max */
     2854: 1044:    MPIU_Free(mb);
     2854: 1045:    return mpi_errno;
    #####: 1046:fn_fail:
    #####: 1047:    *did_map = 0;
    #####: 1048:    goto fn_exit;
        -: 1049:}
        -: 1050:
        -: 1051:/* Fills in the node_id info from PMI info.  Adapted from MPIU_Get_local_procs.
        -: 1052:   This function is collective over the entire PG because PMI_Barrier is called.
        -: 1053:   Before the fallback is reached, three shortcuts return early: a single-process PG, no_local mode (one node id per process), and a usable PMI_process_mapping value.
        -: 1054:   our_pg_rank should be set to -1 if this is not the current process' PG.  This
        -: 1055:   is currently not supported due to PMI limitations.
        -: 1056:
        -: 1057:   Fallback Algorithm:
        -: 1058:
        -: 1059:   Each process kvs_puts its hostname and stores the total number of
        -: 1060:   processes (g_num_global).  Each process determines the number of nodes
        -: 1061:   (g_num_nodes) and assigns a node id to each process (g_node_ids[]):
        -: 1062:
        -: 1063:     For each hostname the process searches the list of unique nodes
        -: 1064:     names (node_names[]) for a match.  If a match is found, the node id
        -: 1065:     is recorded for that matching process.  Otherwise, the hostname is
        -: 1066:     added to the list of node names.
        -: 1067:*/
        -: 1068:#undef FUNCNAME
        -: 1069:#define FUNCNAME MPIDI_Populate_vc_node_ids
        -: 1070:#undef FCNAME
        -: 1071:#define FCNAME MPIDI_QUOTE(FUNCNAME)
        -: 1072:int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
     3459: 1073:{
     3459: 1074:    int mpi_errno = MPI_SUCCESS;
        -: 1075:    int pmi_errno;
        -: 1076:    int ret;
        -: 1077:    int val;
        -: 1078:    int i, j;
        -: 1079:    char *key;
        -: 1080:    char *value;
        -: 1081:    int key_max_sz;
        -: 1082:    int val_max_sz;
        -: 1083:    char *kvs_name;
        -: 1084:    char **node_names;
        -: 1085:    char *node_name_buf;
     3459: 1086:    int no_local = 0;
     3459: 1087:    int odd_even_cliques = 0;
     3459: 1088:    int pmi_version = MPIU_DEFAULT_PMI_VERSION, pmi_subversion = MPIU_DEFAULT_PMI_SUBVERSION;
     3459: 1089:    MPIU_CHKLMEM_DECL(4);
        -: 1090:
        -: 1091:    /* See if the user wants to override our default values */
     3459: 1092:    MPIU_GetEnvInt("PMI_VERSION", &pmi_version);
     3459: 1093:    MPIU_GetEnvInt("PMI_SUBVERSION", &pmi_subversion);
        -: 1094:    /* a single-process PG takes the next node id and needs no PMI traffic */
     3459: 1095:    if (pg->size == 1) {
      523: 1096:        pg->vct[0].node_id = g_num_nodes++;
      523: 1097:        goto fn_exit;
        -: 1098:    }
        -: 1099:
        -: 1100:    /* Used for debugging only.  This disables communication over shared memory */
        -: 1101:#ifdef ENABLED_NO_LOCAL
        -: 1102:    no_local = 1;
        -: 1103:#else
     2936: 1104:    ret = MPIU_GetEnvBool("MPICH_NO_LOCAL", &val);
     2936: 1105:    if (ret == 1 && val)
    #####: 1106:        no_local = 1;
        -: 1107:#endif
        -: 1108:
        -: 1109:    /* Used for debugging on a single machine: Odd procs on a node are
        -: 1110:       seen as local to each other, and even procs on a node are seen
        -: 1111:       as local to each other. */
        -: 1112:#ifdef ENABLED_ODD_EVEN_CLIQUES
        -: 1113:    odd_even_cliques = 1;
        -: 1114:#else
     2936: 1115:    ret = MPIU_GetEnvBool("MPICH_ODD_EVEN_CLIQUES", &val);
     2936: 1116:    if (ret == 1 && val)
    #####: 1117:        odd_even_cliques = 1;
        -: 1118:#endif
        -: 1119:
     2936: 1120:    if (no_local) {
        -: 1121:        /* just assign 0 to n-1 as node ids and bail */
    #####: 1122:        for (i = 0; i < pg->size; ++i) {
    #####: 1123:            pg->vct[i].node_id = g_num_nodes++;
        -: 1124:        }
        -: 1125:        goto fn_exit;
        -: 1126:    }
        -: 1127:
        -: 1128:#ifdef USE_PMI2_API
        -: 1129:#if 0 /* use nodeid list */
        -: 1130:    {
        -: 1131:        int *node_ids;
        -: 1132:        int outlen;
        -: 1133:        int found = FALSE;
        -: 1134:        MPIU_CHKLMEM_MALLOC(node_ids, int *, pg->size * sizeof(int), mpi_errno, "node_ids");
        -: 1135:
        -: 1136:        mpi_errno = PMI2_Info_GetJobAttrIntArray("nodeIDs", node_ids, pg->size, &outlen, &found);
        -: 1137:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1138:        MPIU_ERR_CHKINTERNAL(!found, mpi_errno, "nodeIDs attribute not found");
        -: 1139:        MPIU_ERR_CHKINTERNAL(outlen != pg->size, mpi_errno, "did not receive enough nodeids");
        -: 1140:        g_num_nodes = 0;
        -: 1141:        for (i = 0; i < pg->size; ++i) {
        -: 1142:            pg->vct[i].node_id = node_ids[i];
        -: 1143:            if (g_num_nodes < node_ids[i])
        -: 1144:                g_num_nodes = node_ids[i];
        -: 1145:        }
        -: 1146:
        -: 1147:        ++g_num_nodes;
        -: 1148:
        -: 1149:        /* FIXME: need to handle oddeven cliques DARIUS */
        -: 1150:    }
        -: 1151:#else
        -: 1152:    {
        -: 1153:        char process_mapping[PMI2_MAX_VALLEN];
        -: 1154:        int outlen;
        -: 1155:        int found = FALSE;
        -: 1156:        int i;
        -: 1157:        map_block_t *mb;
        -: 1158:        int nblocks;
        -: 1159:        int rank;
        -: 1160:        int block, block_node, node_proc;
        -: 1161:        int did_map = 0;
        -: 1162:        int num_nodes = 0;
        -: 1163:        /* under PMI2 there is no hostname fallback: a missing or unusable mapping is reported as an internal error */
        -: 1164:        mpi_errno = PMI2_Info_GetJobAttr("PMI_process_mapping", process_mapping, sizeof(process_mapping), &found);
        -: 1165:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1166:        MPIU_ERR_CHKINTERNAL(!found, mpi_errno, "PMI_process_mapping attribute not found");
        -: 1167:        /* this code currently assumes pg is comm_world */
        -: 1168:        mpi_errno = populate_ids_from_mapping(process_mapping, &num_nodes, pg, &did_map);
        -: 1169:        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1170:        MPIU_ERR_CHKINTERNAL(!did_map, mpi_errno, "unable to populate node ids from PMI_process_mapping");
        -: 1171:        g_num_nodes = num_nodes;
        -: 1172:    }
        -: 1173:#endif
        -: 1174:#else /* USE_PMI2_API */
     2936: 1175:    if (our_pg_rank == -1) {
        -: 1176:        /* FIXME this routine can't handle the dynamic process case at this
        -: 1177:           time.  This will require more support from the process manager. */
    #####: 1178:        MPIU_Assert(0);
        -: 1179:    }
        -: 1180:
        -: 1181:    /* Allocate space for pmi key and value */
     2936: 1182:    pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
     2936: 1183:    MPIU_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
     2936: 1184:    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
        -: 1185:
     2936: 1186:    pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
     2936: 1187:    MPIU_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
     2936: 1188:    MPIU_CHKLMEM_MALLOC(value, char *, val_max_sz, mpi_errno, "value");
        -: 1189:
     2936: 1190:    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     2936: 1191:    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1192:
        -: 1193:    /* See if process manager supports PMI_process_mapping keyval */
        -: 1194:
        -: 1195:    /* FIXME 'PMI_process_mapping' only applies for the original PG (MPI_COMM_WORLD) */
     2936: 1196:    if (pmi_version == 1 && pmi_subversion == 1) {
     2936: 1197:        pmi_errno = PMI_KVS_Get(kvs_name, "PMI_process_mapping", value, val_max_sz);
     2936: 1198:        if (pmi_errno == 0) {
     2854: 1199:            int did_map = 0;
     2854: 1200:            int num_nodes = 0;
        -: 1201:            /* this code currently assumes pg is comm_world */
     2854: 1202:            mpi_errno = populate_ids_from_mapping(value, &num_nodes, pg, &did_map);
     2854: 1203:            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     2854: 1204:            g_num_nodes = num_nodes;
     2854: 1205:            if (did_map) {
     2854: 1206:                goto fn_exit;
        -: 1207:            }
        -: 1208:            else {
        -: 1209:                MPIU_DBG_MSG_S(CH3_OTHER,TERSE,"did_map==0, unable to populate node ids from mapping=%s",value);
        -: 1210:            }
        -: 1211:            /* else fall through to O(N^2) PMI_KVS_Gets version */
        -: 1212:        }
        -: 1213:        else {
        -: 1214:            MPIU_DBG_MSG(CH3_OTHER,TERSE,"unable to obtain the 'PMI_process_mapping' PMI key");
        -: 1215:        }
        -: 1216:    }
        -: 1217:    /* fallback: no usable PMI_process_mapping, so hostnames are exchanged through the KVS */
       82: 1218:    mpi_errno = publish_node_id(pg, our_pg_rank);
       82: 1219:    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        -: 1220:
        -: 1221:    /* Allocate temporary structures.  These would need to be persistent if
        -: 1222:       we somehow were able to support dynamic processes via this method. */
       82: 1223:    MPIU_CHKLMEM_MALLOC(node_names, char **, pg->size * sizeof(char*), mpi_errno, "node_names");
       82: 1224:    MPIU_CHKLMEM_MALLOC(node_name_buf, char *, pg->size * key_max_sz * sizeof(char), mpi_errno, "node_name_buf");
        -: 1225:    /* NOTE(review): each hostname slot is key_max_sz bytes although it holds a KVS value (a hostname) -- confirm a hostname can never exceed that size */
        -: 1226:    /* Gather hostnames */
      304: 1227:    for (i = 0; i < pg->size; ++i)
        -: 1228:    {
      222: 1229:        node_names[i] = &node_name_buf[i * key_max_sz];
      222: 1230:        node_names[i][0] = '\0';
        -: 1231:    }
        -: 1232:    /* NOTE(review): g_num_nodes indexes node_names below but is not reset to 0 first; after a did_map == 0 fall-through it was set from num_nodes (at least 1), so the scratch slot can reach node_names[pg->size] when all hostnames differ -- consider resetting it here */
      304: 1233:    for (i = 0; i < pg->size; ++i)
        -: 1234:    {
      222: 1235:        if (i == our_pg_rank)
        -: 1236:        {
        -: 1237:            /* This is us, no need to perform a get */
       82: 1238:            MPIU_Snprintf(node_names[g_num_nodes], key_max_sz, "%s", MPIU_hostname);
        -: 1239:        }
        -: 1240:        else
        -: 1241:        {
      140: 1242:            memset(key, 0, key_max_sz);
      140: 1243:            MPIU_Snprintf(key, key_max_sz, "hostname[%d]", i);
        -: 1244:
      140: 1245:            pmi_errno = PMI_KVS_Get(kvs_name, key, node_names[g_num_nodes], key_max_sz);
      140: 1246:            MPIU_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get", "**pmi_kvs_get %d", pmi_errno);
        -: 1247:        }
        -: 1248:        /* node_names[g_num_nodes] is a scratch slot holding the hostname just obtained; it is kept (by incrementing g_num_nodes) only when no earlier slot matches, otherwise it is cleared */
        -: 1249:        /* Find the node_id for this process, or create a new one */
        -: 1250:        /* FIXME:need a better algorithm -- this one does O(N^2) strncmp()s! */
        -: 1251:        /* The right fix is to get all this information from the process
        -: 1252:           manager, rather than bother with this hostname hack at all. */
      222: 1253:        for (j = 0; j < g_num_nodes; ++j)
      140: 1254:            if (!strncmp(node_names[j], node_names[g_num_nodes], key_max_sz))
      140: 1255:                break;
      222: 1256:        if (j == g_num_nodes)
       82: 1257:            ++g_num_nodes;
        -: 1258:        else
      140: 1259:            node_names[g_num_nodes][0] = '\0';
      222: 1260:        pg->vct[i].node_id = j;
        -: 1261:    }
        -: 1262:
       82: 1263:    if (odd_even_cliques)
        -: 1264:    {
        -: 1265:        /* Create new processes for all odd numbered processes. This
        -: 1266:           may leave nodes ids with no processes assigned to them, but
        -: 1267:           I think this is OK */
    #####: 1268:        for (i = 0; i < pg->size; ++i)
    #####: 1269:            if (i & 0x1)
    #####: 1270:                pg->vct[i].node_id += g_num_nodes;
    #####: 1271:        g_num_nodes *= 2;
        -: 1272:    }
        -: 1273:#endif
        -: 1274:
        -: 1275:fn_exit:
     6036: 1276:    MPIU_CHKLMEM_FREEALL();
     3459: 1277:    return mpi_errno;
        -: 1278:fn_fail:
        -: 1279:    goto fn_exit;
        -: 1280:}
        -: 1281: