-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/topo/dims_create.c
        -:    0:Graph:dims_create.gcno
        -:    0:Data:dims_create.gcda
        -:    0:Runs:675
        -:    0:Programs:179
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:/*
        -:    3: *  (C) 2001 by Argonne National Laboratory.
        -:    4: *      See COPYRIGHT in top-level directory.
        -:    5: */
        -:    6:
        -:    7:#include "mpiimpl.h"
        -:    8:#include "topo.h"
        -:    9:
        -:   10:/* -- Begin Profiling Symbol Block for routine MPI_Dims_create */
        -:   11:#if defined(HAVE_PRAGMA_WEAK)
        -:   12:#pragma weak MPI_Dims_create = PMPI_Dims_create
        -:   13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
        -:   14:#pragma _HP_SECONDARY_DEF PMPI_Dims_create  MPI_Dims_create
        -:   15:#elif defined(HAVE_PRAGMA_CRI_DUP)
        -:   16:#pragma _CRI duplicate MPI_Dims_create as PMPI_Dims_create
        -:   17:#endif
        -:   18:/* -- End Profiling Symbol Block */
        -:   19:
        -:   20:/* Because we store factors with their multiplicities, a small array can
        -:   21:   store all of the factors for a large number (grows *faster* than n 
        -:   22:   factorial). */
        -:   23:#define MAX_FACTORS 10
        -:   24:/* 2^20 is a millon */
        -:   25:#define MAX_DIMS    20
        -:   26:
        -:   27:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
        -:   28:   the MPI routines.  You can use USE_WEAK_SYMBOLS to see if MPICH is
        -:   29:   using weak symbols to implement the MPI routines. */
        -:   30:typedef struct Factors { int val, cnt; } Factors;
        -:   31:/* This routine may be global if we are not using weak symbols */
        -:   32:PMPI_LOCAL int MPIR_Factor( int, Factors [], int * );
        -:   33:PMPI_LOCAL int MPIR_ChooseFactors( int, Factors [], int, int, int [] );
        -:   34:
        -:   35:#ifndef MPICH_MPI_FROM_PMPI
        -:   36:#undef MPI_Dims_create
        -:   37:#define MPI_Dims_create PMPI_Dims_create
        -:   38:
        -:   39:/* Return the factors of n and their multiplicity in factors; the number of 
        -:   40:   distinct factors is the return value and the total number of factors,
        -:   41:   including multiplicities, is returned in ndivisors */
        -:   42:#define NUM_PRIMES 168
        -:   43:  static int primes[NUM_PRIMES] = 
        -:   44:	   {2,    3,    5,    7,   11,   13,   17,   19,   23,   29, 
        -:   45:	   31,   37,   41,   43,   47,   53,   59,   61,   67,   71, 
        -:   46:	   73,   79,   83,   89,   97,  101,  103,  107,  109,  113, 
        -:   47:	  127,  131,  137,  139,  149,  151,  157,  163,  167,  173, 
        -:   48:	  179,  181,  191,  193,  197,  199,  211,  223,  227,  229, 
        -:   49:	  233,  239,  241,  251,  257,  263,  269,  271,  277,  281, 
        -:   50:	  283,  293,  307,  311,  313,  317,  331,  337,  347,  349, 
        -:   51:	  353,  359,  367,  373,  379,  383,  389,  397,  401,  409, 
        -:   52:	  419,  421,  431,  433,  439,  443,  449,  457,  461,  463, 
        -:   53:	  467,  479,  487,  491,  499,  503,  509,  521,  523,  541, 
        -:   54:	  547,  557,  563,  569,  571,  577,  587,  593,  599,  601, 
        -:   55:	  607,  613,  617,  619,  631,  641,  643,  647,  653,  659, 
        -:   56:	  661,  673,  677,  683,  691,  701,  709,  719,  727,  733, 
        -:   57:	  739,  743,  751,  757,  761,  769,  773,  787,  797,  809, 
        -:   58:	  811,  821,  823,  827,  829,  839,  853,  857,  859,  863, 
        -:   59:	  877,  881,  883,  887,  907,  911,  919,  929,  937,  941, 
        -:   60:	  947,  953,  967,  971,  977,  983,  991,  997};
        -:   61:
        -:   62:PMPI_LOCAL int MPIR_Factor( int n, Factors factors[], int *ndivisors )
      586:   63:{
        -:   64:    int n_tmp, n_root;
      586:   65:    int i, nfactors=0, nall=0;
        -:   66:    int cnt;
        -:   67:
        -:   68:    /* Start from an approximate of the square root of n, by first finding
        -:   69:       the power of 2 at least as large as n.  The approximate root is then
        -:   70:       2 to the 1/2 this power */
      586:   71:    n_tmp  = n;
      586:   72:    n_root = 0;
     3537:   73:    while (n_tmp) {
     2365:   74:	n_root ++;
     2365:   75:	n_tmp >>= 1;
        -:   76:    }
      586:   77:    n_root = 1 << (n_root / 2);
        -:   78:
        -:   79:    /* Find the prime number that less than that value and try dividing
        -:   80:       out the primes.  */
     1872:   81:    for (i=0; i<NUM_PRIMES; i++) {
     1872:   82:	if (primes[i] > n_root) break;
        -:   83:    }
        -:   84:
        -:   85:    /* For each prime, divide out as many as possible */
     1872:   86:    for (;i>=0;i--) {
     1872:   87:	cnt = 0;
     4786:   88:	while ( (n %  primes[i]) == 0) {
     1042:   89:	    cnt ++;
     1042:   90:	    n = n / primes[i];
        -:   91:	}
     1872:   92:	if (cnt > 0) {
        -:   93:	    /* --BEGIN ERROR HANDLING-- */
      694:   94:	    if (nfactors + 1 == MAX_FACTORS) {
        -:   95:		/* Time to panic.  This should not happen, since the
        -:   96:		   smallest number that could exceed this would
        -:   97:		   be the product of the first 10 primes that are
        -:   98:		   greater than one, which is 6469693230 */
    #####:   99:		return nfactors;
        -:  100:	    }
        -:  101:	    /* --END ERROR HANDLING-- */
      694:  102:	    factors[nfactors].val = primes[i];
      694:  103:	    factors[nfactors++].cnt = cnt;
      694:  104:	    nall += cnt;
        -:  105:	}
        -:  106:    }
        -:  107:    /* If nfactors == 0, n was a prime, so return that */
      586:  108:    if (nfactors == 0) {
      106:  109:	nfactors = 1;
      106:  110:        nall = 1;
      106:  111:        factors[0].val = n;
      106:  112:        factors[0].cnt = 1;
        -:  113:    }
      480:  114:    else if (n > 1) {
        -:  115:	/* We need one more factor (a single prime > n_root) */
       60:  116:	factors[nfactors].val   = n;
       60:  117:	factors[nfactors++].cnt = 1;
       60:  118:	nall++;
        -:  119:    }
      586:  120:    *ndivisors = nall;
      586:  121:    return nfactors;
        -:  122:}
        -:  123:
        -:  124:/* 
        -:  125:   Given a collection of factors from the factors routine and a number of
        -:  126:   required values, combine the elements in factors into "needed" elements
        -:  127:   of the array chosen.  These are non-increasing and so can be used directly
        -:  128:   in setting values in the dims array in MPIR_Dims_create.
        -:  129:
        -:  130:   Algorithm (very simple)
        -:  131:
        -:  132:   target_size = nnodes / ndims needed.
        -:  133:   Accumulate factors, starting from the bottom,
        -:  134:   until the target size is met or exceeded.
        -:  135:   Put all of the remaining factors into the last dimension
        -:  136:   (recompute target_size with each step, since we may
        -:  137:   miss the target by a wide margin.
        -:  138:   
        -:  139:   A much more sophisticated code would try to balance
        -:  140:   the number of nodes assigned to each dimension, possibly
        -:  141:   in concert with guidelines from the device about "good"
        -:  142:   sizes
        -:  143:
        -:  144: */
        -:  145:/* FIXME: This routine does not work well with non-powers of two (and
        -:  146:   probably with collections of different factors.  For example, 
        -:  147:   factoring squares like 6*6 or 7*7 in to 2 dimensions doesn't yield the
        -:  148:   expected results (18,2) and (49,1) in current tests */
        -:  149:
        -:  150:/* 
        -:  151:   To give more reasonable values of the chosen factors (in the chosen array)
        -:  152:   from the factors, we first distribute the factors among the dimensions
        -:  153:   equally.
        -:  154:
        -:  155:   Note that since this is only used in the case where factors are the
        -:  156:   factors of nnnodes, we don't need to use the value of nnodes.
        -:  157: */
        -:  158:PMPI_LOCAL int MPIR_ChooseFactors( int nfactors, Factors factors[], 
        -:  159:				   int nnodes, int needed, int chosen[] )
      558:  160:{
        -:  161:    int i, j;
        -:  162:
        -:  163:    /* Initialize the chosen factors to all 1 */
     2098:  164:    for (i=0; i<needed; i++) {
     1540:  165:	chosen[i] = 1;
        -:  166:    }
        -:  167:    /* For each of the available factors, there are factors[].cnt 
        -:  168:       copies of each of the unique factors.  These are assigned in
        -:  169:       modified round robin fashion to each of the "chosen" entries.  
        -:  170:       The modification is to combine with the smallest current term
        -:  171:       Note that the factors are in decreasing order. */
        -:  172:
      558:  173:    i = 0;  /* We don't reset the count so that the factors are 
        -:  174:	       distributed among the dimensions */
     1390:  175:    for (j=0; j<nfactors; j++) {
      832:  176:	int cnt = factors[j].cnt;
      832:  177:	int val = factors[j].val;
        -:  178:	/* For each of the factors that we add, try to keep the 
        -:  179:	   entries balanced. */
     2776:  180:	while (cnt--) {
        -:  181:	    /* Find the smallest current entry */
        -:  182:	    int ii, cMin, iMin;
     1112:  183:	    iMin = 0;
     1112:  184:	    cMin = chosen[0];
     3150:  185:	    for (ii=1; ii<needed; ii++) {
     2038:  186:		if (chosen[ii] < cMin) {
      501:  187:		    cMin = chosen[ii];
      501:  188:		    iMin = ii;
        -:  189:		}
        -:  190:	    }
     1112:  191:	    if (chosen[i] > iMin) i = iMin;
        -:  192:
     1112:  193:	    chosen[i] *= val;
     1112:  194:	    i++;
     1112:  195:	    if (i >= needed) i = 0;
        -:  196:	}
        -:  197:    }
        -:  198:
        -:  199:    /* Second, sort the chosen array in non-increasing order.  Use
        -:  200:       a simple bubble sort because the number of elements is always small */
     1540:  201:    for (i=0; i<needed-1; i++) {
     2821:  202:	for (j=i+1; j<needed; j++) {
     1839:  203:	    if (chosen[j] > chosen[i]) {
       61:  204:		int tmp = chosen[i];
       61:  205:		chosen[i] = chosen[j];
       61:  206:		chosen[j] = tmp;
        -:  207:	    }
        -:  208:	}
        -:  209:    }
      558:  210:    return 0;
        -:  211:}
        -:  212:
        -:  213:int MPIR_Dims_create( int nnodes, int ndims, int *dims )
      601:  214:{
        -:  215:    Factors factors[MAX_FACTORS];
        -:  216:    int chosen[MAX_DIMS];
        -:  217:    int i, j, mpi_errno;
      601:  218:    int dims_needed, dims_product, nfactors, ndivisors=0;
        -:  219:    
        -:  220:    /* Find the number of unspecified dimensions in dims and the product
        -:  221:       of the positive values in dims */
      601:  222:    dims_needed  = 0;
      601:  223:    dims_product = 1;
     2282:  224:    for (i=0; i<ndims; i++) {
     1683:  225:	if (dims[i] < 0) {
        2:  226:	    mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, 
        -:  227:					      MPIR_ERR_RECOVERABLE,
        -:  228:					      "MPIR_Dims_create", __LINE__,
        -:  229:					      MPI_ERR_DIMS, 
        -:  230:			     "**argarrayneg", 
        -:  231:			     "**argarrayneg %s %d %d", "dims", i, dims[i]);
        2:  232:	    return mpi_errno;
        -:  233:	}
     1681:  234:	if (dims[i] == 0) dims_needed ++;
       84:  235:	else dims_product *= dims[i];
        -:  236:    }
        -:  237:
        -:  238:    /* Can we factor nnodes by dims_product? */
      599:  239:    if ((nnodes / dims_product ) * dims_product != nnodes ) {
        3:  240:	mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
        -:  241:					  "MPIR_Dims_create", __LINE__,
        -:  242:					  MPI_ERR_DIMS, "**dimspartition", 0);
        3:  243:	return mpi_errno;
        -:  244:    }
        -:  245:
      596:  246:    if (!dims_needed) {
        -:  247:	/* Special case - all dimensions provided */
       10:  248:	return MPI_SUCCESS;
        -:  249:    }
        -:  250:    
      586:  251:    if (dims_needed > MAX_DIMS) {
        -:  252:	/* --BEGIN ERROR HANDLING-- */
    #####:  253:	mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, 
        -:  254:		  MPIR_ERR_RECOVERABLE,
        -:  255:		  "MPIR_Dims_create", __LINE__,  MPI_ERR_DIMS, 
        -:  256:		  "**dimsmany", "**dimsmany %d %d", dims_needed, MAX_DIMS );
    #####:  257:	return mpi_errno;
        -:  258:	/* --END ERROR HANDLING-- */
        -:  259:    }
        -:  260:
      586:  261:    nnodes /= dims_product;
        -:  262:
        -:  263:    /* Now, factor nnodes into dims_needed components.  We'd like these
        -:  264:       to match the underlying machine topology as much as possible.  In the
        -:  265:       absence of information about the machine topology, we can try to 
        -:  266:       make the factors a close to each other as possible.  
        -:  267:
        -:  268:       The MPICH 1 version used donated code that was quite sophisticated
        -:  269:       and complex.  However, since it didn't take the system topology
        -:  270:       into account, it was more sophisticated that was perhaps warranted.
        -:  271:       In addition, useful values of nnodes for most MPI programs will be
        -:  272:       of the order 10-10000, and powers of two will be common.
        -:  273:    */
        -:  274:
        -:  275:    /* Get the factors */
      586:  276:    nfactors = MPIR_Factor( nnodes, factors, &ndivisors );
        -:  277:
        -:  278:    /* Divide into 3 major cases:
        -:  279:       1. Fewer divisors than needed dimensions.  Just use all of the
        -:  280:          factors up, setting the remaining dimensions to 1
        -:  281:       2. Only one distinct factor (typically 2) but with greater 
        -:  282:          multiplicity.  Give each dimension a nearly equal size
        -:  283:       3. Other.  There are enough factors to divide among the dimensions.
        -:  284:          This is done in an ad hoc fashion
        -:  285:    */
        -:  286:
        -:  287:/* DEBUG 
        -:  288:    printf( "factors are (%d of them) with %d divisors\n", nfactors, ndivisors );
        -:  289:    for (j=0; j<nfactors; j++) {
        -:  290:	printf( "val = %d repeated %d\n", factors[j].val, factors[j].cnt );
        -:  291:    }
        -:  292:*/
        -:  293:    /* The MPI spec requires that the values that are set be in nonincreasing
        -:  294:       order (MPI-1, section 6.5).  */
        -:  295:
        -:  296:    /* Distribute the factors among the dimensions */
      586:  297:    if (ndivisors <= dims_needed) {
        -:  298:	/* Just use the factors as needed.  */
      487:  299:	MPIR_ChooseFactors( nfactors, factors, nnodes, dims_needed, chosen );
      487:  300:	j = 0;
     1958:  301:	for (i=0; i<ndims; i++) {
     1471:  302:	    if (dims[i] == 0) {
     1425:  303:		dims[i] = chosen[j++];
        -:  304:	    }
        -:  305:	}
        -:  306:#if 0
        -:  307:	/* Any remaining unset dims are set to one */
        -:  308:	for (i++;i<ndims; i++) {
        -:  309:	    if (dims[i] == 0) 
        -:  310:		dims[i] = 1;
        -:  311:	}
        -:  312:#endif
        -:  313:    }
        -:  314:    else {
        -:  315:	/* We must combine some of the factors */
        -:  316:	/* This is what the fancy code is for in the MPICH-1 code.
        -:  317:	   If the number of distinct factors is 1 (e.g., a power of 2),
        -:  318:	   then this code can be much simpler */
        -:  319:	/* NOT DONE */
        -:  320:	/* FIXME */
       99:  321:	if (nfactors == 1) {
        -:  322:	    /* Special case for k**n, such as powers of 2 */
       28:  323:	    int factor = factors[0].val;
       28:  324:	    int cnt    = factors[0].cnt; /* Numver of factors left */
       28:  325:	    int cnteach = ( cnt + dims_needed - 1 ) / dims_needed;
        -:  326:	    int factor_each;
        -:  327:	    
       28:  328:	    factor_each = factor;
       28:  329:	    for (i=1; i<cnteach; i++) factor_each *= factor;
        -:  330:
       80:  331:	    for (i=0; i<ndims; i++) {
       52:  332:		if (dims[i] == 0) {
       52:  333:		    if (cnt > cnteach) {
       18:  334:			dims[i] = factor_each;
       18:  335:			cnt -= cnteach;
        -:  336:		    }
       34:  337:		    else if (cnt > 0) {
       28:  338:			factor_each = factor;
       59:  339:			for (j=1; j<cnt; j++) 
       31:  340:			    factor_each *= factor;
       28:  341:			dims[i] = factor_each;
       28:  342:			cnt = 0;
        -:  343:		    }
        -:  344:		    else {
        6:  345:			dims[i] = 1;
        -:  346:		    }
        -:  347:		}
        -:  348:	    }
        -:  349:	}	    
        -:  350:	else {
        -:  351:	    /* Here is the general case.  */
       71:  352:	    MPIR_ChooseFactors( nfactors, factors, nnodes, dims_needed, 
        -:  353:				chosen );
       71:  354:	    j = 0;
      186:  355:	    for (i=0; i<ndims; i++) {
      115:  356:		if (dims[i] == 0) {
      115:  357:		    dims[i] = chosen[j++];
        -:  358:		}
        -:  359:	    }
        -:  360:	}
        -:  361:    }
      586:  362:    return MPI_SUCCESS;
        -:  363:}
        -:  364:
        -:  365:#endif
        -:  366:
        -:  367:#undef FUNCNAME
        -:  368:#define FUNCNAME MPI_Dims_create
        -:  369:#undef FCNAME
        -:  370:#define FCNAME "MPI_Dims_create"
        -:  371:
        -:  372:/*@
        -:  373:    MPI_Dims_create - Creates a division of processors in a cartesian grid
        -:  374:
        -:  375: Input Parameters:
        -:  376:+ nnodes - number of nodes in a grid (integer) 
        -:  377:- ndims - number of cartesian dimensions (integer) 
        -:  378:
        -:  379: In/Out Parameter:   
        -:  380:. dims - integer array of size  'ndims' specifying the number of nodes in each 
        -:  381: dimension.  A value of 0 indicates that 'MPI_Dims_create' should fill in a
        -:  382: suitable value.
        -:  383:
        -:  384:.N ThreadSafe
        -:  385:
        -:  386:.N Fortran
        -:  387:
        -:  388:.N Errors
        -:  389:.N MPI_SUCCESS
        -:  390:@*/
        -:  391:int MPI_Dims_create(int nnodes, int ndims, int *dims)
      606:  392:{
      606:  393:    int mpi_errno = MPI_SUCCESS;
        -:  394:    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);
        -:  395:
      606:  396:    MPIR_ERRTEST_INITIALIZED_ORDIE();
        -:  397:    
        -:  398:    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIMS_CREATE);
        -:  399:
      606:  400:    if (ndims == 0) goto fn_exit;
        -:  401:    
        -:  402:    /* Validate parameters and objects (post conversion) */
        -:  403:#   ifdef HAVE_ERROR_CHECKING
        -:  404:    {
        -:  405:        MPID_BEGIN_ERROR_CHECKS;
        -:  406:        {
      605:  407:	    MPIR_ERRTEST_ARGNEG(nnodes,"nnodes",mpi_errno);
      605:  408:	    MPIR_ERRTEST_ARGNEG(ndims,"ndims",mpi_errno);
      605:  409:	    MPIR_ERRTEST_ARGNULL(dims,"dims",mpi_errno);
      605:  410:            if (mpi_errno) goto fn_fail;
        -:  411:        }
        -:  412:        MPID_END_ERROR_CHECKS;
        -:  413:    }
        -:  414:#   endif /* HAVE_ERROR_CHECKING */
        -:  415:
        -:  416:    /* ... body of routine ...  */
      601:  417:    if (MPIR_Process.dimsCreate != NULL) {
    #####:  418:	mpi_errno = MPIR_Process.dimsCreate( nnodes, ndims, dims );
        -:  419:    }
        -:  420:    else {
      601:  421:	mpi_errno = MPIR_Dims_create( nnodes, ndims, dims );
        -:  422:    }
        -:  423:    /* ... end of body of routine ... */
        -:  424:
      606:  425:  fn_exit:
        -:  426:    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIMS_CREATE);
      606:  427:    return mpi_errno;
        -:  428:
        -:  429:    /* --BEGIN ERROR HANDLING-- */
        -:  430:#   ifdef HAVE_ERROR_CHECKING
        4:  431:  fn_fail:
        -:  432:    {
        4:  433:	mpi_errno = MPIR_Err_create_code(
        -:  434:	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
        -:  435:	    "**mpi_dims_create",
        -:  436:	    "**mpi_dims_create %d %d %p", nnodes, ndims, dims);
        -:  437:    }
        4:  438:    mpi_errno = MPIR_Err_return_comm( NULL, FCNAME, mpi_errno );
        4:  439:    goto fn_exit;
        -:  440:#   endif
        -:  441:    /* --END ERROR HANDLING-- */
        -:  442:}