-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpi/topo/dims_create.c
-: 0:Graph:dims_create.gcno
-: 0:Data:dims_create.gcda
-: 0:Runs:675
-: 0:Programs:179
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:/*
-: 3: * (C) 2001 by Argonne National Laboratory.
-: 4: * See COPYRIGHT in top-level directory.
-: 5: */
-: 6:
-: 7:#include "mpiimpl.h"
-: 8:#include "topo.h"
-: 9:
-: 10:/* -- Begin Profiling Symbol Block for routine MPI_Dims_create */
-: 11:#if defined(HAVE_PRAGMA_WEAK)
-: 12:#pragma weak MPI_Dims_create = PMPI_Dims_create
-: 13:#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
-: 14:#pragma _HP_SECONDARY_DEF PMPI_Dims_create MPI_Dims_create
-: 15:#elif defined(HAVE_PRAGMA_CRI_DUP)
-: 16:#pragma _CRI duplicate MPI_Dims_create as PMPI_Dims_create
-: 17:#endif
-: 18:/* -- End Profiling Symbol Block */
-: 19:
-: 20:/* Because we store factors with their multiplicities, a small array can
-: 21: store all of the factors for a large number (grows *faster* than n
-: 22: factorial). */
-: 23:#define MAX_FACTORS 10
-: 24:/* 2^20 is a millon */
-: 25:#define MAX_DIMS 20
-: 26:
-: 27:/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
-: 28: the MPI routines. You can use USE_WEAK_SYMBOLS to see if MPICH is
-: 29: using weak symbols to implement the MPI routines. */
-: 30:typedef struct Factors { int val, cnt; } Factors;
-: 31:/* This routine may be global if we are not using weak symbols */
-: 32:PMPI_LOCAL int MPIR_Factor( int, Factors [], int * );
-: 33:PMPI_LOCAL int MPIR_ChooseFactors( int, Factors [], int, int, int [] );
-: 34:
-: 35:#ifndef MPICH_MPI_FROM_PMPI
-: 36:#undef MPI_Dims_create
-: 37:#define MPI_Dims_create PMPI_Dims_create
-: 38:
-: 39:/* Return the factors of n and their multiplicity in factors; the number of
-: 40: distinct factors is the return value and the total number of factors,
-: 41: including multiplicities, is returned in ndivisors */
-: 42:#define NUM_PRIMES 168
-: 43: static int primes[NUM_PRIMES] =
-: 44: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
-: 45: 31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
-: 46: 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
-: 47: 127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
-: 48: 179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
-: 49: 233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
-: 50: 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
-: 51: 353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
-: 52: 419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
-: 53: 467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
-: 54: 547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
-: 55: 607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
-: 56: 661, 673, 677, 683, 691, 701, 709, 719, 727, 733,
-: 57: 739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
-: 58: 811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
-: 59: 877, 881, 883, 887, 907, 911, 919, 929, 937, 941,
-: 60: 947, 953, 967, 971, 977, 983, 991, 997};
-: 61:
-: 62:PMPI_LOCAL int MPIR_Factor( int n, Factors factors[], int *ndivisors )
586: 63:{
-: 64: int n_tmp, n_root;
586: 65: int i, nfactors=0, nall=0;
-: 66: int cnt;
-: 67:
-: 68: /* Start from an approximate of the square root of n, by first finding
-: 69: the power of 2 at least as large as n. The approximate root is then
-: 70: 2 to the 1/2 this power */
586: 71: n_tmp = n;
586: 72: n_root = 0;
3537: 73: while (n_tmp) {
2365: 74: n_root ++;
2365: 75: n_tmp >>= 1;
-: 76: }
586: 77: n_root = 1 << (n_root / 2);
-: 78:
-: 79: /* Find the prime number that less than that value and try dividing
-: 80: out the primes. */
1872: 81: for (i=0; i<NUM_PRIMES; i++) {
1872: 82: if (primes[i] > n_root) break;
-: 83: }
-: 84:
-: 85: /* For each prime, divide out as many as possible */
1872: 86: for (;i>=0;i--) {
1872: 87: cnt = 0;
4786: 88: while ( (n % primes[i]) == 0) {
1042: 89: cnt ++;
1042: 90: n = n / primes[i];
-: 91: }
1872: 92: if (cnt > 0) {
|
-: 93: /* --BEGIN ERROR HANDLING-- */
694: 94: if (nfactors + 1 == MAX_FACTORS) {
-: 95: /* Time to panic. This should not happen, since the
-: 96: smallest number that could exceed this would
-: 97: be the product of the first 10 primes that are
-: 98: greater than one, which is 6469693230 */
#####: 99: return nfactors;
-: 100: }
-: 101: /* --END ERROR HANDLING-- */
|
694: 102: factors[nfactors].val = primes[i];
694: 103: factors[nfactors++].cnt = cnt;
694: 104: nall += cnt;
-: 105: }
-: 106: }
-: 107: /* If nfactors == 0, n was a prime, so return that */
586: 108: if (nfactors == 0) {
106: 109: nfactors = 1;
106: 110: nall = 1;
106: 111: factors[0].val = n;
106: 112: factors[0].cnt = 1;
-: 113: }
480: 114: else if (n > 1) {
-: 115: /* We need one more factor (a single prime > n_root) */
60: 116: factors[nfactors].val = n;
60: 117: factors[nfactors++].cnt = 1;
60: 118: nall++;
-: 119: }
586: 120: *ndivisors = nall;
586: 121: return nfactors;
-: 122:}
-: 123:
-: 124:/*
-: 125: Given a collection of factors from the factors routine and a number of
-: 126: required values, combine the elements in factors into "needed" elements
-: 127: of the array chosen. These are non-increasing and so can be used directly
-: 128: in setting values in the dims array in MPIR_Dims_create.
-: 129:
-: 130: Algorithm (very simple)
-: 131:
-: 132: target_size = nnodes / ndims needed.
-: 133: Accumulate factors, starting from the bottom,
-: 134: until the target size is met or exceeded.
-: 135: Put all of the remaining factors into the last dimension
-: 136: (recompute target_size with each step, since we may
-: 137: miss the target by a wide margin.
-: 138:
-: 139: A much more sophisticated code would try to balance
-: 140: the number of nodes assigned to each dimension, possibly
-: 141: in concert with guidelines from the device about "good"
-: 142: sizes
-: 143:
-: 144: */
-: 145:/* FIXME: This routine does not work well with non-powers of two (and
-: 146: probably with collections of different factors. For example,
-: 147: factoring squares like 6*6 or 7*7 in to 2 dimensions doesn't yield the
-: 148: expected results (18,2) and (49,1) in current tests */
-: 149:
-: 150:/*
-: 151: To give more reasonable values of the chosen factors (in the chosen array)
-: 152: from the factors, we first distribute the factors among the dimensions
-: 153: equally.
-: 154:
-: 155: Note that since this is only used in the case where factors are the
-: 156: factors of nnnodes, we don't need to use the value of nnodes.
-: 157: */
-: 158:PMPI_LOCAL int MPIR_ChooseFactors( int nfactors, Factors factors[],
-: 159: int nnodes, int needed, int chosen[] )
558: 160:{
-: 161: int i, j;
-: 162:
-: 163: /* Initialize the chosen factors to all 1 */
2098: 164: for (i=0; i<needed; i++) {
1540: 165: chosen[i] = 1;
-: 166: }
-: 167: /* For each of the available factors, there are factors[].cnt
-: 168: copies of each of the unique factors. These are assigned in
-: 169: modified round robin fashion to each of the "chosen" entries.
-: 170: The modification is to combine with the smallest current term
-: 171: Note that the factors are in decreasing order. */
-: 172:
558: 173: i = 0; /* We don't reset the count so that the factors are
-: 174: distributed among the dimensions */
1390: 175: for (j=0; j<nfactors; j++) {
832: 176: int cnt = factors[j].cnt;
832: 177: int val = factors[j].val;
-: 178: /* For each of the factors that we add, try to keep the
-: 179: entries balanced. */
2776: 180: while (cnt--) {
-: 181: /* Find the smallest current entry */
-: 182: int ii, cMin, iMin;
1112: 183: iMin = 0;
1112: 184: cMin = chosen[0];
3150: 185: for (ii=1; ii<needed; ii++) {
2038: 186: if (chosen[ii] < cMin) {
501: 187: cMin = chosen[ii];
501: 188: iMin = ii;
-: 189: }
-: 190: }
1112: 191: if (chosen[i] > iMin) i = iMin;
-: 192:
1112: 193: chosen[i] *= val;
1112: 194: i++;
1112: 195: if (i >= needed) i = 0;
-: 196: }
-: 197: }
-: 198:
-: 199: /* Second, sort the chosen array in non-increasing order. Use
-: 200: a simple bubble sort because the number of elements is always small */
1540: 201: for (i=0; i<needed-1; i++) {
2821: 202: for (j=i+1; j<needed; j++) {
1839: 203: if (chosen[j] > chosen[i]) {
61: 204: int tmp = chosen[i];
61: 205: chosen[i] = chosen[j];
61: 206: chosen[j] = tmp;
-: 207: }
-: 208: }
-: 209: }
558: 210: return 0;
-: 211:}
-: 212:
-: 213:int MPIR_Dims_create( int nnodes, int ndims, int *dims )
601: 214:{
-: 215: Factors factors[MAX_FACTORS];
-: 216: int chosen[MAX_DIMS];
-: 217: int i, j, mpi_errno;
601: 218: int dims_needed, dims_product, nfactors, ndivisors=0;
-: 219:
-: 220: /* Find the number of unspecified dimensions in dims and the product
-: 221: of the positive values in dims */
601: 222: dims_needed = 0;
601: 223: dims_product = 1;
2282: 224: for (i=0; i<ndims; i++) {
1683: 225: if (dims[i] < 0) {
|
2: 226: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
-: 227: MPIR_ERR_RECOVERABLE,
-: 228: "MPIR_Dims_create", __LINE__,
-: 229: MPI_ERR_DIMS,
-: 230: "**argarrayneg",
-: 231: "**argarrayneg %s %d %d", "dims", i, dims[i]);
|
2: 232: return mpi_errno;
-: 233: }
1681: 234: if (dims[i] == 0) dims_needed ++;
84: 235: else dims_product *= dims[i];
-: 236: }
-: 237:
-: 238: /* Can we factor nnodes by dims_product? */
599: 239: if ((nnodes / dims_product ) * dims_product != nnodes ) {
|
3: 240: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
-: 241: "MPIR_Dims_create", __LINE__,
-: 242: MPI_ERR_DIMS, "**dimspartition", 0);
|
3: 243: return mpi_errno;
-: 244: }
-: 245:
596: 246: if (!dims_needed) {
-: 247: /* Special case - all dimensions provided */
10: 248: return MPI_SUCCESS;
-: 249: }
-: 250:
586: 251: if (dims_needed > MAX_DIMS) {
|
-: 252: /* --BEGIN ERROR HANDLING-- */
#####: 253: mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
-: 254: MPIR_ERR_RECOVERABLE,
-: 255: "MPIR_Dims_create", __LINE__, MPI_ERR_DIMS,
-: 256: "**dimsmany", "**dimsmany %d %d", dims_needed, MAX_DIMS );
#####: 257: return mpi_errno;
-: 258: /* --END ERROR HANDLING-- */
-: 259: }
-: 260:
|
586: 261: nnodes /= dims_product;
-: 262:
-: 263: /* Now, factor nnodes into dims_needed components. We'd like these
-: 264: to match the underlying machine topology as much as possible. In the
-: 265: absence of information about the machine topology, we can try to
-: 266: make the factors a close to each other as possible.
-: 267:
-: 268: The MPICH 1 version used donated code that was quite sophisticated
-: 269: and complex. However, since it didn't take the system topology
-: 270: into account, it was more sophisticated that was perhaps warranted.
-: 271: In addition, useful values of nnodes for most MPI programs will be
-: 272: of the order 10-10000, and powers of two will be common.
-: 273: */
-: 274:
-: 275: /* Get the factors */
586: 276: nfactors = MPIR_Factor( nnodes, factors, &ndivisors );
-: 277:
-: 278: /* Divide into 3 major cases:
-: 279: 1. Fewer divisors than needed dimensions. Just use all of the
-: 280: factors up, setting the remaining dimensions to 1
-: 281: 2. Only one distinct factor (typically 2) but with greater
-: 282: multiplicity. Give each dimension a nearly equal size
-: 283: 3. Other. There are enough factors to divide among the dimensions.
-: 284: This is done in an ad hoc fashion
-: 285: */
-: 286:
-: 287:/* DEBUG
-: 288: printf( "factors are (%d of them) with %d divisors\n", nfactors, ndivisors );
-: 289: for (j=0; j<nfactors; j++) {
-: 290: printf( "val = %d repeated %d\n", factors[j].val, factors[j].cnt );
-: 291: }
-: 292:*/
-: 293: /* The MPI spec requires that the values that are set be in nonincreasing
-: 294: order (MPI-1, section 6.5). */
-: 295:
-: 296: /* Distribute the factors among the dimensions */
586: 297: if (ndivisors <= dims_needed) {
-: 298: /* Just use the factors as needed. */
487: 299: MPIR_ChooseFactors( nfactors, factors, nnodes, dims_needed, chosen );
487: 300: j = 0;
1958: 301: for (i=0; i<ndims; i++) {
1471: 302: if (dims[i] == 0) {
1425: 303: dims[i] = chosen[j++];
-: 304: }
-: 305: }
-: 306:#if 0
-: 307: /* Any remaining unset dims are set to one */
-: 308: for (i++;i<ndims; i++) {
-: 309: if (dims[i] == 0)
-: 310: dims[i] = 1;
-: 311: }
-: 312:#endif
-: 313: }
-: 314: else {
-: 315: /* We must combine some of the factors */
-: 316: /* This is what the fancy code is for in the MPICH-1 code.
-: 317: If the number of distinct factors is 1 (e.g., a power of 2),
-: 318: then this code can be much simpler */
-: 319: /* NOT DONE */
-: 320: /* FIXME */
99: 321: if (nfactors == 1) {
-: 322: /* Special case for k**n, such as powers of 2 */
28: 323: int factor = factors[0].val;
28: 324: int cnt = factors[0].cnt; /* Numver of factors left */
28: 325: int cnteach = ( cnt + dims_needed - 1 ) / dims_needed;
-: 326: int factor_each;
-: 327:
28: 328: factor_each = factor;
28: 329: for (i=1; i<cnteach; i++) factor_each *= factor;
-: 330:
80: 331: for (i=0; i<ndims; i++) {
52: 332: if (dims[i] == 0) {
52: 333: if (cnt > cnteach) {
18: 334: dims[i] = factor_each;
18: 335: cnt -= cnteach;
-: 336: }
34: 337: else if (cnt > 0) {
28: 338: factor_each = factor;
59: 339: for (j=1; j<cnt; j++)
31: 340: factor_each *= factor;
28: 341: dims[i] = factor_each;
28: 342: cnt = 0;
-: 343: }
-: 344: else {
6: 345: dims[i] = 1;
-: 346: }
-: 347: }
-: 348: }
-: 349: }
-: 350: else {
-: 351: /* Here is the general case. */
71: 352: MPIR_ChooseFactors( nfactors, factors, nnodes, dims_needed,
-: 353: chosen );
71: 354: j = 0;
186: 355: for (i=0; i<ndims; i++) {
115: 356: if (dims[i] == 0) {
115: 357: dims[i] = chosen[j++];
-: 358: }
-: 359: }
-: 360: }
-: 361: }
586: 362: return MPI_SUCCESS;
-: 363:}
-: 364:
-: 365:#endif
-: 366:
-: 367:#undef FUNCNAME
-: 368:#define FUNCNAME MPI_Dims_create
-: 369:#undef FCNAME
-: 370:#define FCNAME "MPI_Dims_create"
-: 371:
-: 372:/*@
-: 373: MPI_Dims_create - Creates a division of processors in a cartesian grid
-: 374:
-: 375: Input Parameters:
-: 376:+ nnodes - number of nodes in a grid (integer)
-: 377:- ndims - number of cartesian dimensions (integer)
-: 378:
-: 379: In/Out Parameter:
-: 380:. dims - integer array of size 'ndims' specifying the number of nodes in each
-: 381: dimension. A value of 0 indicates that 'MPI_Dims_create' should fill in a
-: 382: suitable value.
-: 383:
-: 384:.N ThreadSafe
-: 385:
-: 386:.N Fortran
-: 387:
-: 388:.N Errors
-: 389:.N MPI_SUCCESS
-: 390:@*/
-: 391:int MPI_Dims_create(int nnodes, int ndims, int *dims)
606: 392:{
606: 393: int mpi_errno = MPI_SUCCESS;
-: 394: MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);
-: 395:
606: 396: MPIR_ERRTEST_INITIALIZED_ORDIE();
-: 397:
-: 398: MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIMS_CREATE);
-: 399:
606: 400: if (ndims == 0) goto fn_exit;
-: 401:
-: 402: /* Validate parameters and objects (post conversion) */
|
-: 403:# ifdef HAVE_ERROR_CHECKING
-: 404: {
-: 405: MPID_BEGIN_ERROR_CHECKS;
-: 406: {
605: 407: MPIR_ERRTEST_ARGNEG(nnodes,"nnodes",mpi_errno);
605: 408: MPIR_ERRTEST_ARGNEG(ndims,"ndims",mpi_errno);
605: 409: MPIR_ERRTEST_ARGNULL(dims,"dims",mpi_errno);
605: 410: if (mpi_errno) goto fn_fail;
-: 411: }
-: 412: MPID_END_ERROR_CHECKS;
-: 413: }
-: 414:# endif /* HAVE_ERROR_CHECKING */
-: 415:
-: 416: /* ... body of routine ... */
|
601: 417: if (MPIR_Process.dimsCreate != NULL) {
|
#####: 418: mpi_errno = MPIR_Process.dimsCreate( nnodes, ndims, dims );
-: 419: }
-: 420: else {
|
601: 421: mpi_errno = MPIR_Dims_create( nnodes, ndims, dims );
-: 422: }
-: 423: /* ... end of body of routine ... */
-: 424:
606: 425: fn_exit:
|
-: 426: MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIMS_CREATE);
|
606: 427: return mpi_errno;
-: 428:
|
-: 429: /* --BEGIN ERROR HANDLING-- */
-: 430:# ifdef HAVE_ERROR_CHECKING
4: 431: fn_fail:
-: 432: {
4: 433: mpi_errno = MPIR_Err_create_code(
-: 434: mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
-: 435: "**mpi_dims_create",
-: 436: "**mpi_dims_create %d %d %p", nnodes, ndims, dims);
-: 437: }
4: 438: mpi_errno = MPIR_Err_return_comm( NULL, FCNAME, mpi_errno );
4: 439: goto fn_exit;
-: 440:# endif
-: 441: /* --END ERROR HANDLING-- */
-: 442:}
|