-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop.c
-: 0:Graph:dataloop.gcno
-: 0:Data:dataloop.gcda
-: 0:Runs:4381
-: 0:Programs:1376
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:
-: 3:/*
-: 4: * (C) 2001 by Argonne National Laboratory.
-: 5: * See COPYRIGHT in top-level directory.
-: 6: */
-: 7:
-: 8:#include <string.h>
-: 9:#include <stdio.h>
-: 10:#include <stdlib.h>
-: 11:
-: 12:#include "./dataloop.h"
-: 13:
-: 14:#undef DEBUG_DLOOP_SIZE
-: 15:#undef DLOOP_DEBUG_MEMORY
-: 16:
-: 17:/* Dataloops
-: 18: *
-: 19: * The functions here are used for the creation, copying, update, and display
-: 20: * of DLOOP_Dataloop structures and trees of these structures.
-: 21: *
-: 22: * Currently we store trees of dataloops in contiguous regions of memory. They
-: 23: * are stored in such a way that subtrees are also stored contiguously. This
-: 24: * makes it somewhat easier to copy these subtrees around. Keep this in mind
-: 25: * when looking at the functions below.
-: 26: *
-: 27: * The structures used in this file are defined in mpid_datatype.h. There is
-: 28: * no separate mpid_dataloop.h at this time.
-: 29: *
-: 30: * OPTIMIZATIONS:
-: 31: *
-: 32: * There are spots in the code with OPT tags that indicate where we could
-: 33: * optimize particular calculations or avoid certain checks.
-: 34: *
-: 35: * NOTES:
-: 36: *
-: 37: * Don't have locks in place at this time!
-: 38: */
-: 39:
-: 40:/* Some functions in this file are responsible for allocation of space for
-: 41: * dataloops. These structures include the dataloop structure itself
-: 42: * followed by a sequence of variable-sized arrays, depending on the loop
-: 43: * kind. For example, a dataloop of kind DLOOP_KIND_INDEXED has a
-: 44: * dataloop structure followed by an array of block sizes and then an array
-: 45: * of offsets.
-: 46: *
-: 47: * For efficiency and ease of cleanup (preserving a single free at
-: 48: * deallocation), we want to allocate this memory as a single large chunk.
-: 49: * However, we must perform some alignment of the components of this chunk
-: 50: * in order to obtain correct and efficient operation across all platforms.
-: 51: */
-: 52:
-: 53:
-: 54:/*@
-: 55: Dataloop_free - deallocate the resources used to store a dataloop and reset the caller's pointer to NULL
-: 56:
-: 57: Input Parameters:
-: 58:. dataloop - address of the dataloop pointer (not the structure itself); it is dereferenced unconditionally, so it must not be NULL. If *dataloop is already NULL the call does nothing.
-: 59:@*/
-: 60:void PREPEND_PREFIX(Dataloop_free)(DLOOP_Dataloop **dataloop)
281098: 61:{
-: 62: /* nothing to release when the caller's pointer is already NULL */
281098: 63: if (*dataloop == NULL) return;
-: 64:
-: 65:#ifdef DLOOP_DEBUG_MEMORY
-: 66: DLOOP_dbg_printf("DLOOP_Dataloop_free: freeing loop @ %x.\n",
-: 67: (int) *dataloop);
-: 68:#endif
-: 69: /* zero the leading DLOOP_Dataloop_common bytes, hand the block to DLOOP_Free, then clear the caller's pointer */
281098: 70: memset(*dataloop, 0, sizeof(DLOOP_Dataloop_common));
281098: 71: DLOOP_Free(*dataloop);
281098: 72: *dataloop = NULL;
281098: 73: return;
-: 74:}
-: 75:/*@
-: 76: Dataloop_copy - Copy an arbitrary dataloop structure, updating
-: 77: pointers as necessary
-: 78:
-: 79: Input Parameters:
-: 80:+ dest - pointer to destination region (handed to DLOOP_Memcpy together with src and size, then treated as the root of the copied tree)
-: 81:. src - pointer to original dataloop structure
-: 82:- size - size of dataloop structure, in bytes, covering the whole contiguous region
-: 83:
-: 84: This routine parses the dataloop structure as it goes in order to
-: 85: determine what exactly it needs to update.
-: 86:
-: 87: Notes:
-: 88: It assumes that the source dataloop was allocated in our usual way;
-: 89: this means that the entire dataloop is in a contiguous region and that
-: 90: the root of the tree is first in the array.
-: 91:
-: 92: This has some implications:
-: 93:+ we can use a contiguous copy mechanism to copy the majority of the
-: 94: structure
-: 95:- every pointer stored in the region refers to a location inside that same region, so each one can be corrected by adding (dest - src)
-: 96: (the correction starts at dest, because the first dataloop in the region is the root of the tree)
-: 97:@*/
-: 98:void PREPEND_PREFIX(Dataloop_copy)(void *dest,
-: 99: void *src,
-: 100: int size)
135934: 101:{
-: 102: DLOOP_Offset ptrdiff;
-: 103:
-: 104:#ifdef DLOOP_DEBUG_MEMORY
-: 105: DLOOP_dbg_printf("DLOOP_Dataloop_copy: copying from %x to %x (%d bytes).\n",
-: 106: (int) src, (int) dest, size);
-: 107:#endif
-: 108:
-: 109: /* copy region first; the copied pointers still refer to the source region at this point */
135934: 110: DLOOP_Memcpy(dest, src, size);
-: 111:
-: 112: /* Calculate difference in starting locations. DLOOP_Dataloop_update()
-: 113: * then traverses the new structure and updates internal pointers by
-: 114: * adding this difference to them. This way we can just copy the
-: 115: * structure, including pointers, in one big block.
-: 116: */
135934: 117: ptrdiff = (DLOOP_Offset) ((char *) dest - (char *) src);
-: 118:
-: 119: /* traverse the copy (not the source) updating pointers */
135934: 120: PREPEND_PREFIX(Dataloop_update)(dest, ptrdiff);
-: 121:
-: 122: return;
-: 123:}
-: 124:
-: 125:
-: 126:/*@
-: 127: Dataloop_update - update pointers after a copy operation
-: 128:
-: 129: Input Parameters:
-: 130:+ dataloop - pointer to loop to update; it is dereferenced unconditionally, so it must not be NULL
-: 131:- ptrdiff - byte offset added to each stored pointer (Dataloop_copy passes the new address minus the old address)
-: 132:
-: 133: This function is used to recursively update all the pointers in a
-: 134: dataloop tree. Only non-NULL pointers are shifted, and child loops are visited only when the DLOOP_FINAL_MASK bit of kind is clear.
-: 135:@*/
-: 136:void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
-: 137: DLOOP_Offset ptrdiff)
253100: 138:{
-: 139: /* OPT: only declare these variables down in the Struct case */
-: 140: int i;
-: 141: DLOOP_Dataloop **looparray;
-: 142:
253100: 143: switch(dataloop->kind & DLOOP_KIND_MASK) {
-: 144: case DLOOP_KIND_CONTIG:
-: 145: case DLOOP_KIND_VECTOR:
-: 146: /*
-: 147: * All these really ugly assignments are really of the form:
-: 148: *
-: 149: * ((char *) dataloop->loop_params.cm_t.dataloop) += ptrdiff;
-: 150: *
-: 151: * However, some compilers spit out warnings about casting on the
-: 152: * LHS, so we get this much nastier form instead (using common
-: 153: * struct for contig and vector):
-: 154: */
-: 155:
242125: 156: if (dataloop->loop_params.cm_t.dataloop)
-: 157: {
-: 158: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
-: 159:
2123: 160: dataloop->loop_params.cm_t.dataloop =
-: 161: (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
-: 162: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
-: 163: }
-: 164: /* descend only when the DLOOP_FINAL_MASK bit is clear. NOTE(review): a non-final node whose child pointer is NULL would be dereferenced by the recursive call -- confirm that cannot occur */
242125: 165: if (!(dataloop->kind & DLOOP_FINAL_MASK))
2123: 166: PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.cm_t.dataloop, ptrdiff);
-: 167: break;
-: 168:
-: 169: case DLOOP_KIND_BLOCKINDEXED:
656: 170: if (dataloop->loop_params.bi_t.offset_array)
-: 171: {
-: 172: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
656: 173: dataloop->loop_params.bi_t.offset_array =
-: 174: (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
-: 175: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
-: 176: }
-: 177:
656: 178: if (dataloop->loop_params.bi_t.dataloop)
-: 179: {
-: 180: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
156: 181: dataloop->loop_params.bi_t.dataloop =
-: 182: (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
-: 183: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
-: 184: }
-: 185:
656: 186: if (!(dataloop->kind & DLOOP_FINAL_MASK))
156: 187: PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.bi_t.dataloop, ptrdiff);
-: 188: break;
-: 189:
-: 190: case DLOOP_KIND_INDEXED:
622: 191: if (dataloop->loop_params.i_t.blocksize_array)
-: 192: {
-: 193: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
622: 194: dataloop->loop_params.i_t.blocksize_array =
-: 195: (DLOOP_Count *) MPI_AINT_CAST_TO_VOID_PTR
-: 196: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
-: 197: }
-: 198:
622: 199: if (dataloop->loop_params.i_t.offset_array)
-: 200: {
-: 201: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
622: 202: dataloop->loop_params.i_t.offset_array =
-: 203: (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
-: 204: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
-: 205: }
-: 206:
622: 207: if (dataloop->loop_params.i_t.dataloop)
-: 208: {
-: 209: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
|
#####: 210: dataloop->loop_params.i_t.dataloop =
-: 211: (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
-: 212: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
-: 213: }
-: 214:
|
622: 215: if (!(dataloop->kind & DLOOP_FINAL_MASK))
|
#####: 216: PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.i_t.dataloop, ptrdiff);
-: 217: break;
-: 218:
-: 219: case DLOOP_KIND_STRUCT:
|
9697: 220: if (dataloop->loop_params.s_t.blocksize_array)
-: 221: {
-: 222: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
9697: 223: dataloop->loop_params.s_t.blocksize_array =
-: 224: (DLOOP_Count *) MPI_AINT_CAST_TO_VOID_PTR
-: 225: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
-: 226: }
-: 227:
9697: 228: if (dataloop->loop_params.s_t.offset_array)
-: 229: {
-: 230: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
9697: 231: dataloop->loop_params.s_t.offset_array =
-: 232: (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
-: 233: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
-: 234: }
-: 235:
9697: 236: if (dataloop->loop_params.s_t.dataloop_array)
-: 237: {
-: 238: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
9697: 239: dataloop->loop_params.s_t.dataloop_array =
-: 240: (DLOOP_Dataloop **) MPI_AINT_CAST_TO_VOID_PTR
-: 241: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
-: 242: }
-: 243: /* NOTE(review): s_t.el_extent_array, which the allocators in this file also point into the same block, is not adjusted in this case -- confirm whether that is intentional */
-: 244: /* fix the N dataloop pointers too (the array itself was relocated just above, so looparray refers to the copy) */
9697: 245: looparray = dataloop->loop_params.s_t.dataloop_array;
122031: 246: for (i=0; i < dataloop->loop_params.s_t.count; i++) {
112334: 247: if (looparray[i])
-: 248: {
-: 249: MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) looparray[i] + ptrdiff);
112334: 250: looparray[i] = (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
-: 251: (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) looparray[i] + ptrdiff);
-: 252: }
-: 253: }
-: 254:
9697: 255: if (dataloop->kind & DLOOP_FINAL_MASK) break;
-: 256: /* second pass: recurse into every slot; NOTE(review): slots are not re-checked for NULL here, unlike in the loop above */
122031: 257: for (i=0; i < dataloop->loop_params.s_t.count; i++) {
112334: 258: PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
-: 259: }
-: 260: break;
-: 261: default:
|
-: 262: /* --BEGIN ERROR HANDLING-- */
#####: 263: DLOOP_Assert(0);
-: 264: break;
-: 265: /* --END ERROR HANDLING-- */
-: 266: }
-: 267: return;
-: 268:}
-: 269:
-: 270:/*@
-: 271: Dataloop_alloc - allocate the resources used to store a dataloop with
-: 272: no old loops associated with it.
-: 273:
-: 274: Input Parameters:
-: 275:+ kind - kind of dataloop to allocate
-: 276:. count - number of elements in dataloop (kind dependent)
-: 277:. new_loop_p - address at which to store new dataloop pointer
-: 278:- new_loop_sz_p - pointer to integer in which to store new loop size
-: 279:
-: 280: Notes:
-: 281: The count parameter passed into this function will often be different
-: 282: from the count passed in at the MPI layer due to optimizations. This routine only forwards to Dataloop_alloc_and_copy with a NULL old loop of size 0, so results (including *new_loop_p == NULL on allocation failure) are whatever that routine produces.
-: 283:@*/
-: 284:void PREPEND_PREFIX(Dataloop_alloc)(int kind,
-: 285: DLOOP_Count count,
-: 286: DLOOP_Dataloop **new_loop_p,
-: 287: int *new_loop_sz_p)
|
245436: 288:{
245436: 289: PREPEND_PREFIX(Dataloop_alloc_and_copy)(kind,
-: 290: count,
-: 291: NULL,
-: 292: 0,
-: 293: new_loop_p,
-: 294: new_loop_sz_p);
-: 295: return;
-: 296:}
-: 297:
-: 298:/*@
-: 299: Dataloop_alloc_and_copy - allocate the resources used to store a
-: 300: dataloop and copy in old dataloop as
-: 301: appropriate
-: 302:
-: 303: Input Parameters:
-: 304:+ kind - kind of dataloop to allocate (compared directly against the DLOOP_KIND_* values, without masking)
-: 305:. count - number of elements in dataloop (kind dependent)
-: 306:. old_loop - pointer to old dataloop (or NULL for none)
-: 307:. old_loop_sz - size of old dataloop (should be zero if old_loop is NULL); asserted to be a multiple of the alignment size when old_loop is non-NULL
-: 308:. new_loop_p - address at which to store new dataloop pointer
-: 309:- new_loop_sz_p - pointer to integer in which to store new loop size
-: 310:
-: 311: Notes:
-: 312: The count parameter passed into this function will often be different
-: 313: from the count passed in at the MPI layer. If the allocation fails, *new_loop_p is set to NULL and *new_loop_sz_p is not written.
-: 314:@*/
-: 315:void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
-: 316: DLOOP_Count count,
-: 317: DLOOP_Dataloop *old_loop,
-: 318: int old_loop_sz,
-: 319: DLOOP_Dataloop **new_loop_p,
-: 320: int *new_loop_sz_p)
254950: 321:{
254950: 322: int new_loop_sz = 0;
254950: 323: int align_sz = 8; /* default aligns everything to 8-byte boundaries */
-: 324: int epsilon;
254950: 325: int loop_sz = sizeof(DLOOP_Dataloop);
254950: 326: int off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;
-: 327:
-: 328: char *pos;
-: 329: DLOOP_Dataloop *new_loop;
-: 330:
-: 331:#ifdef HAVE_MAX_STRUCT_ALIGNMENT
254950: 332: if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
254950: 333: align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
-: 334: }
-: 335:#endif
-: 336:
254950: 337: if (old_loop != NULL) {
9514: 338: DLOOP_Assert((old_loop_sz % align_sz) == 0);
-: 339: }
-: 340:
-: 341: /* calculate the space that we actually need for everything. The cases deliberately fall through: STRUCT sizes all four arrays, INDEXED the block and offset arrays, BLOCKINDEXED only the offset array. NOTE(review): the products are stored in int -- confirm callers keep count small enough */
254950: 342: switch (kind) {
-: 343: case DLOOP_KIND_STRUCT:
-: 344: /* need space for dataloop pointers and extents */
|
#####: 345: ptr_sz = count * sizeof(DLOOP_Dataloop *);
#####: 346: extent_sz = count * sizeof(DLOOP_Offset);
-: 347: case DLOOP_KIND_INDEXED:
-: 348: /* (reached by fallthrough from STRUCT as well) need space for block sizes */
|
18859: 349: blk_sz = count * sizeof(DLOOP_Count);
-: 350: case DLOOP_KIND_BLOCKINDEXED:
-: 351: /* (reached by fallthrough from STRUCT and INDEXED as well) need space for block offsets */
29268: 352: off_sz = count * sizeof(DLOOP_Offset);
-: 353: case DLOOP_KIND_CONTIG:
-: 354: case DLOOP_KIND_VECTOR:
-: 355: break;
-: 356: default:
|
#####: 357: DLOOP_Assert(0);
-: 358: }
-: 359:
-: 360: /* pad everything that we're going to allocate: each component size is rounded up to a multiple of align_sz */
|
254950: 361: epsilon = loop_sz % align_sz;
254950: 362: if (epsilon) loop_sz += align_sz - epsilon;
-: 363:
254950: 364: epsilon = off_sz % align_sz;
254950: 365: if (epsilon) off_sz += align_sz - epsilon;
-: 366:
254950: 367: epsilon = blk_sz % align_sz;
254950: 368: if (epsilon) blk_sz += align_sz - epsilon;
-: 369:
254950: 370: epsilon = ptr_sz % align_sz;
254950: 371: if (epsilon) ptr_sz += align_sz - epsilon;
-: 372:
254950: 373: epsilon = extent_sz % align_sz;
254950: 374: if (epsilon) extent_sz += align_sz - epsilon;
-: 375:
254950: 376: new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
-: 377: extent_sz + old_loop_sz;
-: 378:
-: 379: /* allocate space: one block holds the loop, its arrays and the copy of the old loop */
254950: 380: new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
254950: 381: if (new_loop == NULL) {
|
#####: 382: *new_loop_p = NULL;
#####: 383: return;
-: 384: }
-: 385:
-: 386:#ifdef DLOOP_DEBUG_MEMORY
-: 387: DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, old = %d)\n",
-: 388: (int) new_loop,
-: 389: new_loop_sz,
-: 390: loop_sz,
-: 391: off_sz,
-: 392: blk_sz,
-: 393: ptr_sz,
-: 394: extent_sz,
-: 395: old_loop_sz);
-: 396:#endif
-: 397:
-: 398: /* set all the pointers in the new dataloop structure; no other field of the new loop is initialized here */
|
254950: 399: switch (kind) {
-: 400: case DLOOP_KIND_STRUCT:
-: 401: /* order is:
-: 402: * - pointers
-: 403: * - blocks
-: 404: * - offsets
-: 405: * - extents (only these four array pointers are set; the slots of dataloop_array are not initialized in this case)
-: 406: */
|
#####: 407: new_loop->loop_params.s_t.dataloop_array =
-: 408: (DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
#####: 409: new_loop->loop_params.s_t.blocksize_array =
-: 410: (DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
#####: 411: new_loop->loop_params.s_t.offset_array =
-: 412: (DLOOP_Offset *) (((char *) new_loop) + loop_sz +
-: 413: ptr_sz + blk_sz);
#####: 414: new_loop->loop_params.s_t.el_extent_array =
-: 415: (DLOOP_Offset *) (((char *) new_loop) + loop_sz +
-: 416: ptr_sz + blk_sz + off_sz);
#####: 417: break;
-: 418: case DLOOP_KIND_INDEXED:
-: 419: /* order is:
-: 420: * - blocks
-: 421: * - offsets
-: 422: */
|
18859: 423: new_loop->loop_params.i_t.blocksize_array =
-: 424: (DLOOP_Count *) (((char *) new_loop) + loop_sz);
18859: 425: new_loop->loop_params.i_t.offset_array =
-: 426: (DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
18859: 427: if (old_loop == NULL) {
18855: 428: new_loop->loop_params.i_t.dataloop = NULL;
-: 429: }
-: 430: else {
4: 431: new_loop->loop_params.i_t.dataloop =
-: 432: (DLOOP_Dataloop *) (((char *) new_loop) +
-: 433: (new_loop_sz - old_loop_sz));
-: 434: }
-: 435: break;
-: 436: case DLOOP_KIND_BLOCKINDEXED:
10409: 437: new_loop->loop_params.bi_t.offset_array =
-: 438: (DLOOP_Offset *) (((char *) new_loop) + loop_sz);
10409: 439: if (old_loop == NULL) {
9351: 440: new_loop->loop_params.bi_t.dataloop = NULL;
-: 441: }
-: 442: else {
1058: 443: new_loop->loop_params.bi_t.dataloop =
-: 444: (DLOOP_Dataloop *) (((char *) new_loop) +
-: 445: (new_loop_sz - old_loop_sz));
-: 446: }
-: 447: break;
-: 448: case DLOOP_KIND_CONTIG:
185660: 449: if (old_loop == NULL) {
178852: 450: new_loop->loop_params.c_t.dataloop = NULL;
-: 451: }
-: 452: else {
6808: 453: new_loop->loop_params.c_t.dataloop =
-: 454: (DLOOP_Dataloop *) (((char *) new_loop) +
-: 455: (new_loop_sz - old_loop_sz));
-: 456: }
-: 457: break;
-: 458: case DLOOP_KIND_VECTOR:
40022: 459: if (old_loop == NULL) {
38378: 460: new_loop->loop_params.v_t.dataloop = NULL;
-: 461: }
-: 462: else {
1644: 463: new_loop->loop_params.v_t.dataloop =
-: 464: (DLOOP_Dataloop *) (((char *) new_loop) +
-: 465: (new_loop_sz - old_loop_sz));
-: 466: }
-: 467: break;
-: 468: default:
|
#####: 469: DLOOP_Assert(0);
-: 470: }
-: 471: /* the old loop, when there is one, is copied into the last old_loop_sz bytes of the new block and has its pointers fixed up by Dataloop_copy */
|
254950: 472: pos = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
254950: 473: if (old_loop != NULL) {
9514: 474: PREPEND_PREFIX(Dataloop_copy)(pos, old_loop, old_loop_sz);
-: 475: }
-: 476:
254950: 477: *new_loop_p = new_loop;
254950: 478: *new_loop_sz_p = new_loop_sz;
254950: 479: return;
-: 480:}
-: 481:
-: 482:/*@
-: 483: Dataloop_struct_alloc - allocate the resources used to store a struct dataloop and
-: 484: reserve room for its old dataloops (nothing is copied by this routine). this version
-: 485: is specifically for use when a struct dataloop is
-: 486: being created; the space to hold old dataloops in
-: 487: this case must be described back to the
-: 488: implementation in order for efficient copying.
-: 489:
-: 490: Input Parameters:
-: 491:+ count - number of elements in dataloop (kind dependent); sizes the pointer, blocksize, offset and extent arrays
-: 492:. old_loop_sz - number of bytes to reserve at the end of the block for old dataloops
-: 493:. basic_ct - number of basic types for which new dataloops are needed (one padded DLOOP_Dataloop is reserved for each)
-: 494:. old_loop_p - address at which to store pointer to old loops, i.e. to the reserved region that follows the extent array
-: 495:. new_loop_p - address at which to store new struct dataloop pointer
-: 496:- new_loop_sz_p - address at which to store new loop size
-: 497:
-: 498: Notes:
-: 499: The count parameter passed into this function will often be different
-: 500: from the count passed in at the MPI layer due to optimizations.
-: 501:
-: 502: The caller is responsible for filling in the region pointed to by
-: 503: old_loop_p (count elements). If the allocation fails, *new_loop_p is set to NULL and neither *old_loop_p nor *new_loop_sz_p is written.
-: 504:@*/
-: 505:void PREPEND_PREFIX(Dataloop_struct_alloc)(DLOOP_Count count,
-: 506: int old_loop_sz,
-: 507: int basic_ct,
-: 508: DLOOP_Dataloop **old_loop_p,
-: 509: DLOOP_Dataloop **new_loop_p,
-: 510: int *new_loop_sz_p)
32173: 511:{
32173: 512: int new_loop_sz = 0;
32173: 513: int align_sz = 8; /* default aligns everything to 8-byte boundaries */
-: 514: int epsilon;
32173: 515: int loop_sz = sizeof(DLOOP_Dataloop);
-: 516: int off_sz, blk_sz, ptr_sz, extent_sz, basic_sz;
-: 517:
-: 518: DLOOP_Dataloop *new_loop;
-: 519:
-: 520:#ifdef HAVE_MAX_STRUCT_ALIGNMENT
32173: 521: if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
32173: 522: align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
-: 523: }
-: 524:#endif
-: 525:
-: 526: /* calculate the space that we actually need for everything. NOTE(review): the products are stored in int -- confirm callers keep count small enough */
32173: 527: ptr_sz = count * sizeof(DLOOP_Dataloop *);
32173: 528: extent_sz = count * sizeof(DLOOP_Offset);
32173: 529: blk_sz = count * sizeof(DLOOP_Count);
32173: 530: off_sz = count * sizeof(DLOOP_Offset);
32173: 531: basic_sz = sizeof(DLOOP_Dataloop);
-: 532:
-: 533: /* pad everything that we're going to allocate: each component size is rounded up to a multiple of align_sz */
32173: 534: epsilon = loop_sz % align_sz;
32173: 535: if (epsilon) loop_sz += align_sz - epsilon;
-: 536:
32173: 537: epsilon = off_sz % align_sz;
32173: 538: if (epsilon) off_sz += align_sz - epsilon;
-: 539:
32173: 540: epsilon = blk_sz % align_sz;
32173: 541: if (epsilon) blk_sz += align_sz - epsilon;
-: 542:
32173: 543: epsilon = ptr_sz % align_sz;
32173: 544: if (epsilon) ptr_sz += align_sz - epsilon;
-: 545:
32173: 546: epsilon = extent_sz % align_sz;
32173: 547: if (epsilon) extent_sz += align_sz - epsilon;
-: 548:
32173: 549: epsilon = basic_sz % align_sz;
32173: 550: if (epsilon) basic_sz += align_sz - epsilon;
-: 551:
-: 552: /* note: we pad *each* basic type dataloop, because the
-: 553: * code used to create them assumes that we're going to
-: 554: * do that.
-: 555: */
-: 556:
32173: 557: new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
-: 558: extent_sz + (basic_ct * basic_sz) + old_loop_sz;
-: 559:
-: 560: /* allocate space: a single block for the struct loop, its four arrays and the reserved tail */
32173: 561: new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
32173: 562: if (new_loop == NULL) {
|
#####: 563: *new_loop_p = NULL;
#####: 564: return;
-: 565: }
-: 566:
-: 567:#ifdef DLOOP_DEBUG_MEMORY
-: 568: DLOOP_dbg_printf("DLOOP_Dataloop_struct_alloc: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, basics = %d, old = %d)\n",
-: 569: (int) new_loop,
-: 570: new_loop_sz,
-: 571: loop_sz,
-: 572: off_sz,
-: 573: blk_sz,
-: 574: ptr_sz,
-: 575: extent_sz,
-: 576: basic_sz,
-: 577: old_loop_sz);
-: 578:#endif
-: 579:
-: 580: /* set all the pointers in the new dataloop structure; layout after the loop itself is pointers, blocks, offsets, extents, then the reserved tail */
|
32173: 581: new_loop->loop_params.s_t.dataloop_array = (DLOOP_Dataloop **)
-: 582: (((char *) new_loop) + loop_sz);
32173: 583: new_loop->loop_params.s_t.blocksize_array = (DLOOP_Count *)
-: 584: (((char *) new_loop) + loop_sz + ptr_sz);
32173: 585: new_loop->loop_params.s_t.offset_array = (DLOOP_Offset *)
-: 586: (((char *) new_loop) + loop_sz + ptr_sz + blk_sz);
32173: 587: new_loop->loop_params.s_t.el_extent_array = (DLOOP_Offset *)
-: 588: (((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz);
-: 589: /* report where the reserved tail (basic-type loops plus old loops) begins */
32173: 590: *old_loop_p = (DLOOP_Dataloop *)
-: 591: (((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz + extent_sz);
32173: 592: *new_loop_p = new_loop;
32173: 593: *new_loop_sz_p = new_loop_sz;
-: 594:
32173: 595: return;
-: 596:}
-: 597:
-: 598:/*@
-: 599: Dataloop_dup - make a copy of a dataloop
-: 600: Input Parameters: old_loop (loop to duplicate, asserted non-NULL), old_loop_sz (its size as handed to DLOOP_Malloc and Dataloop_copy, asserted > 0), new_loop_p (address at which to store the copy)
-: 601: There is no return value: the copy is stored in *new_loop_p, which is set to NULL if the allocation fails.
-: 602:@*/
-: 603:void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
-: 604: int old_loop_sz,
-: 605: DLOOP_Dataloop **new_loop_p)
768: 606:{
-: 607: DLOOP_Dataloop *new_loop;
-: 608:
768: 609: DLOOP_Assert(old_loop != NULL);
768: 610: DLOOP_Assert(old_loop_sz > 0);
-: 611:
768: 612: new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
768: 613: if (new_loop == NULL) {
|
#####: 614: *new_loop_p = NULL;
#####: 615: return;
-: 616: }
-: 617: /* Dataloop_copy both copies the bytes and relocates the pointers stored inside the copy */
|
768: 618: PREPEND_PREFIX(Dataloop_copy)(new_loop, old_loop, old_loop_sz);
768: 619: *new_loop_p = new_loop;
768: 620: return;
-: 621:}
-: 622:
-: 623:/*@
-: 624: Dataloop_stream_size - return the size of the data described by the dataloop
-: 625:
-: 626: Input Parameters:
-: 627:+ dl_p - pointer to dataloop for which we will return the size
-: 628:- sizefn - function for determining size of types in the corresponding stream
-: 629: (passing NULL will instead result in el_size values being used)
-: 630: Return value: the element size of the final loop times the counts gathered on the way down; when a struct loop is reached, the sum over its members of blocksize times the member's stream size, times the counts gathered so far.
-: 631:@*/
-: 632:DLOOP_Offset
-: 633:PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
-: 634: DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
|
#####: 635:{
#####: 636: DLOOP_Offset tmp_sz, tmp_ct = 1;
-: 637: /* tmp_ct accumulates the product of the counts seen while walking down the chain of child loops */
-: 638: for (;;)
-: 639: {
#####: 640: if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
-: 641: {
-: 642: int i;
-: 643: /* a struct loop has one child loop per member: add up blocksize[i] times the stream size of child i */
#####: 644: tmp_sz = 0;
#####: 645: for (i = 0; i < dl_p->loop_params.s_t.count; i++)
-: 646: {
#####: 647: tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
-: 648: PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
-: 649: }
#####: 650: return tmp_sz * tmp_ct;
-: 651: }
-: 652: /* NOTE(review): the debug blocks below test DLOOP_DEBUG_SIZE, whereas the #undef near the top of the file names DEBUG_DLOOP_SIZE -- confirm which spelling is intended */
#####: 653: switch (dl_p->kind & DLOOP_KIND_MASK) {
-: 654: case DLOOP_KIND_CONTIG:
#####: 655: tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
-: 656:#ifdef DLOOP_DEBUG_SIZE
-: 657: DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
-: 658: (int) dl_p->loop_params.c_t.count, (MPI_Aint) tmp_ct);
-: 659:#endif
#####: 660: break;
-: 661: case DLOOP_KIND_VECTOR:
#####: 662: tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
-: 663: (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
-: 664:#ifdef DLOOP_DEBUG_SIZE
-: 665: DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
-: 666: (int) dl_p->loop_params.v_t.count,
-: 667: (int) dl_p->loop_params.v_t.blocksize,
-: 668: (MPI_Aint) tmp_ct);
-: 669:#endif
#####: 670: break;
-: 671: case DLOOP_KIND_BLOCKINDEXED:
#####: 672: tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
-: 673: (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
-: 674:#ifdef DLOOP_DEBUG_SIZE
-: 675: DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
-: 676: (int) dl_p->loop_params.bi_t.count *
-: 677: (int) dl_p->loop_params.bi_t.blocksize,
-: 678: (MPI_Aint) tmp_ct);
-: 679:#endif
#####: 680: break;
-: 681: case DLOOP_KIND_INDEXED:
#####: 682: tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
-: 683:#ifdef DLOOP_DEBUG_SIZE
-: 684: DLOOP_dbg_printf("stream_size: contig: blks = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
-: 685: (int) dl_p->loop_params.i_t.total_blocks,
-: 686: (MPI_Aint) tmp_ct);
-: 687:#endif
#####: 688: break;
-: 689: default:
|
-: 690: /* --BEGIN ERROR HANDLING-- */
#####: 691: DLOOP_Assert(0);
-: 692: break;
-: 693: /* --END ERROR HANDLING-- */
-: 694: }
-: 695: /* stop at a loop with the DLOOP_FINAL_MASK bit set; otherwise follow the child pointer, read through the cm_t view for every non-struct kind */
|
#####: 696: if (dl_p->kind & DLOOP_FINAL_MASK) break;
-: 697: else {
#####: 698: DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
#####: 699: dl_p = dl_p->loop_params.cm_t.dataloop;
-: 700: }
#####: 701: }
-: 702:
-: 703: /* call fn for size using bottom type, or use size if fnptr is NULL */
#####: 704: tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);
-: 705:
#####: 706: return tmp_sz * tmp_ct;
-: 707:}
-: 708:
|
-: 709:/* --BEGIN ERROR HANDLING-- */
-: 710:/*@
-: 711: Dataloop_print - dump a dataloop tree through DLOOP_dbg_printf for debugging
-: 712: purposes
-: 713:
-: 714: Input Parameters:
-: 715:+ dataloop - root of tree to dump (a NULL pointer is reported and otherwise ignored)
-: 716:- depth - starting depth; used to help keep up with where we are in the tree (each recursive call passes depth+1)
-: 717:@*/
-: 718:void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop,
-: 719: int depth)
#####: 720:{
-: 721: int i;
-: 722:
#####: 723: if (dataloop == NULL)
-: 724: {
#####: 725: DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n");
#####: 726: return;
-: 727: }
-: 728: /* one summary line for this node, then a kind-specific line; children are printed only when the DLOOP_FINAL_MASK bit is clear */
#####: 729: DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" MPI_AINT_FMT_DEC_SPEC "\n",
-: 730: dataloop, (int) depth, (int) dataloop->kind, (MPI_Aint) dataloop->el_extent);
#####: 731: switch(dataloop->kind & DLOOP_KIND_MASK) {
-: 732: case DLOOP_KIND_CONTIG:
#####: 733: DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n",
-: 734: (int) dataloop->loop_params.c_t.count,
-: 735: dataloop->loop_params.c_t.dataloop);
#####: 736: if (!(dataloop->kind & DLOOP_FINAL_MASK))
#####: 737: PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1);
-: 738: break;
-: 739: case DLOOP_KIND_VECTOR:
#####: 740: DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" MPI_AINT_FMT_DEC_SPEC ", datatype=%p\n",
-: 741: (int) dataloop->loop_params.v_t.count,
-: 742: (int) dataloop->loop_params.v_t.blocksize,
-: 743: (MPI_Aint) dataloop->loop_params.v_t.stride,
-: 744: dataloop->loop_params.v_t.dataloop);
#####: 745: if (!(dataloop->kind & DLOOP_FINAL_MASK))
#####: 746: PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1);
-: 747: break;
-: 748: case DLOOP_KIND_BLOCKINDEXED:
#####: 749: DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n",
-: 750: (int) dataloop->loop_params.bi_t.count,
-: 751: (int) dataloop->loop_params.bi_t.blocksize,
-: 752: dataloop->loop_params.bi_t.dataloop);
-: 753: /* TODO: the offset array is not printed by this routine yet */
#####: 754: if (!(dataloop->kind & DLOOP_FINAL_MASK))
#####: 755: PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1);
-: 756: break;
-: 757: case DLOOP_KIND_INDEXED:
#####: 758: DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n",
-: 759: (int) dataloop->loop_params.i_t.count,
-: 760: dataloop->loop_params.i_t.dataloop);
-: 761: /* TODO: the blocksize and offset arrays are not printed by this routine yet */
#####: 762: if (!(dataloop->kind & DLOOP_FINAL_MASK))
#####: 763: PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, depth+1);
-: 764: break;
-: 765: case DLOOP_KIND_STRUCT:
#####: 766: DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count);
#####: 767: DLOOP_dbg_printf("\tblocksizes:\n");
#####: 768: for (i=0; i < dataloop->loop_params.s_t.count; i++)
#####: 769: DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]);
#####: 770: DLOOP_dbg_printf("\toffsets:\n");
#####: 771: for (i=0; i < dataloop->loop_params.s_t.count; i++)
#####: 772: DLOOP_dbg_printf("\t\t" MPI_AINT_FMT_DEC_SPEC "\n", (MPI_Aint) dataloop->loop_params.s_t.offset_array[i]);
#####: 773: DLOOP_dbg_printf("\tdatatypes:\n");
#####: 774: for (i=0; i < dataloop->loop_params.s_t.count; i++)
#####: 775: DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]);
#####: 776: if (dataloop->kind & DLOOP_FINAL_MASK) break;
-: 777: /* non-final struct: dump every member's subtree one level deeper */
#####: 778: for (i=0; i < dataloop->loop_params.s_t.count; i++) {
#####: 779: PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1);
-: 780: }
-: 781: break;
-: 782: default:
#####: 783: DLOOP_Assert(0);
-: 784: break;
-: 785: }
-: 786: return;
-: 787:}
-: 788:/* --END ERROR HANDLING-- */
|