-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop.c
        -:    0:Graph:dataloop.gcno
        -:    0:Data:dataloop.gcda
        -:    0:Runs:4381
        -:    0:Programs:1376
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <string.h>
        -:    9:#include <stdio.h>
        -:   10:#include <stdlib.h>
        -:   11:
        -:   12:#include "./dataloop.h"
        -:   13:
        -:   14:#undef DEBUG_DLOOP_SIZE
        -:   15:#undef DLOOP_DEBUG_MEMORY
        -:   16:
        -:   17:/* Dataloops
        -:   18: *
        -:   19: * The functions here are used for the creation, copying, update, and display
        -:   20: * of DLOOP_Dataloop structures and trees of these structures.
        -:   21: *
        -:   22: * Currently we store trees of dataloops in contiguous regions of memory.  They
        -:   23: * are stored in such a way that subtrees are also stored contiguously.  This
        -:   24: * makes it somewhat easier to copy these subtrees around.  Keep this in mind
        -:   25: * when looking at the functions below.
        -:   26: *
        -:   27: * The structures used in this file are defined in mpid_datatype.h.  There is
        -:   28: * no separate mpid_dataloop.h at this time.
        -:   29: *
        -:   30: * OPTIMIZATIONS:
        -:   31: *
        -:   32: * There are spots in the code with OPT tags that indicate where we could
        -:   33: * optimize particular calculations or avoid certain checks.
        -:   34: *
        -:   35: * NOTES:
        -:   36: *
        -:   37: * No locks are in place at this time!
        -:   38: */
        -:   39:
        -:   40:/* Some functions in this file are responsible for allocation of space for
        -:   41: * dataloops.  These structures include the dataloop structure itself
        -:   42: * followed by a sequence of variable-sized arrays, depending on the loop
        -:   43: * kind.  For example, a dataloop of kind DLOOP_KIND_INDEXED has a
        -:   44: * dataloop structure followed by an array of block sizes and then an array
        -:   45: * of offsets.
        -:   46: *
        -:   47: * For efficiency and ease of cleanup (preserving a single free at
        -:   48: * deallocation), we want to allocate this memory as a single large chunk.
        -:   49: * However, we must perform some alignment of the components of this chunk
        -:   50: * in order to obtain correct and efficient operation across all platforms.
        -:   51: */
        -:   52:
        -:   53:
        -:   54:/*@
        -:   55:  Dataloop_free - deallocate the resources used to store a dataloop
        -:   56:
        -:   57:  Input Parameters:
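        -:   58:. dataloop - address of the pointer to the dataloop structure; the loop is freed and *dataloop is set to NULL (nothing is done if *dataloop is already NULL)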
        -:   59:@*/
        -:   60:void PREPEND_PREFIX(Dataloop_free)(DLOOP_Dataloop **dataloop)
   281098:   61:{
        -:   62:
   281098:   63:    if (*dataloop == NULL) return;
        -:   64:
        -:   65:#ifdef DLOOP_DEBUG_MEMORY
        -:   66:    DLOOP_dbg_printf("DLOOP_Dataloop_free: freeing loop @ %x.\n",
        -:   67:		     (int) *dataloop);
        -:   68:#endif
        -:   69:
   281098:   70:    memset(*dataloop, 0, sizeof(DLOOP_Dataloop_common));
   281098:   71:    DLOOP_Free(*dataloop);
   281098:   72:    *dataloop = NULL;
   281098:   73:    return;
        -:   74:}
        -:   75:/*@
        -:   76:  Dataloop_copy - Copy an arbitrary dataloop structure, updating
        -:   77:  pointers as necessary
        -:   78:
        -:   79:  Input Parameters:
        -:   80:+ dest   - pointer to destination region
        -:   81:. src    - pointer to original dataloop structure
        -:   82:- size   - size of dataloop structure
        -:   83:
        -:   84:  This routine parses the dataloop structure as it goes in order to
        -:   85:  determine what exactly it needs to update.
        -:   86:
        -:   87:  Notes:
        -:   88:  It assumes that the source dataloop was allocated in our usual way;
        -:   89:  this means that the entire dataloop is in a contiguous region and that
        -:   90:  the root of the tree is first in the array.
        -:   91:
        -:   92:  This has some implications:
        -:   93:+ we can use a contiguous copy mechanism to copy the majority of the
        -:   94:  structure
        -:   95:- all pointers in the region are relative to the start of the data region,
        -:   96:  and the first dataloop in the array is the root of the tree
        -:   97:@*/
        -:   98:void PREPEND_PREFIX(Dataloop_copy)(void *dest,
        -:   99:				   void *src,
        -:  100:				   int size)
   135934:  101:{
        -:  102:    DLOOP_Offset ptrdiff;
        -:  103:
        -:  104:#ifdef DLOOP_DEBUG_MEMORY
        -:  105:    DLOOP_dbg_printf("DLOOP_Dataloop_copy: copying from %x to %x (%d bytes).\n",
        -:  106:		     (int) src, (int) dest, size);
        -:  107:#endif
        -:  108:
        -:  109:    /* copy region first */
   135934:  110:    DLOOP_Memcpy(dest, src, size);
        -:  111:
        -:  112:    /* Calculate difference in starting locations. DLOOP_Dataloop_update()
        -:  113:     * then traverses the new structure and updates internal pointers by
        -:  114:     * adding this difference to them. This way we can just copy the
        -:  115:     * structure, including pointers, in one big block.
        -:  116:     */
   135934:  117:    ptrdiff = (DLOOP_Offset) ((char *) dest - (char *) src);
        -:  118:
        -:  119:    /* traverse structure updating pointers */
   135934:  120:    PREPEND_PREFIX(Dataloop_update)(dest, ptrdiff);
        -:  121:
        -:  122:    return;
        -:  123:}
        -:  124:
        -:  125:
        -:  126:/*@
        -:  127:  Dataloop_update - update pointers after a copy operation
        -:  128:
        -:  129:  Input Parameters:
        -:  130:+ dataloop - pointer to loop to update
        -:  131:- ptrdiff - value indicating offset between old and new pointer values
        -:  132:
        -:  133:  This function is used to recursively update all the pointers in a
        -:  134:  dataloop tree.
        -:  135:@*/
        -:  136:void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
        -:  137:				     DLOOP_Offset ptrdiff)
   253100:  138:{
        -:  139:    /* OPT: only declare these variables down in the Struct case */
        -:  140:    int i;
        -:  141:    DLOOP_Dataloop **looparray;
        -:  142:
   253100:  143:    switch(dataloop->kind & DLOOP_KIND_MASK) {
        -:  144:	case DLOOP_KIND_CONTIG:
        -:  145:	case DLOOP_KIND_VECTOR:
        -:  146:	    /*
        -:  147:	     * All these really ugly assignments are really of the form:
        -:  148:	     *
        -:  149:	     * ((char *) dataloop->loop_params.cm_t.dataloop) += ptrdiff;
        -:  150:	     *
        -:  151:	     * However, some compilers spit out warnings about casting on the
        -:  152:	     * LHS, so we get this much nastier form instead (using common
        -:  153:	     * struct for contig and vector):
        -:  154:	     */
        -:  155:
   242125:  156:	    if (dataloop->loop_params.cm_t.dataloop)
        -:  157:	    {
        -:  158:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
        -:  159:
     2123:  160:		dataloop->loop_params.cm_t.dataloop =
        -:  161:		    (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
        -:  162:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
        -:  163:	    }
        -:  164:
   242125:  165:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
     2123:  166:		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.cm_t.dataloop, ptrdiff);
        -:  167:	    break;
        -:  168:
        -:  169:	case DLOOP_KIND_BLOCKINDEXED:
      656:  170:	    if (dataloop->loop_params.bi_t.offset_array)
        -:  171:	    {
        -:  172:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
      656:  173:		dataloop->loop_params.bi_t.offset_array =
        -:  174:		    (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
        -:  175:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
        -:  176:	    }
        -:  177:
      656:  178:	    if (dataloop->loop_params.bi_t.dataloop)
        -:  179:	    {
        -:  180:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
      156:  181:		dataloop->loop_params.bi_t.dataloop =
        -:  182:		    (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
        -:  183:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
        -:  184:	    }
        -:  185:
      656:  186:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
      156:  187:		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.bi_t.dataloop, ptrdiff);
        -:  188:	    break;
        -:  189:
        -:  190:	case DLOOP_KIND_INDEXED:
      622:  191:	    if (dataloop->loop_params.i_t.blocksize_array)
        -:  192:	    {
        -:  193:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
      622:  194:		dataloop->loop_params.i_t.blocksize_array =
        -:  195:		    (DLOOP_Count *) MPI_AINT_CAST_TO_VOID_PTR
        -:  196:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
        -:  197:	    }
        -:  198:
      622:  199:	    if (dataloop->loop_params.i_t.offset_array)
        -:  200:	    {
        -:  201:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
      622:  202:		dataloop->loop_params.i_t.offset_array =
        -:  203:		    (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
        -:  204:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
        -:  205:	    }
        -:  206:
      622:  207:	    if (dataloop->loop_params.i_t.dataloop)
        -:  208:	    {
        -:  209:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
    #####:  210:		dataloop->loop_params.i_t.dataloop =
        -:  211:		    (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
        -:  212:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
        -:  213:	    }
        -:  214:
      622:  215:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
    #####:  216:		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.i_t.dataloop, ptrdiff);
        -:  217:	    break;
        -:  218:
        -:  219:	case DLOOP_KIND_STRUCT:
     9697:  220:	    if (dataloop->loop_params.s_t.blocksize_array)
        -:  221:	    {
        -:  222:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
     9697:  223:		dataloop->loop_params.s_t.blocksize_array =
        -:  224:		    (DLOOP_Count *) MPI_AINT_CAST_TO_VOID_PTR
        -:  225:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
        -:  226:	    }
        -:  227:
     9697:  228:	    if (dataloop->loop_params.s_t.offset_array)
        -:  229:	    {
        -:  230:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
     9697:  231:		dataloop->loop_params.s_t.offset_array =
        -:  232:		    (DLOOP_Offset *) MPI_AINT_CAST_TO_VOID_PTR
        -:  233:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
        -:  234:	    }
        -:  235:
     9697:  236:	    if (dataloop->loop_params.s_t.dataloop_array)
        -:  237:	    {
        -:  238:		MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
     9697:  239:		dataloop->loop_params.s_t.dataloop_array =
        -:  240:		    (DLOOP_Dataloop **) MPI_AINT_CAST_TO_VOID_PTR
        -:  241:		    (MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
        -:  242:	    }
        -:  243:
        -:  244:	    /* fix the N dataloop pointers too */
     9697:  245:	    looparray = dataloop->loop_params.s_t.dataloop_array;
   122031:  246:	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
   112334:  247:		if (looparray[i])
        -:  248:		{
        -:  249:		    MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) looparray[i] + ptrdiff);
   112334:  250:		    looparray[i] = (DLOOP_Dataloop *) MPI_AINT_CAST_TO_VOID_PTR
        -:  251:			(MPI_VOID_PTR_CAST_TO_MPI_AINT (char *) looparray[i] + ptrdiff);
        -:  252:		}
        -:  253:	    }
        -:  254:
     9697:  255:	    if (dataloop->kind & DLOOP_FINAL_MASK) break;
        -:  256:
   122031:  257:	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
   112334:  258:		PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
        -:  259:	    }
        -:  260:	    break;
        -:  261:	default:
        -:  262:	    /* --BEGIN ERROR HANDLING-- */
    #####:  263:	    DLOOP_Assert(0);
        -:  264:	    break;
        -:  265:	    /* --END ERROR HANDLING-- */
        -:  266:    }
        -:  267:    return;
        -:  268:}
        -:  269:
        -:  270:/*@
        -:  271:  Dataloop_alloc - allocate the resources used to store a dataloop with
        -:  272:                   no old loops associated with it.
        -:  273:
        -:  274:  Input Parameters:
        -:  275:+ kind          - kind of dataloop to allocate
        -:  276:. count         - number of elements in dataloop (kind dependent)
        -:  277:. new_loop_p    - address at which to store new dataloop pointer
        -:  278:- new_loop_sz_p - pointer to integer in which to store new loop size (not written if allocation fails; *new_loop_p is set to NULL in that case)
        -:  279:
        -:  280:  Notes:
        -:  281:  The count parameter passed into this function will often be different
        -:  282:  from the count passed in at the MPI layer due to optimizations.
        -:  283:@*/
        -:  284:void PREPEND_PREFIX(Dataloop_alloc)(int kind,
        -:  285:				    DLOOP_Count count,
        -:  286:				    DLOOP_Dataloop **new_loop_p,
        -:  287:				    int *new_loop_sz_p)
   245436:  288:{
   245436:  289:    PREPEND_PREFIX(Dataloop_alloc_and_copy)(kind,
        -:  290:					    count,
        -:  291:					    NULL,
        -:  292:					    0,
        -:  293:					    new_loop_p,
        -:  294:					    new_loop_sz_p);
        -:  295:    return;
        -:  296:}
        -:  297:
        -:  298:/*@
        -:  299:  Dataloop_alloc_and_copy - allocate the resources used to store a
        -:  300:                            dataloop and copy in old dataloop as
        -:  301:			    appropriate
        -:  302:
        -:  303:  Input Parameters:
        -:  304:+ kind          - kind of dataloop to allocate
        -:  305:. count         - number of elements in dataloop (kind dependent)
        -:  306:. old_loop      - pointer to old dataloop (or NULL for none)
        -:  307:. old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL; otherwise it is asserted to be a multiple of the alignment size)
        -:  308:. new_loop_p    - address at which to store new dataloop pointer
        -:  309:- new_loop_sz_p - pointer to integer in which to store new loop size (not written if allocation fails; *new_loop_p is set to NULL in that case)
        -:  310:
        -:  311:  Notes:
        -:  312:  The count parameter passed into this function will often be different
        -:  313:  from the count passed in at the MPI layer.
        -:  314:@*/
        -:  315:void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
        -:  316:					     DLOOP_Count count,
        -:  317:					     DLOOP_Dataloop *old_loop,
        -:  318:					     int old_loop_sz,
        -:  319:					     DLOOP_Dataloop **new_loop_p,
        -:  320:					     int *new_loop_sz_p)
   254950:  321:{
   254950:  322:    int new_loop_sz = 0;
   254950:  323:    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
        -:  324:    int epsilon;
   254950:  325:    int loop_sz = sizeof(DLOOP_Dataloop);
   254950:  326:    int off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;
        -:  327:
        -:  328:    char *pos;
        -:  329:    DLOOP_Dataloop *new_loop;
        -:  330:
        -:  331:#ifdef HAVE_MAX_STRUCT_ALIGNMENT
   254950:  332:    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
   254950:  333:	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
        -:  334:    }
        -:  335:#endif
        -:  336:
   254950:  337:    if (old_loop != NULL) {
     9514:  338:	DLOOP_Assert((old_loop_sz % align_sz) == 0);
        -:  339:    }
        -:  340:
        -:  341:    /* calculate the space that we actually need for everything */
   254950:  342:    switch (kind) {
        -:  343:	case DLOOP_KIND_STRUCT:
        -:  344:	    /* need space for dataloop pointers and extents */
    #####:  345:	    ptr_sz = count * sizeof(DLOOP_Dataloop *);
    #####:  346:	    extent_sz = count * sizeof(DLOOP_Offset);
        -:  347:	case DLOOP_KIND_INDEXED:
        -:  348:	    /* need space for block sizes */
    18859:  349:	    blk_sz = count * sizeof(DLOOP_Count);
        -:  350:	case DLOOP_KIND_BLOCKINDEXED:
        -:  351:	    /* need space for block offsets */
    29268:  352:	    off_sz = count * sizeof(DLOOP_Offset);
        -:  353:	case DLOOP_KIND_CONTIG:
        -:  354:	case DLOOP_KIND_VECTOR:
        -:  355:	    break;
        -:  356:	default:
    #####:  357:	    DLOOP_Assert(0);
        -:  358:    }
        -:  359:
        -:  360:    /* pad everything that we're going to allocate */
   254950:  361:    epsilon = loop_sz % align_sz;
   254950:  362:    if (epsilon) loop_sz += align_sz - epsilon;
        -:  363:
   254950:  364:    epsilon = off_sz % align_sz;
   254950:  365:    if (epsilon) off_sz += align_sz - epsilon;
        -:  366:
   254950:  367:    epsilon = blk_sz % align_sz;
   254950:  368:    if (epsilon) blk_sz += align_sz - epsilon;
        -:  369:
   254950:  370:    epsilon = ptr_sz % align_sz;
   254950:  371:    if (epsilon) ptr_sz += align_sz - epsilon;
        -:  372:
   254950:  373:    epsilon = extent_sz % align_sz;
   254950:  374:    if (epsilon) extent_sz += align_sz - epsilon;
        -:  375:
   254950:  376:    new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
        -:  377:	extent_sz + old_loop_sz;
        -:  378:
        -:  379:    /* allocate space */
   254950:  380:    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
   254950:  381:    if (new_loop == NULL) {
    #####:  382:	*new_loop_p = NULL;
    #####:  383:	return;
        -:  384:    }
        -:  385:
        -:  386:#ifdef DLOOP_DEBUG_MEMORY
        -:  387:    DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, old = %d)\n",
        -:  388:		     (int) new_loop,
        -:  389:		     new_loop_sz,
        -:  390:		     loop_sz,
        -:  391:		     off_sz,
        -:  392:		     blk_sz,
        -:  393:		     ptr_sz,
        -:  394:		     extent_sz,
        -:  395:		     old_loop_sz);
        -:  396:#endif
        -:  397:
        -:  398:    /* set all the pointers in the new dataloop structure */
   254950:  399:    switch (kind) {
        -:  400:	case DLOOP_KIND_STRUCT:
        -:  401:	    /* order is:
        -:  402:	     * - pointers
        -:  403:	     * - blocks
        -:  404:	     * - offsets
        -:  405:	     * - extents
        -:  406:	     */
    #####:  407:	    new_loop->loop_params.s_t.dataloop_array =
        -:  408:		(DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
    #####:  409:	    new_loop->loop_params.s_t.blocksize_array =
        -:  410:		(DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
    #####:  411:	    new_loop->loop_params.s_t.offset_array =
        -:  412:		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
        -:  413:				  ptr_sz + blk_sz);
    #####:  414:	    new_loop->loop_params.s_t.el_extent_array =
        -:  415:		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
        -:  416:				  ptr_sz + blk_sz + off_sz);
    #####:  417:	    break;
        -:  418:	case DLOOP_KIND_INDEXED:
        -:  419:	    /* order is:
        -:  420:	     * - blocks
        -:  421:	     * - offsets
        -:  422:	     */
    18859:  423:	    new_loop->loop_params.i_t.blocksize_array =
        -:  424:		(DLOOP_Count *) (((char *) new_loop) + loop_sz);
    18859:  425:	    new_loop->loop_params.i_t.offset_array =
        -:  426:		(DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
    18859:  427:	    if (old_loop == NULL) {
    18855:  428:		new_loop->loop_params.i_t.dataloop = NULL;
        -:  429:	    }
        -:  430:	    else {
        4:  431:		new_loop->loop_params.i_t.dataloop =
        -:  432:		    (DLOOP_Dataloop *) (((char *) new_loop) +
        -:  433:					(new_loop_sz - old_loop_sz));
        -:  434:	    }
        -:  435:	    break;
        -:  436:	case DLOOP_KIND_BLOCKINDEXED:
    10409:  437:	    new_loop->loop_params.bi_t.offset_array =
        -:  438:		(DLOOP_Offset *) (((char *) new_loop) + loop_sz);
    10409:  439:	    if (old_loop == NULL) {
     9351:  440:		new_loop->loop_params.bi_t.dataloop = NULL;
        -:  441:	    }
        -:  442:	    else {
     1058:  443:		new_loop->loop_params.bi_t.dataloop =
        -:  444:		    (DLOOP_Dataloop *) (((char *) new_loop) +
        -:  445:					(new_loop_sz - old_loop_sz));
        -:  446:	    }
        -:  447:	    break;
        -:  448:	case DLOOP_KIND_CONTIG:
   185660:  449:	    if (old_loop == NULL) {
   178852:  450:		new_loop->loop_params.c_t.dataloop = NULL;
        -:  451:	    }
        -:  452:	    else {
     6808:  453:		new_loop->loop_params.c_t.dataloop =
        -:  454:		    (DLOOP_Dataloop *) (((char *) new_loop) +
        -:  455:					(new_loop_sz - old_loop_sz));
        -:  456:	    }
        -:  457:	    break;
        -:  458:	case DLOOP_KIND_VECTOR:
    40022:  459:	    if (old_loop == NULL) {
    38378:  460:		new_loop->loop_params.v_t.dataloop = NULL;
        -:  461:	    }
        -:  462:	    else {
     1644:  463:		new_loop->loop_params.v_t.dataloop =
        -:  464:		    (DLOOP_Dataloop *) (((char *) new_loop) +
        -:  465:					(new_loop_sz - old_loop_sz));
        -:  466:	    }
        -:  467:	    break;
        -:  468:	default:
    #####:  469:	    DLOOP_Assert(0);
        -:  470:    }
        -:  471:
   254950:  472:    pos = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
   254950:  473:    if (old_loop != NULL) {
     9514:  474:	PREPEND_PREFIX(Dataloop_copy)(pos, old_loop, old_loop_sz);
        -:  475:    }
        -:  476:
   254950:  477:    *new_loop_p    = new_loop;
   254950:  478:    *new_loop_sz_p = new_loop_sz;
   254950:  479:    return;
        -:  480:}
        -:  481:
        -:  482:/*@
        -:  483:  Dataloop_struct_alloc - allocate the resources used to store a dataloop and
        -:  484:                          copy in old dataloop as appropriate.  this version
        -:  485:                          is specifically for use when a struct dataloop is
        -:  486:                          being created; the space to hold old dataloops in
        -:  487:                          this case must be described back to the
        -:  488:                          implementation in order for efficient copying.
        -:  489:
        -:  490:  Input Parameters:
        -:  491:+ count         - number of elements in dataloop (kind dependent)
        -:  492:. old_loop_sz   - size of the space to reserve for old dataloops; it is added to the size of the allocation (this function takes no old_loop argument)
        -:  493:. basic_ct      - number of basic types for which new dataloops are needed
        -:  494:. old_loop_p    - address at which to store pointer to old loops
        -:  495:. new_loop_p    - address at which to store new struct dataloop pointer
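        -:  496:- new_loop_sz_p - address at which to store new loop size (if allocation fails, only *new_loop_p is written, as NULL; *old_loop_p and *new_loop_sz_p are left untouched)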
        -:  497:
        -:  498:  Notes:
        -:  499:  The count parameter passed into this function will often be different
        -:  500:  from the count passed in at the MPI layer due to optimizations.
        -:  501:
        -:  502:  The caller is responsible for filling in the region pointed to by
        -:  503:  old_loop_p (count elements).
        -:  504:@*/
        -:  505:void PREPEND_PREFIX(Dataloop_struct_alloc)(DLOOP_Count count,
        -:  506:					   int old_loop_sz,
        -:  507:					   int basic_ct,
        -:  508:					   DLOOP_Dataloop **old_loop_p,
        -:  509:					   DLOOP_Dataloop **new_loop_p,
        -:  510:					   int *new_loop_sz_p)
    32173:  511:{
    32173:  512:    int new_loop_sz = 0;
    32173:  513:    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
        -:  514:    int epsilon;
    32173:  515:    int loop_sz = sizeof(DLOOP_Dataloop);
        -:  516:    int off_sz, blk_sz, ptr_sz, extent_sz, basic_sz;
        -:  517:
        -:  518:    DLOOP_Dataloop *new_loop;
        -:  519:
        -:  520:#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    32173:  521:    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
    32173:  522:	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
        -:  523:    }
        -:  524:#endif
        -:  525:
        -:  526:    /* calculate the space that we actually need for everything */
    32173:  527:    ptr_sz    = count * sizeof(DLOOP_Dataloop *);
    32173:  528:    extent_sz = count * sizeof(DLOOP_Offset);
    32173:  529:    blk_sz    = count * sizeof(DLOOP_Count);
    32173:  530:    off_sz    = count * sizeof(DLOOP_Offset);
    32173:  531:    basic_sz  = sizeof(DLOOP_Dataloop);
        -:  532:
        -:  533:    /* pad everything that we're going to allocate */
    32173:  534:    epsilon = loop_sz % align_sz;
    32173:  535:    if (epsilon) loop_sz += align_sz - epsilon;
        -:  536:
    32173:  537:    epsilon = off_sz % align_sz;
    32173:  538:    if (epsilon) off_sz += align_sz - epsilon;
        -:  539:
    32173:  540:    epsilon = blk_sz % align_sz;
    32173:  541:    if (epsilon) blk_sz += align_sz - epsilon;
        -:  542:
    32173:  543:    epsilon = ptr_sz % align_sz;
    32173:  544:    if (epsilon) ptr_sz += align_sz - epsilon;
        -:  545:
    32173:  546:    epsilon = extent_sz % align_sz;
    32173:  547:    if (epsilon) extent_sz += align_sz - epsilon;
        -:  548:
    32173:  549:    epsilon = basic_sz % align_sz;
    32173:  550:    if (epsilon) basic_sz += align_sz - epsilon;
        -:  551:
        -:  552:    /* note: we pad *each* basic type dataloop, because the
        -:  553:     * code used to create them assumes that we're going to
        -:  554:     * do that.
        -:  555:     */
        -:  556:
    32173:  557:    new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
        -:  558:	extent_sz + (basic_ct * basic_sz) + old_loop_sz;
        -:  559:
        -:  560:    /* allocate space */
    32173:  561:    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
    32173:  562:    if (new_loop == NULL) {
    #####:  563:	*new_loop_p = NULL;
    #####:  564:	return;
        -:  565:    }
        -:  566:
        -:  567:#ifdef DLOOP_DEBUG_MEMORY
        -:  568:    DLOOP_dbg_printf("DLOOP_Dataloop_struct_alloc: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, basics = %d, old = %d)\n",
        -:  569:		     (int) new_loop,
        -:  570:		     new_loop_sz,
        -:  571:		     loop_sz,
        -:  572:		     off_sz,
        -:  573:		     blk_sz,
        -:  574:		     ptr_sz,
        -:  575:		     extent_sz,
        -:  576:		     basic_sz,
        -:  577:		     old_loop_sz);
        -:  578:#endif
        -:  579:
        -:  580:    /* set all the pointers in the new dataloop structure */
    32173:  581:    new_loop->loop_params.s_t.dataloop_array = (DLOOP_Dataloop **)
        -:  582:	(((char *) new_loop) + loop_sz);
    32173:  583:    new_loop->loop_params.s_t.blocksize_array =	(DLOOP_Count *)
        -:  584:	(((char *) new_loop) + loop_sz + ptr_sz);
    32173:  585:    new_loop->loop_params.s_t.offset_array = (DLOOP_Offset *)
        -:  586:	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz);
    32173:  587:    new_loop->loop_params.s_t.el_extent_array =	(DLOOP_Offset *)
        -:  588:	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz);
        -:  589:
    32173:  590:    *old_loop_p = (DLOOP_Dataloop *)
        -:  591:	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz + extent_sz);
    32173:  592:    *new_loop_p = new_loop;
    32173:  593:    *new_loop_sz_p = new_loop_sz;
        -:  594:
    32173:  595:    return;
        -:  596:}
        -:  597:
        -:  598:/*@
        -:  599:  Dataloop_dup - make a copy of a dataloop
        -:  600:
        -:  601:  Returns nothing; on success *new_loop_p points to the copy, and on allocation failure *new_loop_p is set to NULL.
        -:  602:@*/
        -:  603:void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
        -:  604:				  int old_loop_sz,
        -:  605:				  DLOOP_Dataloop **new_loop_p)
      768:  606:{
        -:  607:    DLOOP_Dataloop *new_loop;
        -:  608:
      768:  609:    DLOOP_Assert(old_loop != NULL);
      768:  610:    DLOOP_Assert(old_loop_sz > 0);
        -:  611:
      768:  612:    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
      768:  613:    if (new_loop == NULL) {
    #####:  614:	*new_loop_p = NULL;
    #####:  615:	return;
        -:  616:    }
        -:  617:
      768:  618:    PREPEND_PREFIX(Dataloop_copy)(new_loop, old_loop, old_loop_sz);
      768:  619:    *new_loop_p = new_loop;
      768:  620:    return;
        -:  621:}
        -:  622:
        -:  623:/*@
        -:  624:  Dataloop_stream_size - return the size of the data described by the dataloop
        -:  625:
        -:  626:  Input Parameters:
        -:  627:+ dl_p   - pointer to dataloop for which we will return the size
        -:  628:- sizefn - function for determining size of types in the corresponding stream (passing NULL will instead result in el_size values being used)
        -:  629:
        -:  630:  Return Value: the size of one bottom-level element multiplied by the counts of every loop walked to reach it; when a struct loop is reached, the blocksize-weighted sum of the sizes of its child dataloops, multiplied by the counts accumulated above it.
        -:  631:@*/
        -:  632:DLOOP_Offset
        -:  633:PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
        -:  634:				     DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
    #####:  635:{
    #####:  636:    DLOOP_Offset tmp_sz, tmp_ct = 1;
        -:  637:    /* tmp_ct accumulates the product of the counts of every loop visited on the way down; tmp_sz holds the size that product is multiplied by at each return. */
        -:  638:    for (;;)
        -:  639:    {
    #####:  640:        if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
        -:  641:        {
        -:  642:            int i;
        -:  643:            /* A struct has one child dataloop per entry: size each child recursively (same sizefn) and weight it by that entry's blocksize. */
    #####:  644:            tmp_sz = 0;
    #####:  645:            for (i = 0; i < dl_p->loop_params.s_t.count; i++)
        -:  646:            {
    #####:  647:                tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
        -:  648:                    PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
        -:  649:            }
    #####:  650:            return tmp_sz * tmp_ct;
        -:  651:        }
        -:  652:        /* Non-struct kinds: fold this level's element count into tmp_ct. The DLOOP_dbg_printf calls are compiled only when DLOOP_DEBUG_SIZE is defined. NOTE(review): the top of this file undefines DEBUG_DLOOP_SIZE, a different spelling -- confirm which name is intended. */
    #####:  653:        switch (dl_p->kind & DLOOP_KIND_MASK) {
        -:  654:        case DLOOP_KIND_CONTIG:
    #####:  655:            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
        -:  656:#ifdef DLOOP_DEBUG_SIZE
        -:  657:            DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
        -:  658:                             (int) dl_p->loop_params.c_t.count, (MPI_Aint) tmp_ct);
        -:  659:#endif
    #####:  660:            break;
        -:  661:        case DLOOP_KIND_VECTOR:
    #####:  662:            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
        -:  663:		      (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
        -:  664:#ifdef DLOOP_DEBUG_SIZE
        -:  665:            DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
        -:  666:                             (int) dl_p->loop_params.v_t.count,
        -:  667:                             (int) dl_p->loop_params.v_t.blocksize,
        -:  668:                             (MPI_Aint) tmp_ct);
        -:  669:#endif
    #####:  670:            break;
        -:  671:        case DLOOP_KIND_BLOCKINDEXED:
    #####:  672:            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
        -:  673:		      (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
        -:  674:#ifdef DLOOP_DEBUG_SIZE
        -:  675:            DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n",
        -:  676:                             (int) dl_p->loop_params.bi_t.count *
        -:  677:                             (int) dl_p->loop_params.bi_t.blocksize,
        -:  678:                             (MPI_Aint) tmp_ct);
        -:  679:#endif
    #####:  680:            break;
        -:  681:        case DLOOP_KIND_INDEXED:
    #####:  682:            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
        -:  683:#ifdef DLOOP_DEBUG_SIZE
        -:  684:            DLOOP_dbg_printf("stream_size: contig: blks = %d; new tot_ct = " MPI_AINT_FMT_DEC_SPEC "\n", /* NOTE(review): the message label reads contig although this is the indexed case -- looks like a copy/paste slip */
        -:  685:                             (int) dl_p->loop_params.i_t.total_blocks,
        -:  686:                             (MPI_Aint) tmp_ct);
        -:  687:#endif
    #####:  688:            break;
        -:  689:        default:
        -:  690:            /* --BEGIN ERROR HANDLING-- */
    #####:  691:            DLOOP_Assert(0); /* kind is none of the five kinds handled above */
        -:  692:            break;
        -:  693:            /* --END ERROR HANDLING-- */
        -:  694:        }
        -:  695:        /* A loop flagged final ends the walk; otherwise continue with its child, read through the cm_t member (presumably the layout shared by all non-struct kinds -- confirm in the header). */
    #####:  696:        if (dl_p->kind & DLOOP_FINAL_MASK) break;
        -:  697:        else {
    #####:  698:            DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
    #####:  699:            dl_p = dl_p->loop_params.cm_t.dataloop;
        -:  700:        }
    #####:  701:    }
        -:  702:
        -:  703:    /* call fn for size using bottom type, or use size if fnptr is NULL */
    #####:  704:    tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);
        -:  705:    /* dl_p now points at the final loop of the chain: total = its element size times the accumulated count. */
    #####:  706:    return tmp_sz * tmp_ct;
        -:  707:}
        -:  708:
        -:  709:/* --BEGIN ERROR HANDLING-- */
        -:  710:/*@
        -:  711:  Dataloop_print - dump a dataloop tree through DLOOP_dbg_printf for debugging
        -:  712:  purposes; a NULL dataloop prints a short notice and returns
        -:  713:
        -:  714:  Input Parameters:
        -:  715:+ dataloop - root of tree to dump (may be NULL)
        -:  716:- depth - starting depth; printed as treedepth and incremented by one for each level of children
        -:  717:@*/
        -:  718:void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop,
        -:  719:				    int depth)
    #####:  720:{
        -:  721:    int i;
        -:  722:    /* i is used only by the STRUCT case. A NULL dataloop is reported instead of being dereferenced. */
    #####:  723:    if (dataloop == NULL)
        -:  724:    {
    #####:  725:        DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n");
    #####:  726:        return;
        -:  727:    }
        -:  728:    /* Common header line for every node, then the kind-specific parameters; children are printed at depth+1 unless the node is flagged final. NOTE(review): pointers are handed to %p without a (void *) cast -- assuming DLOOP_dbg_printf is printf-like, confirm that is acceptable here. */
    #####:  729:    DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" MPI_AINT_FMT_DEC_SPEC "\n",
        -:  730:		     dataloop, (int) depth, (int) dataloop->kind, (MPI_Aint) dataloop->el_extent);
    #####:  731:    switch(dataloop->kind & DLOOP_KIND_MASK) {
        -:  732:	case DLOOP_KIND_CONTIG:
    #####:  733:	    DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n",
        -:  734:			     (int) dataloop->loop_params.c_t.count,
        -:  735:			     dataloop->loop_params.c_t.dataloop);
    #####:  736:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
    #####:  737:		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1);
        -:  738:	    break;
        -:  739:	case DLOOP_KIND_VECTOR:
    #####:  740:	    DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" MPI_AINT_FMT_DEC_SPEC ", datatype=%p\n",
        -:  741:			     (int) dataloop->loop_params.v_t.count,
        -:  742:			     (int) dataloop->loop_params.v_t.blocksize,
        -:  743:			     (MPI_Aint) dataloop->loop_params.v_t.stride,
        -:  744:			     dataloop->loop_params.v_t.dataloop);
    #####:  745:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
    #####:  746:		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1);
        -:  747:	    break;
        -:  748:	case DLOOP_KIND_BLOCKINDEXED:
    #####:  749:	    DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n",
        -:  750:			     (int) dataloop->loop_params.bi_t.count,
        -:  751:			     (int) dataloop->loop_params.bi_t.blocksize,
        -:  752:			     dataloop->loop_params.bi_t.dataloop);
        -:  753:	    /* the per-block offsets are not printed here (left for later) */
    #####:  754:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
    #####:  755:		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1);
        -:  756:	    break;
        -:  757:	case DLOOP_KIND_INDEXED:
    #####:  758:	    DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n",
        -:  759:			     (int) dataloop->loop_params.i_t.count,
        -:  760:			     dataloop->loop_params.i_t.dataloop);
        -:  761:	    /* the blocksizes and offsets are not printed here (left for later) */
    #####:  762:	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
    #####:  763:		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, depth+1);
        -:  764:	    break;
        -:  765:	case DLOOP_KIND_STRUCT:
    #####:  766:	    DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count);
    #####:  767:	    DLOOP_dbg_printf("\tblocksizes:\n");
    #####:  768:	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
    #####:  769:		DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]);
    #####:  770:	    DLOOP_dbg_printf("\toffsets:\n");
    #####:  771:	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
    #####:  772:		DLOOP_dbg_printf("\t\t" MPI_AINT_FMT_DEC_SPEC "\n", (MPI_Aint) dataloop->loop_params.s_t.offset_array[i]);
    #####:  773:	    DLOOP_dbg_printf("\tdatatypes:\n");
    #####:  774:	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
    #####:  775:		DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]);
    #####:  776:	    if (dataloop->kind & DLOOP_FINAL_MASK) break;
        -:  777:	    /* not final: descend into each of the struct's child dataloops, one level deeper */
    #####:  778:	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
    #####:  779:		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1);
        -:  780:	    }
        -:  781:	    break;
        -:  782:	default:
    #####:  783:	    DLOOP_Assert(0);
        -:  784:	    break;
        -:  785:    }
        -:  786:    return;
        -:  787:}
        -:  788:/* --END ERROR HANDLING-- */