-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/segment.c
        -:    0:Graph:segment.gcno
        -:    0:Data:segment.gcda
        -:    0:Runs:4382
        -:    0:Programs:1376
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <stdio.h>
        -:    9:#include <stdlib.h>
        -:   10:
        -:   11:#include "./dataloop.h"
        -:   12:
        -:   13:#undef DLOOP_DEBUG_MANIPULATE
        -:   14:
        -:   15:#ifndef PREPEND_PREFIX
        -:   16:#error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"
        -:   17:#endif
        -:   18:
        -:   19:/* Notes on functions:
        -:   20: *
        -:   21: * There are a few different sets of functions here:
        -:   22: * - DLOOP_Segment_manipulate() - uses a "piece" function to perform operations
        -:   23: *   using segments (piece functions defined elsewhere)
        -:   24: * - PREPEND_PREFIX functions - these define the externally visible interface
        -:   25: *   to segment functionality
        -:   26: */
        -:   27:
        -:   28:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp);
        -:   29:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp);
        -:   30:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
        -:   31:				       struct DLOOP_Dataloop *dlp,
        -:   32:				       int branch_flag);
        -:   33:/* Segment_init
        -:   34: * Prepare segp to traverse count instances of the datatype named by handle, located at buf.
        -:   35: * buf    - datatype buffer location
        -:   36: * count  - number of instances of the datatype in the buffer
        -:   37: * handle - handle for datatype (could be derived or not)
        -:   38: * segp   - pointer to previously allocated segment structure
        -:   39: * flag   - flag indicating which optimizations are valid
        -:   40: *          should be one of DLOOP_DATALOOP_HOMOGENEOUS, _HETEROGENEOUS,
        -:   41: *          or _ALL_BYTES.
        -:   42: * Returns 0 unconditionally; a bad flag or excessive depth is caught by DLOOP_Assert.
        -:   43: * Notes:
        -:   44: * - Assumes that the segment has been allocated.
        -:   45: * - Older MPICH2 code may pass "0" to indicate HETEROGENEOUS or "1" to
        -:   46: *   indicate HOMOGENEOUS (text previously repeated HETEROGENEOUS; TODO confirm the numeric values).
        -:   47: *
        -:   48: */
        -:   49:int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf,
        -:   50:				 DLOOP_Count count,
        -:   51:				 DLOOP_Handle handle,
        -:   52:				 struct DLOOP_Segment *segp,
        -:   53:				 int flag)
   792207:   54:{
   792207:   55:    DLOOP_Offset elmsize = 0;
   792207:   56:    int i, depth = 0;
   792207:   57:    int branch_detected = 0;
        -:   58:
        -:   59:    struct DLOOP_Dataloop_stackelm *elmp;
   792207:   60:    struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;
        -:   61:
   792207:   62:    DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS ||
        -:   63:		 flag == DLOOP_DATALOOP_HOMOGENEOUS   ||
        -:   64:		 flag == DLOOP_DATALOOP_ALL_BYTES);
        -:   65:
        -:   66:#ifdef DLOOP_DEBUG_MANIPULATE
        -:   67:    DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n",
        -:   68:		    count,
        -:   69:		    buf);
        -:   70:#endif
        -:   71:
   792207:   72:    if (!DLOOP_Handle_hasloop_macro(handle)) {
        -:   73:	/* simplest case; datatype has no loop (basic): builtin becomes a final contig of count elements */
        -:   74:
    23226:   75:	DLOOP_Handle_get_size_macro(handle, elmsize);
        -:   76:
    23226:   77:	sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
    23226:   78:	sblp->loop_params.c_t.count = count;
    23226:   79:	sblp->loop_params.c_t.dataloop = 0;
    23226:   80:	sblp->el_size = elmsize;
    23226:   81:        DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type);
    23226:   82:	DLOOP_Handle_get_extent_macro(handle, sblp->el_extent);
        -:   83:
    23226:   84:	dlp = sblp;
    23226:   85:	depth = 1;
        -:   86:    }
   768981:   87:    else if (count == 0) {
        -:   88:	/* zero instances: builtin is an empty final contig; note el_type is not assigned on this path */
    #####:   89:	sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
    #####:   90:	sblp->loop_params.c_t.count = 0;
    #####:   91:	sblp->loop_params.c_t.dataloop = 0;
    #####:   92:	sblp->el_size = 0;
    #####:   93:	sblp->el_extent = 0;
        -:   94:
    #####:   95:	dlp = sblp;
    #####:   96:	depth = 1;
        -:   97:    }
   768981:   98:    else if (count == 1) {
        -:   99:	/* one instance: start from the datatype's own dataloop; the builtin is not used */
   598764:  100:	DLOOP_Handle_get_loopptr_macro(handle, dlp, flag);
   598764:  101:	DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
        -:  102:    }
        -:  103:    else {
        -:  104:	/* default (count is neither 0 nor 1): the builtin supplies the repetition
        -:  105:	 * over count; depth is read first because it selects the flattened forms below
        -:  106:	 */
        -:  107:	DLOOP_Dataloop *oldloop; /* loop from original type, before new count */
        -:  108:	DLOOP_Offset type_size, type_extent;
        -:  109:	DLOOP_Type el_type;
        -:  110:	
   170217:  111:	DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
        -:  112:
   170217:  113:	DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag);
   170217:  114:	DLOOP_Assert(oldloop != NULL);
   170217:  115:	DLOOP_Handle_get_size_macro(handle, type_size);
   170217:  116:	DLOOP_Handle_get_extent_macro(handle, type_extent);
   170217:  117:        DLOOP_Handle_get_basic_type_macro(handle, el_type);
        -:  118:
   170217:  119:	if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG))
        -:  120:	{
     4159:  121:	    if (type_size == type_extent)
        -:  122:	    {
        -:  123:		/* size equals extent: use a single contig of count * (original count) elements */
     1996:  124:		sblp->kind                     = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
     1996:  125:		sblp->loop_params.c_t.count    = count * oldloop->loop_params.c_t.count;
     1996:  126:		sblp->loop_params.c_t.dataloop = NULL;
     1996:  127:		sblp->el_size                  = oldloop->el_size;
     1996:  128:		sblp->el_extent                = oldloop->el_extent;
     1996:  129:		sblp->el_type                  = oldloop->el_type;
        -:  130:	    }
        -:  131:	    else
        -:  132:	    {
        -:  133:		/* use a vector: original contig count is the blocksize, extent of original type is the stride */
     2163:  134:		sblp->kind                      = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;
     2163:  135:		sblp->loop_params.v_t.count     = count;
     2163:  136:		sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
     2163:  137:		sblp->loop_params.v_t.stride    = type_extent;
     2163:  138:		sblp->loop_params.v_t.dataloop  = NULL;
     2163:  139:		sblp->el_size                   = oldloop->el_size;
     2163:  140:		sblp->el_extent                 = oldloop->el_extent;
     2163:  141:		sblp->el_type                   = oldloop->el_type;
        -:  142:	    }
        -:  143:	}
        -:  144:	else
        -:  145:	{
        -:  146:	    /* general case: a non-final contig of count whose child is the original loop */
   166058:  147:	    sblp->kind                     = DLOOP_KIND_CONTIG;
   166058:  148:	    sblp->loop_params.c_t.count    = count;
   166058:  149:	    sblp->loop_params.c_t.dataloop = oldloop;
   166058:  150:	    sblp->el_size                  = type_size;
   166058:  151:	    sblp->el_extent                = type_extent;
   166058:  152:	    sblp->el_type                  = el_type;
        -:  153:
   166058:  154:	    depth++; /* we're adding to the depth with the builtin */
   166058:  155:            DLOOP_Assert(depth < (DLOOP_MAX_DATATYPE_DEPTH));
        -:  156:	}
        -:  157:
   170217:  158:	dlp = sblp;
        -:  159:    }
        -:  160:
        -:  161:    /* assert instead of return b/c dtype/dloop errorhandling code is inconsistent */
   792207:  162:    DLOOP_Assert(depth < (DLOOP_MAX_DATATYPE_DEPTH));
        -:  163:
        -:  164:    /* initialize the rest of the segment values */
   792207:  165:    segp->handle = handle;
   792207:  166:    segp->ptr = (DLOOP_Buffer) buf;
   792207:  167:    segp->stream_off = 0;
   792207:  168:    segp->cur_sp = 0;
   792207:  169:    segp->valid_sp = 0;
        -:  170:
        -:  171:    /* initialize the first stackelm in its entirety (from dlp, with branch flag 0) */
   792207:  172:    elmp = &(segp->stackelm[0]);
   792207:  173:    DLOOP_Stackelm_load(elmp, dlp, 0);
   792207:  174:    branch_detected = elmp->may_require_reloading;
        -:  175:
        -:  176:    /* Fill in parameters not set by DLOOP_Stackelm_load */
   792207:  177:    elmp->orig_offset = 0;
   792207:  178:    elmp->curblock    = elmp->orig_block;
        -:  179:    /* DLOOP_Stackelm_offset assumes correct orig_count, curcount, loop_p */
   792207:  180:    elmp->curoffset   = /* elmp->orig_offset + */ DLOOP_Stackelm_offset(elmp);
        -:  181:
   792207:  182:    i = 1;
  1757528:  183:    while(!(dlp->kind & DLOOP_FINAL_MASK))
        -:  184:    {
        -:  185:        /* descend to the child dataloop; for structs only dataloop_array[0] is followed */
   173114:  186:        switch (dlp->kind & DLOOP_KIND_MASK)
        -:  187:        {
        -:  188:            case DLOOP_KIND_CONTIG:
        -:  189:            case DLOOP_KIND_VECTOR:
        -:  190:            case DLOOP_KIND_BLOCKINDEXED:
        -:  191:            case DLOOP_KIND_INDEXED:
   173114:  192:                dlp = dlp->loop_params.cm_t.dataloop;
   173114:  193:                break;
        -:  194:            case DLOOP_KIND_STRUCT:
    #####:  195:                dlp = dlp->loop_params.s_t.dataloop_array[0];
    #####:  196:                break;
        -:  197:            default:
        -:  198:                /* --BEGIN ERROR HANDLING-- */
    #####:  199:                DLOOP_Assert(0);
        -:  200:                break;
        -:  201:                /* --END ERROR HANDLING-- */
        -:  202:        }
        -:  203:
   173114:  204:        DLOOP_Assert(i < DLOOP_MAX_DATATYPE_DEPTH);
        -:  205:
        -:  206:	/* loop_p, orig_count, orig_block, and curcount are all filled by us now.
        -:  207:	 * the rest are filled in at processing time.
        -:  208:	 */
   173114:  209:	elmp = &(segp->stackelm[i]);
        -:  210:
   173114:  211:	DLOOP_Stackelm_load(elmp, dlp, branch_detected);
   173114:  212:	branch_detected = elmp->may_require_reloading;
   173114:  213:        i++;
        -:  214:
        -:  215:    }
        -:  216:
   792207:  217:    segp->valid_sp = depth-1;
        -:  218:
   792207:  219:    return 0;
        -:  220:}
        -:  221:
        -:  222:/* Segment_alloc
        -:  223: * Returns the result of DLOOP_Malloc(sizeof(struct DLOOP_Segment)); the result is neither checked nor initialized here.
        -:  224: */
        -:  225:struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void)
   730015:  226:{
   730015:  227:    return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment));
        -:  228:}
        -:  229:
        -:  230:/* Segment_free
        -:  231: * Hands segp to DLOOP_Free; nothing else is released by this function.
        -:  232: * Input Parameters:
        -:  233: * segp - pointer to segment
        -:  234: */
        -:  235:void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp)
   730015:  236:{
   730015:  237:    DLOOP_Free(segp);
        -:  238:    return;
        -:  239:}
        -:  240:
        -:  241:/* DLOOP_Segment_manipulate - do something to a segment
        -:  242: *
        -:  243: * If you think of all the data to be manipulated (packed, unpacked, whatever),
        -:  244: * as a stream of bytes, it's easier to understand how first and last fit in.
        -:  245: *
        -:  246: * This function does all the work, calling the piecefn passed in when it
        -:  247: * encounters a datatype element which falls into the range of first..(last-1).
        -:  248: *
        -:  249: * piecefn can be NULL, in which case this function doesn't do anything when it
        -:  250: * hits a region.  This is used internally for repositioning within this stream.
        -:  251: *
        -:  252: * last is a byte offset to the byte just past the last byte in the stream
        -:  253: * to operate on.  this makes the calculations all over MUCH cleaner.
        -:  254: *
        -:  255: * stream_off, stream_el_size, first, and last are all working in terms of the
        -:  256: * types and sizes for the stream, which might be different from the local sizes
        -:  257: * (in the heterogeneous case).
        -:  258: *
        -:  259: * This is a horribly long function.  Too bad; it's complicated :)! -- Rob
        -:  260: * The DLOOP_SEGMENT_* macros below use this function's locals and segp by name; SAVE_LOCAL_VALUES stores cur_sp, valid_sp and stream_off into segp and also writes stream_off to *lastp.
        -:  261: * NOTE: THIS IMPLEMENTATION CANNOT HANDLE STRUCT DATALOOPS.
        -:  262: */
        -:  263:#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES		\
        -:  264:{						\
        -:  265:    segp->cur_sp     = cur_sp;			\
        -:  266:    segp->valid_sp   = valid_sp;		\
        -:  267:    segp->stream_off = stream_off;		\
        -:  268:    *lastp           = stream_off;		\
        -:  269:}
        -:  270:/* Load last from *lastp and cur_sp, valid_sp, stream_off from segp; cur_elmp is pointed at stackelm[cur_sp]. */
        -:  271:#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES		\
        -:  272:{						\
        -:  273:    last       = *lastp;			\
        -:  274:    cur_sp     = segp->cur_sp;			\
        -:  275:    valid_sp   = segp->valid_sp;		\
        -:  276:    stream_off = segp->stream_off;		\
        -:  277:    cur_elmp   = &(segp->stackelm[cur_sp]);	\
        -:  278:}
        -:  279:/* Rewind: zero segp->stream_off and segp->cur_sp and restore stackelm[0]'s count, block and offset; the locals cur_sp and stream_off are not touched. */
        -:  280:#define DLOOP_SEGMENT_RESET_VALUES				\
        -:  281:{								\
        -:  282:    segp->stream_off     = 0;					\
        -:  283:    segp->cur_sp         = 0; 					\
        -:  284:    cur_elmp             = &(segp->stackelm[0]);		\
        -:  285:    cur_elmp->curcount   = cur_elmp->orig_count;		\
        -:  286:    cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp);	\
        -:  287:    cur_elmp->curblock   = cur_elmp->orig_block;		\
        -:  288:    cur_elmp->curoffset  = cur_elmp->orig_offset +              \
        -:  289:                           DLOOP_Stackelm_offset(cur_elmp);     \
        -:  290:}
        -:  291:/* Decrement cur_sp; if it is still >= 0 repoint cur_elmp, otherwise save the local values and return from the enclosing function. */
        -:  292:#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT			\
        -:  293:{								\
        -:  294:    cur_sp--;							\
        -:  295:    if (cur_sp >= 0) cur_elmp = &segp->stackelm[cur_sp];	\
        -:  296:    else {							\
        -:  297:	DLOOP_SEGMENT_SAVE_LOCAL_VALUES;			\
        -:  298:	return;							\
        -:  299:    }								\
        -:  300:}
        -:  301:/* Increment cur_sp and repoint cur_elmp at the new stack entry (no bounds check is made here). */
        -:  302:#define DLOOP_SEGMENT_PUSH			\
        -:  303:{						\
        -:  304:    cur_sp++;					\
        -:  305:    cur_elmp = &segp->stackelm[cur_sp];		\
        -:  306:}
        -:  307:/* Accessors for entry curcount_ of the per-kind arrays reached through elmp_->loop_p->loop_params. */
        -:  308:#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \
        -:  309:(elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)]
        -:  310:
        -:  311:#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \
        -:  312:(elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)]
        -:  313:
        -:  314:#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \
        -:  315:(elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)]
        -:  316:
        -:  317:#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \
        -:  318:(elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)]
        -:  319:
        -:  320:#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \
        -:  321:(elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)]
        -:  322:
        -:  323:#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \
        -:  324:(elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)]
        -:  325:
        -:  326:#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \
        -:  327:(elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)]
        -:  328:
        -:  329:void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp,
        -:  330:					DLOOP_Offset first,
        -:  331:					DLOOP_Offset *lastp,
        -:  332:					int (*contigfn) (DLOOP_Offset *blocks_p,
        -:  333:							 DLOOP_Type el_type,
        -:  334:							 DLOOP_Offset rel_off,
        -:  335:							 DLOOP_Buffer bufp,
        -:  336:							 void *v_paramp),
        -:  337:					int (*vectorfn) (DLOOP_Offset *blocks_p,
        -:  338:							 DLOOP_Count count,
        -:  339:							 DLOOP_Count blklen,
        -:  340:							 DLOOP_Offset stride,
        -:  341:							 DLOOP_Type el_type,
        -:  342:							 DLOOP_Offset rel_off,
        -:  343:							 DLOOP_Buffer bufp,
        -:  344:							 void *v_paramp),
        -:  345:					int (*blkidxfn) (DLOOP_Offset *blocks_p,
        -:  346:							 DLOOP_Count count,
        -:  347:							 DLOOP_Count blklen,
        -:  348:							 DLOOP_Offset *offsetarray,
        -:  349:							 DLOOP_Type el_type,
        -:  350:							 DLOOP_Offset rel_off,
        -:  351:							 DLOOP_Buffer bufp,
        -:  352:							 void *v_paramp),
        -:  353:					int (*indexfn) (DLOOP_Offset *blocks_p,
        -:  354:							DLOOP_Count count,
        -:  355:							DLOOP_Count *blockarray,
        -:  356:							DLOOP_Offset *offsetarray,
        -:  357:							DLOOP_Type el_type,
        -:  358:							DLOOP_Offset rel_off,
        -:  359:							DLOOP_Buffer bufp,
        -:  360:							void *v_paramp),
        -:  361:					DLOOP_Offset (*sizefn) (DLOOP_Type el_type),
        -:  362:					void *pieceparams)
  1148633:  363:{
        -:  364:    /* these four are the "local values": cur_sp, valid_sp, last, stream_off */
        -:  365:    int cur_sp, valid_sp;
        -:  366:    DLOOP_Offset last, stream_off;
        -:  367:
        -:  368:    struct DLOOP_Dataloop_stackelm *cur_elmp;
  1148633:  369:    enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL;
        -:  370:
  1148633:  371:    DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
        -:  372:
  1148633:  373:    if (first == *lastp) {
        -:  374:	/* nothing to do */
    #####:  375:	DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (" MPI_AINT_FMT_DEC_SPEC ")\n", first);
    #####:  376:	return;
        -:  377:    }
        -:  378:
        -:  379:    /* first we ensure that stream_off and first are in the same spot */
  1148633:  380:    if (first != stream_off) {
        -:  381:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  382:	DLOOP_dbg_printf("first=" MPI_AINT_FMT_DEC_SPEC "; stream_off=" MPI_AINT_FMT_DEC_SPEC "; resetting.\n",
        -:  383:			 first, stream_off);
        -:  384:#endif
        -:  385:
   133210:  386:	if (first < stream_off) {
   133210:  387:	    DLOOP_SEGMENT_RESET_VALUES;
   133210:  388:	    stream_off = 0;
        -:  389:	}
        -:  390:
   133210:  391:	if (first != stream_off) {
    #####:  392:	    DLOOP_Offset tmp_last = first;
        -:  393:
        -:  394:	    /* use manipulate function with a NULL piecefn to advance
        -:  395:	     * stream offset
        -:  396:	     */
    #####:  397:	    PREPEND_PREFIX(Segment_manipulate)(segp,
        -:  398:					       stream_off,
        -:  399:					       &tmp_last,
        -:  400:					       NULL, /* contig fn */
        -:  401:					       NULL, /* vector fn */
        -:  402:					       NULL, /* blkidx fn */
        -:  403:					       NULL, /* index fn */
        -:  404:					       sizefn,
        -:  405:                                               NULL);
        -:  406:
        -:  407:	    /* --BEGIN ERROR HANDLING-- */
        -:  408:	    /* verify that we're in the right location */
    #####:  409:	    if (tmp_last != first) DLOOP_Assert(0);
        -:  410:	    /* --END ERROR HANDLING-- */
        -:  411:	}
        -:  412:
   133210:  413:	DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
        -:  414:
        -:  415:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  416:	DLOOP_dbg_printf("done repositioning stream_off; first=" MPI_AINT_FMT_DEC_SPEC ", stream_off=" MPI_AINT_FMT_DEC_SPEC ", last=" MPI_AINT_FMT_DEC_SPEC "\n",
        -:  417:		   first, stream_off, last);
        -:  418:#endif
        -:  419:    }
        -:  420:
        -:  421:    for (;;) {
        -:  422:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  423:#if 0
        -:  424:        DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n",
        -:  425:			 cur_sp, (unsigned) cur_elmp);
        -:  426:#endif
        -:  427:#endif
        -:  428:
 30475129:  429:	if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) {
 16544239:  430:	    int piecefn_indicated_exit = -1;
        -:  431:	    DLOOP_Offset myblocks, local_el_size, stream_el_size;
        -:  432:	    DLOOP_Type el_type;
        -:  433:
        -:  434:	    /* structs are never finals (leaves) */
 16544239:  435:	    DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) !=
        -:  436:		   DLOOP_KIND_STRUCT);
        -:  437:
        -:  438:	    /* pop immediately on zero count */
 16544239:  439:	    if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  440:
        -:  441:	    /* size on this system of the int, double, etc. that is
        -:  442:	     * the elementary type.
        -:  443:	     */
 16544239:  444:	    local_el_size  = cur_elmp->loop_p->el_size;
 16544239:  445:	    el_type        = cur_elmp->loop_p->el_type;
 16544239:  446:	    stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size;
        -:  447:
        -:  448:	    /* calculate number of elem. types to work on and function to use.
        -:  449:	     * default is to use the contig piecefn (if there is one).
        -:  450:	     */
 16544239:  451:	    myblocks = cur_elmp->curblock;
 16544239:  452:	    piecefn_type = (contigfn ? PF_CONTIG : PF_NULL);
        -:  453:
        -:  454:	    /* check for opportunities to use other piecefns */
 16544239:  455:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  456:		case DLOOP_KIND_CONTIG:
        -:  457:		    break;
        -:  458:         	case DLOOP_KIND_BLOCKINDEXED:
        -:  459:		    /* only use blkidx piecefn if at start of blkidx type */
   111856:  460:		    if (blkidxfn &&
        -:  461:			cur_elmp->orig_block == cur_elmp->curblock &&
        -:  462:			cur_elmp->orig_count == cur_elmp->curcount)
        -:  463:		    {
        -:  464:			/* TODO: RELAX CONSTRAINTS */
      552:  465:			myblocks = cur_elmp->curblock * cur_elmp->curcount;
      552:  466:			piecefn_type = PF_BLOCKINDEXED;
        -:  467:		    }
        -:  468:		    break;
        -:  469:		case DLOOP_KIND_INDEXED:
        -:  470:		    /* only use index piecefn if at start of the index type.
        -:  471:		     *   count test checks that we're on first block.
        -:  472:		     *   block test checks that we haven't made progress on first block.
        -:  473:		     */
 15274094:  474:		    if (indexfn &&
        -:  475:			cur_elmp->orig_count == cur_elmp->curcount &&
        -:  476:			cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0))
        -:  477:		    {
        -:  478:			/* TODO: RELAX CONSTRAINT ON COUNT? */
 13049193:  479:			myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks;
 13049193:  480:			piecefn_type = PF_INDEXED;
        -:  481:		    }
        -:  482:		    break;
        -:  483:		case DLOOP_KIND_VECTOR:
        -:  484:		    /* only use the vector piecefn if at the start of a
        -:  485:		     * contiguous block.
        -:  486:		     */
   802866:  487:		    if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock)
        -:  488:		    {
   802866:  489:			myblocks = cur_elmp->curblock * cur_elmp->curcount;
   802866:  490:			piecefn_type = PF_VECTOR;
        -:  491:		    }
        -:  492:		    break;
        -:  493:		default:
        -:  494:		    /* --BEGIN ERROR HANDLING-- */
    #####:  495:		    DLOOP_Assert(0);
        -:  496:		    break;
        -:  497:		    /* --END ERROR HANDLING-- */
        -:  498:	    }
        -:  499:
        -:  500:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  501:	    DLOOP_dbg_printf("\thit leaf; cur_sp=%d, elmp=%x, piece_sz=" MPI_AINT_FMT_DEC_SPEC "\n",
        -:  502:			     cur_sp,
        -:  503:		             (unsigned) cur_elmp, myblocks * local_el_size);
        -:  504:#endif
        -:  505:
        -:  506:	    /* enforce the last parameter if necessary by reducing myblocks */
 16544239:  507:	    if (last != SEGMENT_IGNORE_LAST &&
        -:  508:		(stream_off + (myblocks * stream_el_size) > last))
        -:  509:	    {
    13796:  510:		myblocks = ((last - stream_off) / stream_el_size);
        -:  511:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  512:		DLOOP_dbg_printf("\tpartial block count=" MPI_AINT_FMT_DEC_SPEC " (" MPI_AINT_FMT_DEC_SPEC " bytes)\n",
        -:  513:				 myblocks,
        -:  514:                                 myblocks * stream_el_size);
        -:  515:#endif
    13796:  516:		if (myblocks == 0) {
    11076:  517:		    DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
    11076:  518:		    return;
        -:  519:		}
        -:  520:	    }
        -:  521:
        -:  522:	    /* call piecefn to perform data manipulation */
 16533163:  523:	    switch (piecefn_type) {
        -:  524:		case PF_NULL:
    #####:  525:		    piecefn_indicated_exit = 0;
        -:  526:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  527:		    DLOOP_dbg_printf("\tNULL piecefn for this piece\n");
        -:  528:#endif
    #####:  529:		    break;
        -:  530:		case PF_CONTIG:
  2691056:  531:		    DLOOP_Assert(myblocks <= cur_elmp->curblock);
  2691056:  532:		    piecefn_indicated_exit =
        -:  533:			contigfn(&myblocks,
        -:  534:				 el_type,
        -:  535:				 cur_elmp->curoffset, /* relative to segp->ptr */
        -:  536:				 segp->ptr, /* start of buffer (from segment) */
        -:  537:				 pieceparams);
  2691056:  538:		    break;
        -:  539:		case PF_VECTOR:
   801770:  540:		    piecefn_indicated_exit =
        -:  541:			vectorfn(&myblocks,
        -:  542:				 cur_elmp->curcount,
        -:  543:				 cur_elmp->orig_block,
        -:  544:				 cur_elmp->loop_p->loop_params.v_t.stride,
        -:  545:				 el_type,
        -:  546:				 cur_elmp->curoffset,
        -:  547:				 segp->ptr,
        -:  548:				 pieceparams);
   801770:  549:		    break;
        -:  550:		case PF_BLOCKINDEXED:
      552:  551:		    piecefn_indicated_exit =
        -:  552:			blkidxfn(&myblocks,
        -:  553:				 cur_elmp->curcount,
        -:  554:				 cur_elmp->orig_block,
        -:  555:				 cur_elmp->loop_p->loop_params.bi_t.offset_array,
        -:  556:				 el_type,
        -:  557:				 cur_elmp->orig_offset, /* blkidxfn adds offset */
        -:  558:				 segp->ptr,
        -:  559:				 pieceparams);
      552:  560:		    break;
        -:  561:		case PF_INDEXED:
 13039785:  562:		    piecefn_indicated_exit =
        -:  563:			indexfn(&myblocks,
        -:  564:				cur_elmp->curcount,
        -:  565:				cur_elmp->loop_p->loop_params.i_t.blocksize_array,
        -:  566:				cur_elmp->loop_p->loop_params.i_t.offset_array,
        -:  567:				el_type,
        -:  568:				cur_elmp->orig_offset, /* indexfn adds offset value */
        -:  569:				segp->ptr,
        -:  570:				pieceparams);
        -:  571:		    break;
        -:  572:	    }
        -:  573:
        -:  574:	    /* update local values based on piecefn returns (myblocks and
        -:  575:	     * piecefn_indicated_exit)
        -:  576:	     */
 16533163:  577:	    DLOOP_Assert(piecefn_indicated_exit >= 0);
 16533163:  578:	    DLOOP_Assert(myblocks >= 0);
 16533163:  579:	    stream_off += myblocks * stream_el_size;
        -:  580:
        -:  581:	    /* myblocks of 0 or less than cur_elmp->curblock indicates
        -:  582:	     * that we should stop processing and return.
        -:  583:	     */
 16533163:  584:	    if (myblocks == 0) {
   111484:  585:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
   111484:  586:		return;
        -:  587:	    }
 16421679:  588:	    else if (myblocks < (DLOOP_Offset)(cur_elmp->curblock)) {
     1054:  589:		cur_elmp->curoffset += myblocks * local_el_size;
     1054:  590:		cur_elmp->curblock  -= myblocks;
        -:  591:
     1054:  592:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
     1054:  593:		return;
        -:  594:	    }
        -:  595:	    else /* myblocks >= cur_elmp->curblock */ {
 16420625:  596:		int count_index = 0;
        -:  597:
        -:  598:		/* this assumes we're either *just* processing the last parts
        -:  599:		 * of the current block, or we're processing as many blocks as
        -:  600:		 * we like starting at the beginning of one.
        -:  601:		 */
        -:  602:
 16420625:  603:		switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  604:		    case DLOOP_KIND_INDEXED:
        -:  609:
        -:  611:				cur_elmp->curcount;
        -:  613:				DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp,
        -:  614:								 count_index);
        -:  615:			}
        -:  616:
 15152486:  617:			if (cur_elmp->curcount == 0) {
        -:  618:			    /* don't bother to fill in values; we're popping anyway */
 13228437:  619:			    DLOOP_Assert(myblocks == 0);
 13228437:  620:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  621:			}
        -:  622:			else {
  1924049:  623:			    cur_elmp->orig_block = cur_elmp->curblock;
  1924049:  624:			    cur_elmp->curoffset  = cur_elmp->orig_offset +
        -:  625:				DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp,
        -:  626:							      count_index);
        -:  627:
  1924049:  628:			    cur_elmp->curblock  -= myblocks;
  1924049:  629:			    cur_elmp->curoffset += myblocks * local_el_size;
        -:  630:			}
        -:  631:			break;
        -:  632:		    case DLOOP_KIND_VECTOR:
        -:  633:			/* this math relies on assertions at top of code block */
   801767:  634:			cur_elmp->curcount -= myblocks / (DLOOP_Offset)(cur_elmp->curblock);
   801767:  635:			if (cur_elmp->curcount == 0) {
   557422:  636:			    DLOOP_Assert(myblocks % ((DLOOP_Offset)(cur_elmp->curblock)) == 0);
   557422:  637:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  638:			}
        -:  639:			else {
        -:  640:			    /* this math relies on assertions at top of code
        -:  641:			     * block
        -:  642:			     */
   244345:  643:			    cur_elmp->curblock = cur_elmp->orig_block -
        -:  644:				(myblocks % (DLOOP_Offset)(cur_elmp->curblock));
        -:  645:			    /* new offset = original offset +
        -:  646:			     *              stride * whole blocks +
        -:  647:			     *              leftover bytes
        -:  648:			     */
   244345:  649:			    cur_elmp->curoffset = cur_elmp->orig_offset +
        -:  650:				(((DLOOP_Offset)(cur_elmp->orig_count - cur_elmp->curcount)) *
        -:  651:				 cur_elmp->loop_p->loop_params.v_t.stride) +
        -:  652:				(((DLOOP_Offset)(cur_elmp->orig_block - cur_elmp->curblock)) *
        -:  653:				 local_el_size);
        -:  654:			}
        -:  655:			break;
        -:  656:		    case DLOOP_KIND_CONTIG:
        -:  657:			/* contigs that reach this point have always been
        -:  658:			 * completely processed
        -:  659:			 */
   354592:  660:			DLOOP_Assert(myblocks == (DLOOP_Offset)(cur_elmp->curblock) &&
        -:  661:			       cur_elmp->curcount == 1);
   354592:  662:			DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  663:			break;
        -:  664:		    case DLOOP_KIND_BLOCKINDEXED:
   403818:  665:			while (myblocks > 0 && myblocks >= (DLOOP_Offset)(cur_elmp->curblock))
        -:  666:			{
   292038:  667:			    myblocks -= (DLOOP_Offset)(cur_elmp->curblock);
   292038:  668:			    cur_elmp->curcount--;
   292038:  669:			    DLOOP_Assert(cur_elmp->curcount >= 0);
        -:  670:
   292038:  671:			    count_index = cur_elmp->orig_count -
        -:  672:				cur_elmp->curcount;
   292038:  673:			    cur_elmp->curblock = cur_elmp->orig_block;
        -:  674:			}
   111780:  675:			if (cur_elmp->curcount == 0) {
        -:  676:			    /* popping */
     4666:  677:			    DLOOP_Assert(myblocks == 0);
     4666:  678:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  679:			}
        -:  680:			else {
        -:  681:			    /* cur_elmp->orig_block = cur_elmp->curblock; */
   107114:  682:			    cur_elmp->curoffset = cur_elmp->orig_offset +
        -:  683:				DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
        -:  684:								   count_index);
   107114:  685:			    cur_elmp->curblock  -= myblocks;
   107114:  686:			    cur_elmp->curoffset += myblocks * local_el_size;
        -:  687:			}
        -:  688:			break;
        -:  689:		}
        -:  690:	    }
        -:  691:
 15801577:  692:	    if (piecefn_indicated_exit) {
        -:  693:		/* piece function indicated that we should quit processing */
   243250:  694:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
   243250:  695:		return;
        -:  696:	    }
        -:  697:	} /* end of if leaf */
 13930890:  698:	else if (cur_elmp->curblock == 0) {
        -:  699:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  700:	    DLOOP_dbg_printf("\thit end of block; elmp=%x [%d]\n",
        -:  701:			    (unsigned) cur_elmp, cur_sp);
        -:  702:#endif
   312708:  703:	    cur_elmp->curcount--;
        -:  704:
        -:  705:	    /* new block.  for indexed and struct reset orig_block.
        -:  706:	     * reset curblock for all types
        -:  707:	     */
   312708:  708:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  709:		case DLOOP_KIND_CONTIG:
        -:  710:		case DLOOP_KIND_VECTOR:
        -:  711:		case DLOOP_KIND_BLOCKINDEXED:
        -:  712:		    break;
        -:  713:		case DLOOP_KIND_INDEXED:
       34:  714:		    cur_elmp->orig_block =
        -:  715:			DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
       34:  716:		    break;
        -:  717:		case DLOOP_KIND_STRUCT:
    #####:  718:		    cur_elmp->orig_block =
        -:  719:			DLOOP_STACKELM_STRUCT_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
    #####:  720:		    break;
        -:  721:		default:
        -:  722:		    /* --BEGIN ERROR HANDLING-- */
    #####:  723:		    DLOOP_Assert(0);
        -:  724:		    break;
        -:  725:		    /* --END ERROR HANDLING-- */
        -:  726:	    }
   312708:  727:	    cur_elmp->curblock = cur_elmp->orig_block;
        -:  728:
   312708:  729:	    if (cur_elmp->curcount == 0) {
        -:  730:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  731:		DLOOP_dbg_printf("\talso hit end of count; elmp=%x [%d]\n",
        -:  732:				(unsigned) cur_elmp, cur_sp);
        -:  733:#endif
   185116:  734:		DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  735:	    }
        -:  736:	}
        -:  737:	else /* push the stackelm */ {
        -:  738:	    DLOOP_Dataloop_stackelm *next_elmp;
        -:  739:	    int count_index, block_index;
        -:  740:
 13618182:  741:	    count_index = cur_elmp->orig_count - cur_elmp->curcount;
 13618182:  742:	    block_index = cur_elmp->orig_block - cur_elmp->curblock;
        -:  743:
        -:  744:	    /* reload the next stackelm if necessary */
 13618182:  745:	    next_elmp = &(segp->stackelm[cur_sp + 1]);
 13618182:  746:	    if (cur_elmp->may_require_reloading) {
    #####:  747:		DLOOP_Dataloop *load_dlp = NULL;
    #####:  748:		switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  749:		    case DLOOP_KIND_CONTIG:
        -:  750:		    case DLOOP_KIND_VECTOR:
        -:  751:		    case DLOOP_KIND_BLOCKINDEXED:
        -:  752:		    case DLOOP_KIND_INDEXED:
    #####:  753:			load_dlp = cur_elmp->loop_p->loop_params.cm_t.dataloop;
    #####:  754:			break;
        -:  755:		    case DLOOP_KIND_STRUCT:
    #####:  756:			load_dlp = DLOOP_STACKELM_STRUCT_DATALOOP(cur_elmp,
        -:  757:								  count_index);
    #####:  758:			break;
        -:  759:		    default:
        -:  760:			/* --BEGIN ERROR HANDLING-- */
    #####:  761:			DLOOP_Assert(0);
        -:  762:			break;
        -:  763:			/* --END ERROR HANDLING-- */
        -:  764:		}
        -:  765:
        -:  766:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  767:		DLOOP_dbg_printf("\tloading dlp=%x, elmp=%x [%d]\n",
        -:  768:				 (unsigned) load_dlp,
        -:  769:				 (unsigned) next_elmp,
        -:  770:				 cur_sp+1);
        -:  771:#endif
        -:  772:
    #####:  773:		DLOOP_Stackelm_load(next_elmp,
        -:  774:				    load_dlp,
        -:  775:				    1);
        -:  776:	    }
        -:  777:
        -:  778:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  779:	    DLOOP_dbg_printf("\tpushing type, elmp=%x [%d], count=%d, block=%d\n",
        -:  780:			    (unsigned) cur_elmp, cur_sp, count_index,
        -:  781:			     block_index);
        -:  782:#endif
        -:  783:	    /* set orig_offset and all cur values for new stackelm.
        -:  784:	     * this is done in two steps: first set orig_offset based on
        -:  785:	     * current stackelm, then set cur values based on new stackelm.
        -:  786:	     */
 13618182:  787:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  788:		case DLOOP_KIND_CONTIG:
 13459244:  789:		    next_elmp->orig_offset = cur_elmp->curoffset +
        -:  790:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
 13459244:  791:		    break;
        -:  792:		case DLOOP_KIND_VECTOR:
        -:  793:		    /* note: stride is in bytes */
   157324:  794:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  795:			(DLOOP_Offset) count_index * cur_elmp->loop_p->loop_params.v_t.stride +
        -:  796:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
   157324:  797:		    break;
        -:  798:		case DLOOP_KIND_BLOCKINDEXED:
     1443:  799:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  800:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
        -:  801:			DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
        -:  802:							   count_index);
     1443:  803:		    break;
        -:  804:		case DLOOP_KIND_INDEXED:
      171:  805:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  806:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
        -:  807:			DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, count_index);
      171:  808:		    break;
        -:  809:		case DLOOP_KIND_STRUCT:
    #####:  810:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  811:			(DLOOP_Offset) block_index * DLOOP_STACKELM_STRUCT_EL_EXTENT(cur_elmp, count_index) +
        -:  812:			DLOOP_STACKELM_STRUCT_OFFSET(cur_elmp, count_index);
    #####:  813:		    break;
        -:  814:		default:
        -:  815:		    /* --BEGIN ERROR HANDLING-- */
    #####:  816:		    DLOOP_Assert(0);
        -:  817:		    break;
        -:  818:		    /* --END ERROR HANDLING-- */
        -:  819:	    }
        -:  820:
        -:  821:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  822:	    DLOOP_dbg_printf("\tstep 1: next orig_offset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n",
        -:  823:			     next_elmp->orig_offset,
        -:  824:			     next_elmp->orig_offset);
        -:  825:#endif
        -:  826:
 13618182:  827:	    switch (next_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  828:		case DLOOP_KIND_CONTIG:
        -:  829:		case DLOOP_KIND_VECTOR:
   333774:  830:		    next_elmp->curcount  = next_elmp->orig_count;
   333774:  831:		    next_elmp->curblock  = next_elmp->orig_block;
   333774:  832:		    next_elmp->curoffset = next_elmp->orig_offset;
   333774:  833:		    break;
        -:  834:		case DLOOP_KIND_BLOCKINDEXED:
      860:  835:		    next_elmp->curcount  = next_elmp->orig_count;
      860:  836:		    next_elmp->curblock  = next_elmp->orig_block;
      860:  837:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  838:			DLOOP_STACKELM_BLOCKINDEXED_OFFSET(next_elmp, 0);
      860:  839:		    break;
        -:  840:		case DLOOP_KIND_INDEXED:
 13283548:  841:		    next_elmp->curcount  = next_elmp->orig_count;
 13283548:  842:		    next_elmp->curblock  =
        -:  843:			DLOOP_STACKELM_INDEXED_BLOCKSIZE(next_elmp, 0);
 13283548:  844:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  845:			DLOOP_STACKELM_INDEXED_OFFSET(next_elmp, 0);
 13283548:  846:		    break;
        -:  847:		case DLOOP_KIND_STRUCT:
    #####:  848:		    next_elmp->curcount = next_elmp->orig_count;
    #####:  849:		    next_elmp->curblock =
        -:  850:			DLOOP_STACKELM_STRUCT_BLOCKSIZE(next_elmp, 0);
    #####:  851:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  852:			DLOOP_STACKELM_STRUCT_OFFSET(next_elmp, 0);
    #####:  853:		    break;
        -:  854:		default:
        -:  855:		    /* --BEGIN ERROR HANDLING-- */
    #####:  856:		    DLOOP_Assert(0);
        -:  857:		    break;
        -:  858:		    /* --END ERROR HANDLING-- */
        -:  859:	    }
        -:  860:
        -:  861:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  862:	    DLOOP_dbg_printf("\tstep 2: next curoffset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n",
        -:  863:			     next_elmp->curoffset,
        -:  864:			     next_elmp->curoffset);
        -:  865:#endif
        -:  866:
 13618182:  867:	    cur_elmp->curblock--;
 13618182:  868:	    DLOOP_SEGMENT_PUSH;
        -:  869:	} /* end of else push the stackelm */
        -:  870:    } /* end of for (;;) */
        -:  871:
        -:  872:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  873:    DLOOP_dbg_printf("hit end of datatype\n");
        -:  874:#endif
        -:  875:
        -:  876:    DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
        -:  877:    return;
        -:  878:}
        -:  879:
        -:  880:/* DLOOP_Stackelm_blocksize - returns the block size for a stackelm, chosen by
        -:  881: * the kind of elmp->loop_p: contig -> c_t.count; vector and blockindexed -> the
        -:  882: * fixed blocksize; indexed and struct -> blocksize_array[orig_count - curcount].
        -:  883: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
        -:  884: * before this is called! The array index is not range-checked here.
        -:  885: * An unrecognized kind evaluates DLOOP_Assert(0); -1 is returned after that.
        -:  886: */
        -:  887:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp)
  1098531:  888:{
  1098531:  889:    struct DLOOP_Dataloop *dlp = elmp->loop_p;
        -:  890:
  1098531:  891:    switch(dlp->kind & DLOOP_KIND_MASK) {
        -:  892:	case DLOOP_KIND_CONTIG:
        -:  893:	    /* NOTE: for contigs the loop count is returned as the
        -:  894:	     * block size (DLOOP_Stackelm_load sets orig_count to 1
        -:  895:	     * for contigs), as described in the init call.
        -:  896:	     */
   294596:  897:	    return dlp->loop_params.c_t.count;
        -:  898:	    break; /* not reached; follows a return (likewise for the breaks below) */
        -:  899:	case DLOOP_KIND_VECTOR:
   617360:  900:	    return dlp->loop_params.v_t.blocksize;
        -:  901:	    break;
        -:  902:	case DLOOP_KIND_BLOCKINDEXED:
     5337:  903:	    return dlp->loop_params.bi_t.blocksize;
        -:  904:	    break;
        -:  905:	case DLOOP_KIND_INDEXED:
   181238:  906:	    return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount];
        -:  907:	    break;
        -:  908:	case DLOOP_KIND_STRUCT:
    #####:  909:	    return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount];
        -:  910:	    break;
        -:  911:	default:
        -:  912:	    /* --BEGIN ERROR HANDLING-- */
    #####:  913:	    DLOOP_Assert(0);
        -:  914:	    break;
        -:  915:	    /* --END ERROR HANDLING-- */
        -:  916:    }
    #####:  917:    return -1; /* reached only through the default case */
        -:  919:
        -:  920:/* DLOOP_Stackelm_offset - returns the starting offset (displacement) for a
        -:  921: * stackelm based on the current count in the stackelm.
        -:  922: *
        -:  923: * vector and contig: always 0. blockindexed, indexed, and struct:
        -:  924: * offset_array[orig_count - curcount] of the matching loop_params member.
        -:  925: *
        -:  926: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
        -:  927: * before this is called! also, this really is only good at init time for
        -:  928: * vectors and contigs (all the time for indexed) at the moment.
        -:  929: * An unrecognized kind evaluates DLOOP_Assert(0); -1 is returned after that. */
        -:  930:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp)
   925417:  931:{
   925417:  932:    struct DLOOP_Dataloop *dlp = elmp->loop_p;
        -:  933:
   925417:  934:    switch(dlp->kind & DLOOP_KIND_MASK) {
        -:  935:	case DLOOP_KIND_VECTOR:
        -:  936:	case DLOOP_KIND_CONTIG:
   904930:  937:	    return 0;
        -:  938:	    break; /* not reached; follows a return (likewise for the breaks below) */
        -:  939:	case DLOOP_KIND_BLOCKINDEXED:
     5277:  940:	    return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  941:	    break;
        -:  942:	case DLOOP_KIND_INDEXED:
    15210:  943:	    return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  944:	    break;
        -:  945:	case DLOOP_KIND_STRUCT:
    #####:  946:	    return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  947:	    break;
        -:  948:	default:
        -:  949:	    /* --BEGIN ERROR HANDLING-- */
    #####:  950:	    DLOOP_Assert(0);
        -:  951:	    break;
        -:  952:	    /* --END ERROR HANDLING-- */
        -:  953:    }
    #####:  954:    return -1; /* reached only through the default case */
        -:  956:
        -:  957:/* DLOOP_Stackelm_load - initializes stackelm elmp for dataloop dlp.
        -:  958: * loop_p, orig_count, orig_block, curcount, and may_require_reloading are all
        -:  959: * filled by us now. the rest are filled in at processing time.
        -:  960: * may_require_reloading is set when branch_flag is nonzero or dlp is a struct. */
        -:  961:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
        -:  962:				       struct DLOOP_Dataloop *dlp,
        -:  963:				       int branch_flag)
   965321:  964:{
   965321:  965:    elmp->loop_p = dlp;
        -:  966:
   965321:  967:    if ((dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) {
   234533:  968:	elmp->orig_count = 1; /* put in blocksize instead: the contig count becomes orig_block below */
        -:  969:    }
        -:  970:    else {
   730788:  971:	elmp->orig_count = dlp->loop_params.count; /* loop count for every non-contig kind */
        -:  972:    }
        -:  973:
   965321:  974:    if (branch_flag || (dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
        -:  975:    {
    #####:  976:	elmp->may_require_reloading = 1;
        -:  977:    }
        -:  978:    else {
   965321:  979:	elmp->may_require_reloading = 0;
        -:  980:    }
        -:  981:
        -:  982:    /* required by DLOOP_Stackelm_blocksize: indexed/struct loops index by orig_count - curcount (0 here) */
   965321:  983:    elmp->curcount = elmp->orig_count;
        -:  984:
   965321:  985:    elmp->orig_block = DLOOP_Stackelm_blocksize(elmp);
        -:  986:    /* TODO: GO AHEAD AND FILL IN CURBLOCK? (curblock is not assigned in this function) */
        -:  988:
        -:  989:/*
        -:  990: * Local variables:
        -:  991: * c-indent-tabs-mode: nil
        -:  992: * End:
        -:  993: */