-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/segment_ops.c
        -:    0:Graph:segment_ops.gcno
        -:    0:Data:segment_ops.gcda
        -:    0:Runs:3459
        -:    0:Programs:899
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <stdio.h>
        -:    9:#include <stdlib.h>
        -:   10:#include <string.h>
        -:   11:#include <sys/types.h>
        -:   12:
        -:   13:#include "dataloop.h"
        -:   14:#include "veccpy.h"
        -:   15:
        -:   16:int PREPEND_PREFIX(Segment_contig_m2m)(DLOOP_Offset *blocks_p,
        -:   17:				       DLOOP_Type el_type,
        -:   18:				       DLOOP_Offset rel_off,
        -:   19:				       void *bufp ATTRIBUTE((unused)),
        -:   20:				       void *v_paramp)
   329989:   21:{
        -:   22:    DLOOP_Offset el_size; /* DLOOP_Count? */
        -:   23:    DLOOP_Offset size;
   329989:   24:    struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp;
        -:   25:
   329989:   26:    DLOOP_Handle_get_size_macro(el_type, el_size);
   329989:   27:    size = *blocks_p * el_size;
        -:   28:
        -:   29:#ifdef MPID_SU_VERBOSE
        -:   30:    dbg_printf("\t[contig unpack: do=" MPI_AINT_FMT_DEC_SPEC ", dp=%x, bp=%x, sz=" MPI_AINT_FMT_DEC_SPEC ", blksz=" MPI_AINT_FMT_DEC_SPEC "]\n",
        -:   31:	       rel_off,
        -:   32:	       (unsigned) bufp,
        -:   33:	       (unsigned) paramp->u.unpack.unpack_buffer,
        -:   34:	       el_size,
        -:   35:	       *blocks_p);
        -:   36:#endif
        -:   37:
   329989:   38:    if (paramp->direction == DLOOP_M2M_TO_USERBUF) {
        -:   39:	/* Ensure that pointer increment fits in a pointer */
        -:   40:	/* userbuf is a pointer (not a displacement) since it is being
        -:   41:	 * used on a memcpy */
        -:   42:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->userbuf)) + rel_off);
   107595:   43:	DLOOP_Memcpy((char *) paramp->userbuf + rel_off, paramp->streambuf, size);
        -:   44:    }
        -:   45:    else {
        -:   46:	/* Ensure that pointer increment fits in a pointer */
        -:   47:	/* userbuf is a pointer (not a displacement) since it is being used on a memcpy */
        -:   48:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->userbuf)) + rel_off);
   222394:   49:	DLOOP_Memcpy(paramp->streambuf, (char *) paramp->userbuf + rel_off, size);
        -:   50:    }
        -:   51:    /* Ensure that pointer increment fits in a pointer */
        -:   52:    /* streambuf is a pointer (not a displacement) since it was used on a memcpy */
        -:   53:    MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) + size);
   329989:   54:    paramp->streambuf += size;
   329989:   55:    return 0;
        -:   56:}
        -:   57:
        -:   58:/* Segment_vector_m2m
        -:   59: *
        -:   60: * Note: this combines both packing and unpacking functionality.
        -:   61: *
        -:   62: * Note: this is only called when the starting position is at the beginning
        -:   63: * of a whole block in a vector type.
        -:   64: */
        -:   65:int PREPEND_PREFIX(Segment_vector_m2m)(DLOOP_Offset *blocks_p,
        -:   66:				       DLOOP_Count count ATTRIBUTE((unused)),
        -:   67:				       DLOOP_Count blksz,
        -:   68:				       DLOOP_Offset stride,
        -:   69:				       DLOOP_Type el_type,
        -:   70:				       DLOOP_Offset rel_off, /* offset into buffer */
        -:   71:				       void *bufp ATTRIBUTE((unused)),
        -:   72:				       void *v_paramp)
   271511:   73:{
        -:   74:    DLOOP_Count i, blocks_left, whole_count;
        -:   75:    DLOOP_Offset el_size;
   271511:   76:    struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp;
        -:   77:    char *cbufp;
        -:   78:
        -:   79:    /* Ensure that pointer increment fits in a pointer */
        -:   80:    /* userbuf is a pointer (not a displacement) since it is being used for a memory copy */
        -:   81:    MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->userbuf)) + rel_off);
   271511:   82:    cbufp = (char*) paramp->userbuf + rel_off;
   271511:   83:    DLOOP_Handle_get_size_macro(el_type, el_size);
        -:   84:
   271511:   85:    whole_count = (blksz > 0) ? (*blocks_p / (DLOOP_Offset) blksz) : 0;
   271511:   86:    blocks_left = (blksz > 0) ? (*blocks_p % (DLOOP_Offset) blksz) : 0;
        -:   87:
   271511:   88:    if (paramp->direction == DLOOP_M2M_TO_USERBUF) {
    85673:   89:	if (el_size == 8
        -:   90:	    MPIR_ALIGN8_TEST(paramp->streambuf,cbufp))
        -:   91:	{
       22:   92:	    MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, stride,
        -:   93:			      int64_t, blksz, whole_count);
       22:   94:	    MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0,
        -:   95:			      int64_t, blocks_left, 1);
        -:   96:	}
    85651:   97:	else if (el_size == 4
        -:   98:		 MPIR_ALIGN4_TEST(paramp->streambuf,cbufp))
        -:   99:	{
    85456:  100:	    MPIDI_COPY_TO_VEC((paramp->streambuf), cbufp, stride,
        -:  101:			      int32_t, blksz, whole_count);
    85456:  102:	    MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0,
        -:  103:			      int32_t, blocks_left, 1);
        -:  104:	}
      195:  105:	else if (el_size == 2) {
       24:  106:	    MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, stride,
        -:  107:			      int16_t, blksz, whole_count);
       24:  108:	    MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0,
        -:  109:			      int16_t, blocks_left, 1);
        -:  110:	}
        -:  111:	else {
    20583:  112:	    for (i=0; i < whole_count; i++) {
    20412:  113:		DLOOP_Memcpy(cbufp, paramp->streambuf, ((DLOOP_Offset) blksz) * el_size);
        -:  114:		/* Ensure that pointer increment fits in a pointer */
        -:  115:		/* streambuf is a pointer (not a displacement) since it is being used for a memory copy */
        -:  116:		MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  117:						 ((DLOOP_Offset) blksz) * el_size);
    20412:  118:		paramp->streambuf += ((DLOOP_Offset) blksz) * el_size;
        -:  119:
        -:  120:		MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (cbufp)) + stride);
    20412:  121:		cbufp += stride;
        -:  122:	    }
      171:  123:	    if (blocks_left) {
    #####:  124:		DLOOP_Memcpy(cbufp, paramp->streambuf, ((DLOOP_Offset) blocks_left) * el_size);
        -:  125:		/* Ensure that pointer increment fits in a pointer */
        -:  126:		/* streambuf is a pointer (not a displacement) since
        -:  127:		 * it is being used for a memory copy */
        -:  128:		MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  129:						 ((DLOOP_Offset) blocks_left) * el_size);
    #####:  130:		paramp->streambuf += ((DLOOP_Offset) blocks_left) * el_size;
        -:  131:	    }
        -:  132:	}
        -:  133:    }
        -:  134:    else /* M2M_FROM_USERBUF */ {
   185838:  135:	if (el_size == 8
        -:  136:	    MPIR_ALIGN8_TEST(cbufp,paramp->streambuf))
        -:  137:	{
    11834:  138:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride,
        -:  139:				int64_t, blksz, whole_count);
    11834:  140:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0,
        -:  141:				int64_t, blocks_left, 1);
        -:  142:	}
   174004:  143:	else if (el_size == 4
        -:  144:		 MPIR_ALIGN4_TEST(cbufp,paramp->streambuf))
        -:  145:	{
   173848:  146:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride,
        -:  147:				int32_t, blksz, whole_count);
   173848:  148:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0,
        -:  149:				int32_t, blocks_left, 1);
        -:  150:	}
      156:  151:	else if (el_size == 2) {
       24:  152:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride,
        -:  153:				int16_t, blksz, whole_count);
       24:  154:	    MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0,
        -:  155:				int16_t, blocks_left, 1);
        -:  156:	}
        -:  157:	else {
    19128:  158:	    for (i=0; i < whole_count; i++) {
    18996:  159:		DLOOP_Memcpy(paramp->streambuf, cbufp, (DLOOP_Offset) blksz * el_size);
        -:  160:		/* Ensure that pointer increment fits in a pointer */
        -:  161:		/* streambuf is a pointer (not a displacement) since
        -:  162:		 * it is being used for a memory copy */
        -:  163:		MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  164:						 (DLOOP_Offset) blksz * el_size);
    18996:  165:		paramp->streambuf += (DLOOP_Offset) blksz * el_size;
    18996:  166:		cbufp += stride;
        -:  167:	    }
      132:  168:	    if (blocks_left) {
    #####:  169:		DLOOP_Memcpy(paramp->streambuf, cbufp, (DLOOP_Offset) blocks_left * el_size);
        -:  170:		/* Ensure that pointer increment fits in a pointer */
        -:  171:		/* streambuf is a pointer (not a displacement) since
        -:  172:		 * it is being used for a memory copy */
        -:  173:		MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  174:						 (DLOOP_Offset) blocks_left * el_size);
    #####:  175:		paramp->streambuf += (DLOOP_Offset) blocks_left * el_size;
        -:  176:	    }
        -:  177:	}
        -:  178:    }
        -:  179:
   271511:  180:    return 0;
        -:  181:}
        -:  182:
        -:  183:/* MPID_Segment_blkidx_m2m
        -:  184: */
        -:  185:int PREPEND_PREFIX(Segment_blkidx_m2m)(DLOOP_Offset *blocks_p,
        -:  186:				       DLOOP_Count count,
        -:  187:				       DLOOP_Count blocklen,
        -:  188:				       DLOOP_Offset *offsetarray,
        -:  189:				       DLOOP_Type el_type,
        -:  190:				       DLOOP_Offset rel_off,
        -:  191:				       void *bufp ATTRIBUTE((unused)),
        -:  192:				       void *v_paramp)
      566:  193:{
      566:  194:    DLOOP_Count curblock = 0;
        -:  195:    DLOOP_Offset el_size;
      566:  196:    DLOOP_Offset blocks_left = *blocks_p;
        -:  197:    char *cbufp;
      566:  198:    struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp;
        -:  199:
      566:  200:    DLOOP_Handle_get_size_macro(el_type, el_size);
        -:  201:
   181328:  202:    while (blocks_left) {
        -:  203:	char *src, *dest;
        -:  204:
   180762:  205:	DLOOP_Assert(curblock < count);
        -:  206:
        -:  207:	/* Ensure that pointer increment fits in a pointer */
        -:  208:	/* userbuf is a pointer (not a displacement) since it is being
        -:  209:	 * used for a memory copy */
        -:  210:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->userbuf)) +
        -:  211:					 rel_off + offsetarray[curblock]);
   180762:  212:	cbufp = (char*) paramp->userbuf + rel_off + offsetarray[curblock];
        -:  213:
   180762:  214:	if (blocklen > blocks_left) blocklen = blocks_left;
        -:  215:
   180762:  216:	if (paramp->direction == DLOOP_M2M_TO_USERBUF) {
   140636:  217:	    src  = paramp->streambuf;
   140636:  218:	    dest = cbufp;
        -:  219:	}
        -:  220:	else {
    40126:  221:	    src  = cbufp;
    40126:  222:	    dest = paramp->streambuf;
        -:  223:	}
        -:  224:
        -:  225:	/* note: macro modifies dest buffer ptr, so we must reset */
   180762:  226:	if (el_size == 8
        -:  227:	    MPIR_ALIGN8_TEST(src, dest))
        -:  228:	{
    #####:  229:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int64_t, blocklen, 1);
        -:  230:	}
   180762:  231:	else if (el_size == 4
        -:  232:		 MPIR_ALIGN4_TEST(src,dest))
        -:  233:	{
     1872:  234:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int32_t, blocklen, 1);
        -:  235:	}
   178890:  236:	else if (el_size == 2) {
    #####:  237:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int16_t, blocklen, 1);
        -:  238:	}
        -:  239:	else {
   178890:  240:	    DLOOP_Memcpy(dest, src, (DLOOP_Offset) blocklen * el_size);
        -:  241:	}
        -:  242:
        -:  243:	/* Ensure that pointer increment fits in a pointer */
        -:  244:	/* streambuf is a pointer (not a displacement) since it is
        -:  245:	 * being used for a memory copy */
        -:  246:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  247:					 (DLOOP_Offset) blocklen * el_size);
   180762:  248:	paramp->streambuf += (DLOOP_Offset) blocklen * el_size;
   180762:  249:	blocks_left -= blocklen;
   180762:  250:	curblock++;
        -:  251:    }
        -:  252:
      566:  253:    return 0;
        -:  254:}
        -:  255:
        -:  256:/* MPID_Segment_index_m2m
        -:  257: */
        -:  258:int PREPEND_PREFIX(Segment_index_m2m)(DLOOP_Offset *blocks_p,
        -:  259:				      DLOOP_Count count,
        -:  260:				      DLOOP_Count *blockarray,
        -:  261:				      DLOOP_Offset *offsetarray,
        -:  262:				      DLOOP_Type el_type,
        -:  263:				      DLOOP_Offset rel_off,
        -:  264:				      void *bufp ATTRIBUTE((unused)),
        -:  265:				      void *v_paramp)
  4656524:  266:{
  4656524:  267:    int curblock = 0;
        -:  268:    DLOOP_Offset el_size;
  4656524:  269:    DLOOP_Offset cur_block_sz, blocks_left = *blocks_p;
        -:  270:    char *cbufp;
  4656524:  271:    struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp;
        -:  272:
  4656524:  273:    DLOOP_Handle_get_size_macro(el_type, el_size);
        -:  274:
 20240803:  275:    while (blocks_left) {
        -:  276:	char *src, *dest;
        -:  277:
 15584279:  278:	DLOOP_Assert(curblock < count);
 15584279:  279:	cur_block_sz = blockarray[curblock];
        -:  280:
        -:  281:	/* Ensure that pointer increment fits in a pointer */
        -:  282:	/* userbuf is a pointer (not a displacement) since it is being
        -:  283:	 * used for a memory copy */
        -:  284:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->userbuf)) +
        -:  285:					 rel_off + offsetarray[curblock]);
 15584279:  286:	cbufp = (char*) paramp->userbuf + rel_off + offsetarray[curblock];
        -:  287:
 15584279:  288:	if (cur_block_sz > blocks_left) cur_block_sz = blocks_left;
        -:  289:
 15584279:  290:	if (paramp->direction == DLOOP_M2M_TO_USERBUF) {
  7725550:  291:	    src  = paramp->streambuf;
  7725550:  292:	    dest = cbufp;
        -:  293:	}
        -:  294:	else {
  7858729:  295:	    src  = cbufp;
  7858729:  296:	    dest = paramp->streambuf;
        -:  297:	}
        -:  298:
        -:  299:	/* note: macro modifies dest buffer ptr, so we must reset */
 15584279:  300:	if (el_size == 8
        -:  301:	    MPIR_ALIGN8_TEST(src, dest))
        -:  302:	{
    #####:  303:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int64_t, cur_block_sz, 1);
        -:  304:	}
 15584279:  305:	else if (el_size == 4
        -:  306:		 MPIR_ALIGN4_TEST(src,dest))
        -:  307:	{
     8385:  308:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int32_t, cur_block_sz, 1);
        -:  309:	}
 15575894:  310:	else if (el_size == 2) {
    #####:  311:	    MPIDI_COPY_FROM_VEC(src, dest, 0, int16_t, cur_block_sz, 1);
        -:  312:	}
        -:  313:	else {
 15575894:  314:	    DLOOP_Memcpy(dest, src, cur_block_sz * el_size);
        -:  315:	}
        -:  316:
        -:  317:	/* Ensure that pointer increment fits in a pointer */
        -:  318:	/* streambuf is a pointer (not a displacement) since it is
        -:  319:	 * being used for a memory copy */
        -:  320:	MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT (paramp->streambuf)) +
        -:  321:					 cur_block_sz * el_size);
 15584279:  322:	paramp->streambuf += cur_block_sz * el_size;
 15584279:  323:	blocks_left -= cur_block_sz;
 15584279:  324:	curblock++;
        -:  325:    }
        -:  326:
  4656524:  327:    return 0;
        -:  328:}
        -:  329:
        -:  330:void PREPEND_PREFIX(Segment_pack)(DLOOP_Segment *segp,
        -:  331:				  DLOOP_Offset   first,
        -:  332:				  DLOOP_Offset  *lastp,
        -:  333:				  void *streambuf)
   197955:  334:{
        -:  335:    struct PREPEND_PREFIX(m2m_params) params;
        -:  336:
        -:  337:    /* experimenting with discarding buf value in the segment, keeping in
        -:  338:     * per-use structure instead. would require moving the parameters around a
        -:  339:     * bit.
        -:  340:     */
   197955:  341:    params.userbuf   = segp->ptr;
   197955:  342:    params.streambuf = streambuf;
   197955:  343:    params.direction = DLOOP_M2M_FROM_USERBUF;
        -:  344:
   197955:  345:    PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp,
        -:  346:				       PREPEND_PREFIX(Segment_contig_m2m),
        -:  347:				       PREPEND_PREFIX(Segment_vector_m2m),
        -:  348:				       PREPEND_PREFIX(Segment_blkidx_m2m),
        -:  349:				       PREPEND_PREFIX(Segment_index_m2m),
        -:  350:				       NULL, /* size fn */
        -:  351:				       &params);
        -:  352:    return;
        -:  353:}
        -:  354:
        -:  355:void PREPEND_PREFIX(Segment_unpack)(DLOOP_Segment *segp,
        -:  356:				    DLOOP_Offset   first,
        -:  357:				    DLOOP_Offset  *lastp,
        -:  358:				    void *streambuf)
   107383:  359:{
        -:  360:    struct PREPEND_PREFIX(m2m_params) params;
        -:  361:
        -:  362:    /* experimenting with discarding buf value in the segment, keeping in
        -:  363:     * per-use structure instead. would require moving the parameters around a
        -:  364:     * bit.
        -:  365:     */
   107383:  366:    params.userbuf   = segp->ptr;
   107383:  367:    params.streambuf = streambuf;
   107383:  368:    params.direction = DLOOP_M2M_TO_USERBUF;
        -:  369:
   107383:  370:    PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp,
        -:  371:				       PREPEND_PREFIX(Segment_contig_m2m),
        -:  372:				       PREPEND_PREFIX(Segment_vector_m2m),
        -:  373:				       PREPEND_PREFIX(Segment_blkidx_m2m),
        -:  374:				       PREPEND_PREFIX(Segment_index_m2m),
        -:  375:				       NULL, /* size fn */
        -:  376:				       &params);
        -:  377:    return;
        -:  378:}
        -:  379:
        -:  380:struct PREPEND_PREFIX(contig_blocks_params) {
        -:  381:    DLOOP_Count  count;
        -:  382:    DLOOP_Offset last_loc;
        -:  383:};
        -:  384:
        -:  385:/* MPID_Segment_contig_count_block
        -:  386: *
        -:  387: * Note: because bufp is just an offset, we can ignore it in our
        -:  388: *       calculations of # of contig regions.
        -:  389: */
        -:  390:static int DLOOP_Segment_contig_count_block(DLOOP_Offset *blocks_p,
        -:  391:					    DLOOP_Type el_type,
        -:  392:					    DLOOP_Offset rel_off,
        -:  393:					    DLOOP_Buffer bufp ATTRIBUTE((unused)),
        -:  394:					    void *v_paramp)
      691:  395:{
        -:  396:    DLOOP_Offset size, el_size;
      691:  397:    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;
        -:  398:
      691:  399:    DLOOP_Assert(*blocks_p > 0);
        -:  400:
      691:  401:    DLOOP_Handle_get_size_macro(el_type, el_size);
      691:  402:    size = *blocks_p * el_size;
        -:  403:
        -:  404:#ifdef MPID_SP_VERBOSE
        -:  405:    MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = " MPI_AINT_FMT_DEC_SPEC "\n",
        -:  406:		    (int) paramp->count,
        -:  407:		    (int) ((char *) bufp + rel_off),
        -:  408:		    paramp->last_loc);
        -:  409:#endif
        -:  410:
      691:  411:    if (paramp->count > 0 && rel_off == paramp->last_loc)
        -:  412:    {
        -:  413:	/* this region is adjacent to the last */
    #####:  414:	paramp->last_loc += size;
        -:  415:    }
        -:  416:    else {
        -:  417:	/* new region */
      691:  418:	paramp->last_loc = rel_off + size;
      691:  419:	paramp->count++;
        -:  420:    }
      691:  421:    return 0;
        -:  422:}
        -:  423:
        -:  424:/* DLOOP_Segment_vector_count_block
        -:  425: *
        -:  426: * Input Parameters:
        -:  427: * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
        -:  428: * count    - # of noncontiguous regions
        -:  429: * blksz    - size of each noncontiguous region
        -:  430: * stride   - distance in bytes from start of one region to start of next
        -:  431: * el_type - elemental type (e.g. MPI_INT)
        -:  432: * ...
        -:  433: *
        -:  434: * Note: this is only called when the starting position is at the beginning
        -:  435: * of a whole block in a vector type.
        -:  436: */
        -:  437:static int DLOOP_Segment_vector_count_block(DLOOP_Offset *blocks_p,
        -:  438:					    DLOOP_Count count,
        -:  439:					    DLOOP_Count blksz,
        -:  440:					    DLOOP_Offset stride,
        -:  441:					    DLOOP_Type el_type,
        -:  442:					    DLOOP_Offset rel_off, /* offset into buffer */
        -:  443:					    void *bufp ATTRIBUTE((unused)),
        -:  444:					    void *v_paramp)
        1:  445:{
        -:  446:    DLOOP_Count new_blk_count;
        -:  447:    DLOOP_Offset size, el_size;
        1:  448:    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;
        -:  449:
        1:  450:    DLOOP_Assert(count > 0 && blksz > 0 && *blocks_p > 0);
        -:  451:
        1:  452:    DLOOP_Handle_get_size_macro(el_type, el_size);
        1:  453:    size = el_size * blksz;
        1:  454:    new_blk_count = count;
        -:  455:
        -:  456:    /* if size == stride, then blocks are adjacent to one another */
        1:  457:    if (size == stride) new_blk_count = 1;
        -:  458:
        1:  459:    if (paramp->count > 0 && rel_off == paramp->last_loc)
        -:  460:    {
        -:  461:	/* first block sits at end of last block */
    #####:  462:	new_blk_count--;
        -:  463:    }
        -:  464:
        1:  465:    paramp->last_loc = rel_off + ((DLOOP_Offset)(count-1)) * stride + size;
        1:  466:    paramp->count += new_blk_count;
        1:  467:    return 0;
        -:  468:}
        -:  469:
        -:  470:/* DLOOP_Segment_blkidx_count_block
        -:  471: *
        -:  472: * Note: this is only called when the starting position is at the
        -:  473: * beginning of a whole block in a blockindexed type.
        -:  474: */
        -:  475:static int DLOOP_Segment_blkidx_count_block(DLOOP_Offset *blocks_p,
        -:  476:					    DLOOP_Count count,
        -:  477:					    DLOOP_Count blksz,
        -:  478:					    DLOOP_Offset *offsetarray,
        -:  479:					    DLOOP_Type el_type,
        -:  480:					    DLOOP_Offset rel_off,
        -:  481:					    void *bufp ATTRIBUTE((unused)),
        -:  482:					    void *v_paramp)
    #####:  483:{
        -:  484:    DLOOP_Count i, new_blk_count;
        -:  485:    DLOOP_Offset size, el_size, last_loc;
    #####:  486:    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;
        -:  487:
    #####:  488:    DLOOP_Assert(count > 0 && blksz > 0 && *blocks_p > 0);
        -:  489:
    #####:  490:    DLOOP_Handle_get_size_macro(el_type, el_size);
    #####:  491:    size = el_size * (DLOOP_Offset) blksz;
    #####:  492:    new_blk_count = count;
        -:  493:
    #####:  494:    if (paramp->count > 0 && ((rel_off + offsetarray[0]) == paramp->last_loc))
        -:  495:    {
        -:  496:	/* first block sits at end of last block */
    #####:  497:	new_blk_count--;
        -:  498:    }
        -:  499:
    #####:  500:    last_loc = rel_off + offsetarray[0] + size;
    #####:  501:    for (i=1; i < count; i++) {
    #####:  502:	if (last_loc == rel_off + offsetarray[i]) new_blk_count--;
        -:  503:
    #####:  504:	last_loc = rel_off + offsetarray[i] + size;
        -:  505:    }
        -:  506:
    #####:  507:    paramp->last_loc = last_loc;
    #####:  508:    paramp->count += new_blk_count;
    #####:  509:    return 0;
        -:  510:}
        -:  511:
        -:  512:/* DLOOP_Segment_index_count_block
        -:  513: *
        -:  514: * Note: this is only called when the starting position is at the
        -:  515: * beginning of a whole block in an indexed type.
        -:  516: */
        -:  517:static int DLOOP_Segment_index_count_block(DLOOP_Offset *blocks_p,
        -:  518:					   DLOOP_Count count,
        -:  519:					   DLOOP_Count *blockarray,
        -:  520:					   DLOOP_Offset *offsetarray,
        -:  521:					   DLOOP_Type el_type,
        -:  522:					   DLOOP_Offset rel_off,
        -:  523:					   void *bufp ATTRIBUTE((unused)),
        -:  524:					   void *v_paramp)
     4618:  525:{
        -:  526:    DLOOP_Count new_blk_count;
        -:  527:    DLOOP_Offset el_size, last_loc;
     4618:  528:    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;
        -:  529:
     4618:  530:    DLOOP_Assert(count > 0 && *blocks_p > 0);
        -:  531:
     4618:  532:    DLOOP_Handle_get_size_macro(el_type, el_size);
     4618:  533:    new_blk_count = count;
        -:  534:
     4618:  535:    if (paramp->count > 0 && ((rel_off + offsetarray[0]) == paramp->last_loc))
        -:  536:    {
        -:  537:	/* first block sits at end of last block */
      255:  538:	new_blk_count--;
        -:  539:    }
        -:  540:
        -:  541:    /* Note: when we build an indexed type we combine adjacent regions,
        -:  542:     *       so we're not going to go through and check every piece
        -:  543:     *       separately here. if someone else were building indexed
        -:  544:     *       dataloops by hand, then the loop here might be necessary.
        -:  545:     *       DLOOP_Count i and DLOOP_Offset size would need to be
        -:  546:     *       declared above.
        -:  547:     */
        -:  548:#if 0
        -:  549:    last_loc = rel_off * offsetarray[0] + ((DLOOP_Offset) blockarray[0]) * el_size;
        -:  550:    for (i=1; i < count; i++) {
        -:  551:	if (last_loc == rel_off + offsetarray[i]) new_blk_count--;
        -:  552:
        -:  553:	last_loc = rel_off + offsetarray[i] + ((DLOOP_Offset) blockarray[i]) * el_size;
        -:  554:    }
        -:  555:#else
     4618:  556:    last_loc = rel_off + offsetarray[count-1] + ((DLOOP_Offset) blockarray[count-1]) * el_size;
        -:  557:#endif
        -:  558:
     4618:  559:    paramp->last_loc = last_loc;
     4618:  560:    paramp->count += new_blk_count;
     4618:  561:    return 0;
        -:  562:}
        -:  563:
        -:  564:/* DLOOP_Segment_count_contig_blocks()
        -:  565: *
        -:  566: * Count number of contiguous regions in segment between first and last.
        -:  567: */
        -:  568:void PREPEND_PREFIX(Segment_count_contig_blocks)(DLOOP_Segment *segp,
        -:  569:						 DLOOP_Offset first,
        -:  570:						 DLOOP_Offset *lastp,
        -:  571:						 DLOOP_Count *countp)
     5055:  572:{
        -:  573:    struct PREPEND_PREFIX(contig_blocks_params) params;
        -:  574:
     5055:  575:    params.count    = 0;
     5055:  576:    params.last_loc = 0;
        -:  577:
        -:  578:    /* FIXME: The blkidx and index functions are not used since they
        -:  579:     * optimize the count by coalescing contiguous segments, while
        -:  580:     * functions using the count do not optimize in the same way
        -:  581:     * (e.g., flatten code) */
     5055:  582:    PREPEND_PREFIX(Segment_manipulate)(segp,
        -:  583:				       first,
        -:  584:				       lastp,
        -:  585:				       DLOOP_Segment_contig_count_block,
        -:  586:				       DLOOP_Segment_vector_count_block,
        -:  587:				       DLOOP_Segment_blkidx_count_block,
        -:  588:				       DLOOP_Segment_index_count_block,
        -:  589:				       NULL, /* size fn */
        -:  590:				       (void *) &params);
        -:  591:
     5055:  592:    *countp = params.count;
        -:  593:    return;
        -:  594:}
        -:  595:
        -:  596:/********** FUNCTIONS FOR FLATTENING INTO MPI OFFSETS AND BLKLENS  **********/
        -:  597:
        -:  598:/* Segment_mpi_flatten
        -:  599: *
        -:  600: * Flattens into a set of blocklengths and displacements, as in an
        -:  601: * MPI hindexed type. Note that we use appropriately-sized variables
        -:  602: * in the associated params structure for this reason.
        -:  603: *
        -:  604: * NOTE: blocks will be in units of bytes when returned.
        -:  605: *
        -:  606: * WARNING: there's potential for overflow here as we convert from
        -:  607: *          various types into an index of bytes.
        -:  608: */
        -:  609:struct PREPEND_PREFIX(mpi_flatten_params) {
        -:  610:    int       index, length;
        -:  611:    MPI_Aint  last_end;
        -:  612:    int      *blklens;
        -:  613:    MPI_Aint *disps;
        -:  614:};
        -:  615:
        -:  616:/* DLOOP_Segment_contig_mpi_flatten
        -:  617: *
        -:  618: */
        -:  619:static int DLOOP_Segment_contig_mpi_flatten(DLOOP_Offset *blocks_p,
        -:  620:					    DLOOP_Type el_type,
        -:  621:					    DLOOP_Offset rel_off,
        -:  622:					    void *bufp,
        -:  623:					    void *v_paramp)
    23892:  624:{
        -:  625:    int last_idx, size;
        -:  626:    DLOOP_Offset el_size;
    23892:  627:    char *last_end = NULL;
    23892:  628:    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;
        -:  629:
    23892:  630:    DLOOP_Handle_get_size_macro(el_type, el_size);
    23892:  631:    size = *blocks_p * el_size;
        -:  632:
    23892:  633:    last_idx = paramp->index - 1;
    23892:  634:    if (last_idx >= 0) {
        -:  635:	/* Since disps can be negative, we cannot use
        -:  636:	 * MPID_Ensure_Aint_fits_in_pointer to verify that disps +
        -:  637:	 * blklens fits in a pointer.  Just let it truncate, if the
        -:  638:	 * sizeof a pointer is less than the sizeof an MPI_Aint.
        -:  639:	 */
    #####:  640:	last_end = (char*) MPI_AINT_CAST_TO_VOID_PTR
        -:  641:	           (paramp->disps[last_idx] + ((DLOOP_Offset) paramp->blklens[last_idx]));
        -:  642:    }
        -:  643:
        -:  644:    /* Since bufp can be a displacement and can be negative, we cannot
        -:  645:     * use MPID_Ensure_Aint_fits_in_pointer to ensure the sum fits in
        -:  646:     * a pointer.  Just let it truncate.
        -:  647:     */
    23892:  648:    if ((last_idx == paramp->length-1) &&
        -:  649:        (last_end != ((char *) bufp + rel_off)))
        -:  650:    {
        -:  651:	/* we have used up all our entries, and this region doesn't fit on
        -:  652:	 * the end of the last one.  setting blocks to 0 tells manipulation
        -:  653:	 * function that we are done (and that we didn't process any blocks).
        -:  654:	 */
    #####:  655:	*blocks_p = 0;
    #####:  656:	return 1;
        -:  657:    }
    23892:  658:    else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
        -:  659:    {
        -:  660:	/* add this size to the last vector rather than using up another one */
    #####:  661:	paramp->blklens[last_idx] += size;
        -:  662:    }
        -:  663:    else {
        -:  664:	/* Since bufp can be a displacement and can be negative, we cannot use
        -:  665:	 * MPI_VOID_PTR_CAST_TO_MPI_AINT to cast the sum to a pointer.  Just let it
        -:  666:	 * sign extend.
        -:  667:	 */
    23892:  668:        paramp->disps[last_idx+1]   = MPI_PTR_DISP_CAST_TO_MPI_AINT bufp + rel_off;
    23892:  669:	paramp->blklens[last_idx+1] = size;
    23892:  670:	paramp->index++;
        -:  671:    }
    23892:  672:    return 0;
        -:  673:}
        -:  674:
        -:  675:/* DLOOP_Segment_vector_mpi_flatten
        -:  676: *
        -:  677: * Input Parameters:
        -:  678: * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
        -:  679: * count    - # of noncontiguous regions
        -:  680: * blksz    - size of each noncontiguous region
        -:  681: * stride   - distance in bytes from start of one region to start of next
        -:  682: * el_type - elemental type (e.g. MPI_INT)
        -:  683: * ...
        -:  684: *
        -:  685: * Note: this is only called when the starting position is at the beginning
        -:  686: * of a whole block in a vector type.
        -:  687: *
        -:  688: * TODO: MAKE THIS CODE SMARTER, USING THE SAME GENERAL APPROACH AS IN THE
        -:  689: *       COUNT BLOCK CODE ABOVE.
        -:  690: */
        -:  691:static int DLOOP_Segment_vector_mpi_flatten(DLOOP_Offset *blocks_p,
        -:  692:					    DLOOP_Count count,
        -:  693:					    DLOOP_Count blksz,
        -:  694:					    DLOOP_Offset stride,
        -:  695:					    DLOOP_Type el_type,
        -:  696:					    DLOOP_Offset rel_off, /* offset into buffer */
        -:  697:					    void *bufp, /* start of buffer */
        -:  698:					    void *v_paramp)
        1:  699:{
        -:  700:    int i, size, blocks_left;
        -:  701:    DLOOP_Offset el_size;
        1:  702:    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;
        -:  703:
        1:  704:    DLOOP_Handle_get_size_macro(el_type, el_size);
        1:  705:    blocks_left = (int)*blocks_p;
        -:  706:
        3:  707:    for (i=0; i < count && blocks_left > 0; i++) {
        -:  708:	int last_idx;
        2:  709:	char *last_end = NULL;
        -:  710:
        2:  711:	if (blocks_left > blksz) {
        1:  712:	    size = blksz * (int) el_size;
        1:  713:	    blocks_left -= blksz;
        -:  714:	}
        -:  715:	else {
        -:  716:	    /* last pass */
        1:  717:	    size = blocks_left * (int) el_size;
        1:  718:	    blocks_left = 0;
        -:  719:	}
        -:  720:
        2:  721:	last_idx = paramp->index - 1;
        2:  722:	if (last_idx >= 0) {
        -:  723:	    /* Since disps can be negative, we cannot use
        -:  724:	     * MPID_Ensure_Aint_fits_in_pointer to verify that disps +
        -:  725:	     * blklens fits in a pointer.  Nor can we use
        -:  726:	     * MPI_AINT_CAST_TO_VOID_PTR to cast the sum to a pointer.
        -:  727:	     * Just let it truncate, if the sizeof a pointer is less
        -:  728:	     * than the sizeof an MPI_Aint.
        -:  729:	     */
        1:  730:	    last_end = (char *) MPI_AINT_CAST_TO_VOID_PTR
        -:  731:		       (paramp->disps[last_idx] +
        -:  732:			 (MPI_Aint)(paramp->blklens[last_idx]));
        -:  733:	}
        -:  734:
        -:  735:	/* Since bufp can be a displacement and can be negative, we cannot use
        -:  736:	 * MPID_Ensure_Aint_fits_in_pointer to ensure the sum fits in a pointer.
        -:  737:	 * Just let it truncate.
        -:  738:	 */
        2:  739:        if ((last_idx == paramp->length-1) &&
        -:  740:            (last_end != ((char *) bufp + rel_off)))
        -:  741:	{
        -:  742:	    /* we have used up all our entries, and this one doesn't fit on
        -:  743:	     * the end of the last one.
        -:  744:	     */
    #####:  745:	    *blocks_p -= (blocks_left + (size / (int) el_size));
        -:  746:#ifdef MPID_SP_VERBOSE
        -:  747:	    MPIU_dbg_printf("\t[vector to vec exiting (1): next ind = %d, " MPI_AINT_FMT_DEC_SPEC " blocks processed.\n",
        -:  748:			    paramp->u.pack_vector.index,
        -:  749:			    *blocks_p);
        -:  750:#endif
    #####:  751:	    return 1;
        -:  752:	}
        2:  753:        else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
        -:  754:	{
        -:  755:	    /* add this size to the last vector rather than using up new one */
    #####:  756:	    paramp->blklens[last_idx] += size;
        -:  757:	}
        -:  758:	else {
        -:  759:	    /* Since bufp can be a displacement and can be negative, we cannot use
        -:  760:	     * MPI_VOID_PTR_CAST_TO_MPI_AINT to cast the sum to a pointer.  Just let it
        -:  761:	     * sign extend.
        -:  762:	     */
        2:  763:            paramp->disps[last_idx+1]   = MPI_PTR_DISP_CAST_TO_MPI_AINT bufp + rel_off;
        2:  764:	    paramp->blklens[last_idx+1] = size;
        2:  765:	    paramp->index++;
        -:  766:	}
        -:  767:
        2:  768:	rel_off += stride;
        -:  769:    }
        -:  770:
        -:  771:#ifdef MPID_SP_VERBOSE
        -:  772:    MPIU_dbg_printf("\t[vector to vec exiting (2): next ind = %d, " MPI_AINT_FMT_DEC_SPEC " blocks processed.\n",
        -:  773:		    paramp->u.pack_vector.index,
        -:  774:		    *blocks_p);
        -:  775:#endif
        -:  776:
        -:  777:    /* if we get here then we processed ALL the blocks; don't need to update
        -:  778:     * blocks_p
        -:  779:     */
        -:  780:
        1:  781:    DLOOP_Assert(blocks_left == 0);
        1:  782:    return 0;
        -:  783:}
        -:  784:
        -:  785:static int DLOOP_Segment_blkidx_mpi_flatten(DLOOP_Offset *blocks_p,
        -:  786:                                            DLOOP_Count count,
        -:  787:                                            DLOOP_Count blksz,
        -:  788:                                            DLOOP_Offset *offsetarray,
        -:  789:                                            DLOOP_Type el_type,
        -:  790:                                            DLOOP_Offset rel_off,
        -:  791:                                            void *bufp,
        -:  792:                                            void *v_paramp)
    #####:  793:{
        -:  794:    int i, size, blocks_left;
        -:  795:    DLOOP_Offset el_size;
    #####:  796:    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;
        -:  797:
    #####:  798:    DLOOP_Handle_get_size_macro(el_type, el_size);
    #####:  799:    blocks_left = *blocks_p;
        -:  800:
    #####:  801:    for (i=0; i < count && blocks_left > 0; i++) {
        -:  802:	int last_idx;
    #####:  803:	char *last_end = NULL;
        -:  804:
    #####:  805:	if (blocks_left > blksz) {
    #####:  806:	    size = blksz * (int) el_size;
    #####:  807:	    blocks_left -= blksz;
        -:  808:	}
        -:  809:	else {
        -:  810:	    /* last pass */
    #####:  811:	    size = blocks_left * (int) el_size;
    #####:  812:	    blocks_left = 0;
        -:  813:	}
        -:  814:
    #####:  815:	last_idx = paramp->index - 1;
    #####:  816:	if (last_idx >= 0) {
        -:  817:	    /* Since disps can be negative, we cannot use
        -:  818:	     * MPID_Ensure_Aint_fits_in_pointer to verify that disps +
        -:  819:	     * blklens fits in a pointer.  Nor can we use
        -:  820:	     * MPI_AINT_CAST_TO_VOID_PTR to cast the sum to a pointer.
        -:  821:	     * Just let it truncate, if the sizeof a pointer is less
        -:  822:	     * than the sizeof an MPI_Aint.
        -:  823:	     */
    #####:  824:	    last_end = (char*) MPI_AINT_CAST_TO_VOID_PTR
        -:  825:		       (paramp->disps[last_idx] +
        -:  826:			((DLOOP_Offset) paramp->blklens[last_idx]));
        -:  827:	}
        -:  828:
        -:  829:	/* Since bufp can be a displacement and can be negative, we
        -:  830:	 * cannot use MPID_Ensure_Aint_fits_in_pointer to ensure the
        -:  831:	 * sum fits in a pointer.  Just let it truncate.
        -:  832:	 */
    #####:  833:        if ((last_idx == paramp->length-1) &&
        -:  834:            (last_end != ((char *) bufp + rel_off)))
        -:  835:	{
        -:  836:	    /* we have used up all our entries, and this one doesn't fit on
        -:  837:	     * the end of the last one.
        -:  838:	     */
    #####:  839:	    *blocks_p -= ((DLOOP_Offset) blocks_left +
        -:  840:			  (((DLOOP_Offset) size) / el_size));
    #####:  841:	    return 1;
        -:  842:	}
    #####:  843:        else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
        -:  844:	{
        -:  845:	    /* add this size to the last vector rather than using up new one */
    #####:  846:	    paramp->blklens[last_idx] += size;
        -:  847:	}
        -:  848:	else {
        -:  849:	    /* Since bufp can be a displacement and can be negative, we cannot
        -:  850:	     * use MPI_VOID_PTR_CAST_TO_MPI_AINT to cast the sum to a pointer.
        -:  851:	     * Just let it sign extend.
        -:  852:	     */
    #####:  853:            paramp->disps[last_idx+1]   = MPI_PTR_DISP_CAST_TO_MPI_AINT bufp + 
        -:  854:		rel_off + offsetarray[i];
    #####:  855:	    paramp->blklens[last_idx+1] = size;
    #####:  856:	    paramp->index++;
        -:  857:	}
        -:  858:    }
        -:  859:
        -:  860:    /* if we get here then we processed ALL the blocks; don't need to update
        -:  861:     * blocks_p
        -:  862:     */
    #####:  863:    DLOOP_Assert(blocks_left == 0);
    #####:  864:    return 0;
        -:  865:}
        -:  866:
        -:  867:static int DLOOP_Segment_index_mpi_flatten(DLOOP_Offset *blocks_p,
        -:  868:					   DLOOP_Count count,
        -:  869:					   DLOOP_Count *blockarray,
        -:  870:					   DLOOP_Offset *offsetarray,
        -:  871:					   DLOOP_Type el_type,
        -:  872:					   DLOOP_Offset rel_off,
        -:  873:					   void *bufp,
        -:  874:					   void *v_paramp)
     4618:  875:{
        -:  876:    int i, size, blocks_left;
        -:  877:    DLOOP_Offset el_size;
     4618:  878:    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;
        -:  879:
     4618:  880:    DLOOP_Handle_get_size_macro(el_type, el_size);
     4618:  881:    blocks_left = *blocks_p;
        -:  882:
    61014:  883:    for (i=0; i < count && blocks_left > 0; i++) {
        -:  884:	int last_idx;
    56396:  885:	char *last_end = NULL;
        -:  886:
    56396:  887:	if (blocks_left > blockarray[i]) {
    51778:  888:	    size = blockarray[i] * (int) el_size;
    51778:  889:	    blocks_left -= blockarray[i];
        -:  890:	}
        -:  891:	else {
        -:  892:	    /* last pass */
     4618:  893:	    size = blocks_left * (int) el_size;
     4618:  894:	    blocks_left = 0;
        -:  895:	}
        -:  896:
    56396:  897:	last_idx = paramp->index - 1;
    56396:  898:	if (last_idx >= 0) {
        -:  899:	    /* Since disps can be negative, we cannot use
        -:  900:	     * MPID_Ensure_Aint_fits_in_pointer to verify that disps +
        -:  901:	     * blklens fits in a pointer.  Nor can we use
        -:  902:	     * MPI_AINT_CAST_TO_VOID_PTR to cast the sum to a pointer.
        -:  903:	     * Just let it truncate, if the sizeof a pointer is less
        -:  904:	     * than the sizeof an MPI_Aint.
        -:  905:	     */
    52033:  906:	    last_end = (char *) MPI_AINT_CAST_TO_VOID_PTR
        -:  907:		       (paramp->disps[last_idx] +
        -:  908:			(MPI_Aint)(paramp->blklens[last_idx]));
        -:  909:	}
        -:  910:
        -:  911:	/* Since bufp can be a displacement and can be negative, we
        -:  912:	 * cannot use MPID_Ensure_Aint_fits_in_pointer to ensure the
        -:  913:	 * sum fits in a pointer.  Just let it truncate.
        -:  914:	 */
    56396:  915:        if ((last_idx == paramp->length-1) &&
        -:  916:            (last_end != ((char *) bufp + rel_off)))
        -:  917:	{
        -:  918:	    /* we have used up all our entries, and this one doesn't fit on
        -:  919:	     * the end of the last one.
        -:  920:	     */
    #####:  921:	    *blocks_p -= (blocks_left + (size / (int) el_size));
    #####:  922:	    return 1;
        -:  923:	}
    56651:  924:        else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
        -:  925:	{
        -:  926:	    /* add this size to the last vector rather than using up new one */
      255:  927:	    paramp->blklens[last_idx] += size;
        -:  928:	}
        -:  929:	else {
        -:  930:	    /* Since bufp can be a displacement and can be negative, we cannot
        -:  931:	     * use MPI_VOID_PTR_CAST_TO_MPI_AINT to cast the sum to a pointer.
        -:  932:	     * Just let it sign extend.
        -:  933:	     */
    56141:  934:            paramp->disps[last_idx+1]   = MPI_PTR_DISP_CAST_TO_MPI_AINT bufp +
        -:  935:		rel_off + offsetarray[i];
    56141:  936:	    paramp->blklens[last_idx+1] = size; /* these blocks are in bytes */
    56141:  937:	    paramp->index++;
        -:  938:	}
        -:  939:    }
        -:  940:
        -:  941:    /* if we get here then we processed ALL the blocks; don't need to update
        -:  942:     * blocks_p
        -:  943:     */
     4618:  944:    DLOOP_Assert(blocks_left == 0);
     4618:  945:    return 0;
        -:  946:}
        -:  947:
        -:  948:/* MPID_Segment_mpi_flatten - flatten a type into a representation
        -:  949: *                            appropriate for passing to hindexed create.
        -:  950: *
        -:  951: * Parameters:
        -:  952: * segp    - pointer to segment structure
        -:  953: * first   - first byte in segment to pack
        -:  954: * lastp   - in/out parameter describing last byte to pack (and afterwards
        -:  955: *           the last byte _actually_ packed)
        -:  956: *           NOTE: actually returns index of byte _after_ last one packed
        -:  957: * blklens, disps - the usual blocklength and displacement arrays for MPI
        -:  958: * lengthp - in/out parameter describing length of array (and afterwards
        -:  959: *           the amount of the array that has actual data)
        -:  960: */
        -:  961:void PREPEND_PREFIX(Segment_mpi_flatten)(DLOOP_Segment *segp,
        -:  962:					 DLOOP_Offset first,
        -:  963:					 DLOOP_Offset *lastp,
        -:  964:					 int *blklens,
        -:  965:					 MPI_Aint *disps,
        -:  966:					 int *lengthp)
    28256:  967:{
        -:  968:    struct PREPEND_PREFIX(mpi_flatten_params) params;
        -:  969:
    28256:  970:    DLOOP_Assert(*lengthp > 0);
        -:  971:
    28256:  972:    params.index   = 0;
    28256:  973:    params.length  = *lengthp;
    28256:  974:    params.blklens = blklens;
    28256:  975:    params.disps   = disps;
        -:  976:
    28256:  977:    PREPEND_PREFIX(Segment_manipulate)(segp,
        -:  978:				       first,
        -:  979:				       lastp,
        -:  980:				       DLOOP_Segment_contig_mpi_flatten,
        -:  981:				       DLOOP_Segment_vector_mpi_flatten,
        -:  982:				       DLOOP_Segment_blkidx_mpi_flatten,
        -:  983:				       DLOOP_Segment_index_mpi_flatten,
        -:  984:				       NULL,
        -:  985:				       &params);
        -:  986:
        -:  987:    /* last value already handled by MPID_Segment_manipulate */
    28256:  988:    *lengthp = params.index;
        -:  989:    return;
        -:  990:}
        -:  991:
        -:  992:/*
        -:  993: * Local variables:
        -:  994: * c-indent-tabs-mode: nil
        -:  995: * End:
        -:  996: */