-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop_create_blockindexed.c
        -:    0:Graph:dataloop_create_blockindexed.gcno
        -:    0:Data:dataloop_create_blockindexed.gcda
        -:    0:Runs:3459
        -:    0:Programs:899
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <stdio.h>
        -:    9:
        -:   10:#include "./dataloop.h"
        -:   11:
        -:   12:static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
        -:   13:					       void *disp_array,
        -:   14:					       DLOOP_Offset *out_disp_array,
        -:   15:					       int dispinbytes,
        -:   16:					       DLOOP_Offset old_extent);
        -:   17:
        -:   18:/*@
        -:   19:   Dataloop_create_blockindexed - create blockindexed dataloop
        -:   20:
        -:   21:   Arguments:
        -:   22:+  int icount (number of blocks)
        -:   23:.  int iblklen (number of oldtype elements in each block)
        -:   24:.  void *disp_array (array of either MPI_Aints or ints)
        -:   25:.  int dispinbytes (boolean; nonzero if disp_array holds MPI_Aint byte offsets)
        -:   26:.  DLOOP_Type oldtype
        -:   27:.  DLOOP_Dataloop **dlp_p (output: new dataloop)
        -:   28:.  int *dlsz_p, int *dldepth_p (output: dataloop size and depth)
        -:   29:-  int flag
        -:   30:
        -:   31:.N Errors
        -:   32:.N Returns 0 on success, -1 on failure.
        -:   33:@*/
        -:   34:int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,
        -:   35:						 int iblklen,
        -:   36:						 void *disp_array,
        -:   37:						 int dispinbytes,
        -:   38:						 DLOOP_Type oldtype,
        -:   39:						 DLOOP_Dataloop **dlp_p,
        -:   40:						 int *dlsz_p,
        -:   41:						 int *dldepth_p,
        -:   42:						 int flag)
    28992:   43:{
    28992:   44:    int err, is_builtin, is_vectorizable = 1;
        -:   45:    int i, new_loop_sz, old_loop_depth;
        -:   46:
        -:   47:    DLOOP_Count contig_count, count, blklen;
        -:   48:    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
        -:   49:    DLOOP_Dataloop *new_dlp;
        -:   50:
    28992:   51:    count  = (DLOOP_Count) icount; /* avoid subsequent casting */
    28992:   52:    blklen = (DLOOP_Count) iblklen;
        -:   53:
        -:   54:    /* if count or blklen are zero, handle with contig code, call it an int */
    28992:   55:    if (count == 0 || blklen == 0)
        -:   56:    {
    #####:   57:	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
        -:   58:							 MPI_INT,
        -:   59:							 dlp_p,
        -:   60:							 dlsz_p,
        -:   61:							 dldepth_p,
        -:   62:							 flag);
    #####:   63:	return err;
        -:   64:    }
        -:   65:    /* a handle without a dataloop is treated as a builtin type */
    28992:   66:    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
        -:   67:    /* builtins use their size as the extent and contribute no loop depth */
    28992:   68:    if (is_builtin)
        -:   69:    {
    26986:   70:	DLOOP_Handle_get_size_macro(oldtype, old_extent);
    26986:   71:	old_loop_depth = 0;
        -:   72:    }
        -:   73:    else
        -:   74:    {
     2006:   75:	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
     2006:   76:	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
        -:   77:    }
        -:   78:
    28992:   79:    contig_count = PREPEND_PREFIX(Type_blockindexed_count_contig)(count,
        -:   80:                                                                  blklen,
        -:   81:                                                                  disp_array,
        -:   82:                                                                  dispinbytes,
        -:   83:                                                                  old_extent);
        -:   84:
        -:   85:    /* optimization:
        -:   86:     * if contig_count == 1 and block starts at displacement 0,
        -:   87:     * store it as a contiguous rather than a blockindexed dataloop.
        -:   88:     * NOTE(review): icount * iblklen below is an int multiply -- confirm
        -:   89:     * that it cannot overflow for large counts / block lengths. */
    28992:   90:    if ((contig_count == 1) &&
        -:   91:	((!dispinbytes && ((int *) disp_array)[0] == 0) ||
        -:   92:	 (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))
        -:   93:    {
     1440:   94:	err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,
        -:   95:							 oldtype,
        -:   96:							 dlp_p,
        -:   97:							 dlsz_p,
        -:   98:							 dldepth_p,
        -:   99:							 flag);
     1440:  100:	return err;
        -:  101:    }
        -:  102:
        -:  103:    /* optimization:
        -:  104:     * (reached only when the first displacement is nonzero)
        -:  105:     * if contig_count == 1 store it as a blockindexed with one
        -:  106:     * element rather than as a lot of individual blocks.
        -:  107:     */
    27552:  108:    if (contig_count == 1)
        -:  109:    {
        -:  110:	/* adjust count and blklen and drop through (icount/iblklen are not read again below) */
     3222:  111:	blklen *= count;
     3222:  112:	count = 1;
     3222:  113:	iblklen *= icount;
     3222:  114:	icount = 1;
        -:  115:    }
        -:  116:
        -:  117:    /* optimization:
        -:  118:     *
        -:  119:     * if displacements start at zero and result in a fixed stride,
        -:  120:     * store it as a vector rather than a blockindexed dataloop.
        -:  121:     */
    27552:  122:    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
        -:  123:	(((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);
        -:  124:    /* eff_disp0/eff_disp1 are byte offsets; int displacements are scaled by old_extent */
    27552:  125:    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
        -:  126:    {
    17252:  127:	eff_disp1 = (dispinbytes) ?
        -:  128:	    ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
        -:  129:	    (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
    17252:  130:	last_stride = eff_disp1 - eff_disp0;
        -:  131:	/* with count == 2 the loop body never runs and is_vectorizable stays 1 */
 82314820:  132:	for (i=2; i < count; i++) {
 82297572:  133:	    eff_disp0 = eff_disp1;
 82297572:  134:	    eff_disp1 = (dispinbytes) ?
        -:  135:		((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
        -:  136:		(((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
 82297572:  137:	    if (eff_disp1 - eff_disp0 != last_stride) {
        4:  138:		is_vectorizable = 0;
        4:  139:		break;
        -:  140:	    }
        -:  141:	}
    17252:  142:	if (is_vectorizable)
        -:  143:	{
    17248:  144:	    err = PREPEND_PREFIX(Dataloop_create_vector)(count,
        -:  145:							 blklen,
        -:  146:							 last_stride,
        -:  147:							 1, /* strideinbytes */
        -:  148:							 oldtype,
        -:  149:							 dlp_p,
        -:  150:							 dlsz_p,
        -:  151:							 dldepth_p,
        -:  152:							 flag);
    17248:  153:	    return err;
        -:  154:	}
        -:  155:    }
        -:  156:
        -:  157:    /* TODO: optimization:
        -:  158:     *
        -:  159:     * if displacements result in a fixed stride, but first displacement
        -:  160:     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
        -:  161:     */
        -:  162:
        -:  163:    /* TODO: optimization:
        -:  164:     *
        -:  165:     * if a blockindexed of a contig, absorb the contig into the blocklen
        -:  166:     * parameter and keep the same overall depth
        -:  167:     */
        -:  168:
        -:  169:    /* otherwise storing as a blockindexed dataloop */
        -:  170:
        -:  171:    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
        -:  172:     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
        -:  173:     */
        -:  174:
    10304:  175:    if (is_builtin)
        -:  176:    {
     9278:  177:	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,
        -:  178:				       count,
        -:  179:				       &new_dlp,
        -:  180:				       &new_loop_sz);
        -:  181:	/* --BEGIN ERROR HANDLING-- */
     9278:  182:	if (!new_dlp) return -1;
        -:  183:	/* --END ERROR HANDLING-- */
        -:  184:	/* builtin oldtype: kind carries DLOOP_FINAL_MASK (no child loop is copied in this branch) */
     9278:  185:	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;
        -:  186:	/* in ALL_BYTES mode the block length is converted to bytes and elements become MPI_BYTE */
     9278:  187:	if (flag == DLOOP_DATALOOP_ALL_BYTES)
        -:  188:	{
    #####:  189:	    blklen            *= old_extent;
    #####:  190:	    new_dlp->el_size   = 1;
    #####:  191:	    new_dlp->el_extent = 1;
    #####:  192:	    new_dlp->el_type   = MPI_BYTE;
        -:  193:	}
        -:  194:	else
        -:  195:	{
     9278:  196:	    new_dlp->el_size   = old_extent;
     9278:  197:	    new_dlp->el_extent = old_extent;
     9278:  198:	    new_dlp->el_type   = oldtype;
        -:  199:	}
        -:  200:    }
        -:  201:    else
        -:  202:    {
     1026:  203:	DLOOP_Dataloop *old_loop_ptr = NULL;
     1026:  204:	int old_loop_sz = 0;
        -:  205:
     1026:  206:	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
     1026:  207:	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
        -:  208:	/* presumably allocates the new loop with room for a copy of the old loop -- see Dataloop_alloc_and_copy */
     1026:  209:	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
        -:  210:						count,
        -:  211:						old_loop_ptr,
        -:  212:						old_loop_sz,
        -:  213:						&new_dlp,
        -:  214:						&new_loop_sz);
        -:  215:	/* --BEGIN ERROR HANDLING-- */
     1026:  216:	if (!new_dlp) return -1;
        -:  217:	/* --END ERROR HANDLING-- */
        -:  218:
     1026:  219:	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;
        -:  220:
     1026:  221:	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
     1026:  222:	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
     1026:  223:	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
        -:  224:    }
        -:  225:
    10304:  226:    new_dlp->loop_params.bi_t.count     = count;
    10304:  227:    new_dlp->loop_params.bi_t.blocksize = blklen;
        -:  228:
        -:  229:    /* copy in displacement parameters
        -:  230:     *
        -:  231:     * regardless of dispinbytes, we store displacements in bytes in loop.
        -:  232:     */
    10304:  233:    DLOOP_Type_blockindexed_array_copy(count,
        -:  234:				       disp_array,
        -:  235:				       new_dlp->loop_params.bi_t.offset_array,
        -:  236:				       dispinbytes,
        -:  237:				       old_extent);
        -:  238:
    10304:  239:    *dlp_p     = new_dlp;
    10304:  240:    *dlsz_p    = new_loop_sz;
    10304:  241:    *dldepth_p = old_loop_depth + 1;
        -:  242:
    10304:  243:    return 0;
        -:  244:}
        -:  245:
        -:  246:/* DLOOP_Type_blockindexed_array_copy - copy count displacements from
        -:  247: * in_disp_array to out_disp_array as byte offsets: int entries (when
        -:  248: * !dispinbytes) are multiplied by old_extent, MPI_Aint entries are
        -:  249: * copied as-is.  Unlike the indexed version, adjacent blocks are not
        -:  250: * compacted, because that would really mess up the blockindexed type! */
        -:  251:static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
        -:  252:					       void *in_disp_array,
        -:  253:					       DLOOP_Offset *out_disp_array,
        -:  254:					       int dispinbytes,
        -:  255:					       DLOOP_Offset old_extent)
    10304:  256:{
        -:  257:    int i;
    10304:  258:    if (!dispinbytes)
        -:  259:    {
    13618:  260:	for (i=0; i < count; i++)
        -:  261:	{
     8870:  262:	    out_disp_array[i] =
        -:  263:		((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
        -:  264:	}
        -:  265:    }
        -:  266:    else
        -:  267:    {
   575579:  268:	for (i=0; i < count; i++)
        -:  269:	{
   570023:  270:	    out_disp_array[i] =
        -:  271:		((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);
        -:  272:	}
        -:  273:    }
        -:  274:    return;
        -:  275:}
        -:  276:
        -:  277:DLOOP_Count PREPEND_PREFIX(Type_blockindexed_count_contig)(DLOOP_Count count,
        -:  278:                                                           DLOOP_Count blklen,
        -:  279:                                                           void *disp_array,
        -:  280:                                                           int dispinbytes,
        -:  281:                                                           DLOOP_Offset old_extent)
    29525:  282:{
    29525:  283:    int i, contig_count = 1;
        -:  284:    /* Count runs of back-to-back blocks in disp_array; the first block always starts one. */
    29525:  285:    if (!dispinbytes)
        -:  286:    {
        -:  287:	/* this is from the MPI type, is of type int; compared in units of old_extent */
    22473:  288:	DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];
        -:  289:	/* NOTE(review): i and contig_count are int while count is DLOOP_Count -- confirm count fits in int */
 82343312:  290:	for (i=1; i < count; i++)
        -:  291:	{
 82320839:  292:	    DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];
        -:  293:	    /* a new run starts unless this block begins exactly blklen elements after the previous one */
 82320839:  294:	    if (cur_tdisp + (DLOOP_Offset) blklen != next_tdisp)
        -:  295:	    {
 82318281:  296:		contig_count++;
        -:  297:	    }
 82320839:  298:	    cur_tdisp = next_tdisp;
        -:  299:	}
        -:  300:    }
        -:  301:    else
        -:  302:    {
        -:  303:	/* this is from the MPI type, is of type MPI_Aint; compared in bytes */
     7052:  304:	DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];
        -:  305:
   572261:  306:	for (i=1; i < count; i++)
        -:  307:	{
        -:  308:	    DLOOP_Offset next_bdisp =
   565209:  309:		(DLOOP_Offset) ((MPI_Aint *) disp_array)[i];
        -:  310:	    /* a new run starts unless this block begins exactly blklen * old_extent bytes after the previous one */
   565209:  311:	    if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)
        -:  312:	    {
   565209:  313:		contig_count++;
        -:  314:	    }
   565209:  315:	    cur_bdisp = next_bdisp;
        -:  316:	}
        -:  317:    }
    29525:  318:    return contig_count;
        -:  319:}