-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop_create_blockindexed.c
-: 0:Graph:dataloop_create_blockindexed.gcno
-: 0:Data:dataloop_create_blockindexed.gcda
-: 0:Runs:3459
-: 0:Programs:899
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:
-: 3:/*
-: 4: * (C) 2001 by Argonne National Laboratory.
-: 5: * See COPYRIGHT in top-level directory.
-: 6: */
-: 7:
-: 8:#include <stdio.h>
-: 9:
-: 10:#include "./dataloop.h"
-: 11:
-: 12:static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
-: 13: void *disp_array,
-: 14: DLOOP_Offset *out_disp_array,
-: 15: int dispinbytes,
-: 16: DLOOP_Offset old_extent);
-: 17:
-: 18:/*@
-: 19: Dataloop_create_blockindexed - create blockindexed dataloop
-: 20:
-: 21: Arguments:
-: 22:+ int count
-: 23:. void *displacement_array (array of either MPI_Aints or ints)
-: 24:. int displacement_in_bytes (boolean)
-: 25:. MPI_Datatype old_type
-: 26:. DLOOP_Dataloop **output_dataloop_ptr
-: 27:. int output_dataloop_size
-: 28:. int output_dataloop_depth
-: 29:- int flag
-: 30:
-: 31:.N Errors
-: 32:.N Returns 0 on success, -1 on failure.
-: 33:@*/
-: 34:int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,
-: 35: int iblklen,
-: 36: void *disp_array,
-: 37: int dispinbytes,
-: 38: DLOOP_Type oldtype,
-: 39: DLOOP_Dataloop **dlp_p,
-: 40: int *dlsz_p,
-: 41: int *dldepth_p,
-: 42: int flag)
28992: 43:{
28992: 44: int err, is_builtin, is_vectorizable = 1;
-: 45: int i, new_loop_sz, old_loop_depth;
-: 46:
-: 47: DLOOP_Count contig_count, count, blklen;
-: 48: DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
-: 49: DLOOP_Dataloop *new_dlp;
-: 50:
28992: 51: count = (DLOOP_Count) icount; /* avoid subsequent casting */
28992: 52: blklen = (DLOOP_Count) iblklen;
-: 53:
-: 54: /* if count or blklen are zero, handle with contig code, call it a int */
28992: 55: if (count == 0 || blklen == 0)
-: 56: {
|
#####: 57: err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
-: 58: MPI_INT,
-: 59: dlp_p,
-: 60: dlsz_p,
-: 61: dldepth_p,
-: 62: flag);
#####: 63: return err;
-: 64: }
-: 65:
|
28992: 66: is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
-: 67:
28992: 68: if (is_builtin)
-: 69: {
26986: 70: DLOOP_Handle_get_size_macro(oldtype, old_extent);
26986: 71: old_loop_depth = 0;
-: 72: }
-: 73: else
-: 74: {
2006: 75: DLOOP_Handle_get_extent_macro(oldtype, old_extent);
2006: 76: DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
-: 77: }
-: 78:
28992: 79: contig_count = PREPEND_PREFIX(Type_blockindexed_count_contig)(count,
-: 80: blklen,
-: 81: disp_array,
-: 82: dispinbytes,
-: 83: old_extent);
-: 84:
-: 85: /* optimization:
-: 86: *
-: 87: * if contig_count == 1 and block starts at displacement 0,
-: 88: * store it as a contiguous rather than a blockindexed dataloop.
-: 89: */
28992: 90: if ((contig_count == 1) &&
-: 91: ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
-: 92: (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))
-: 93: {
1440: 94: err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,
-: 95: oldtype,
-: 96: dlp_p,
-: 97: dlsz_p,
-: 98: dldepth_p,
-: 99: flag);
1440: 100: return err;
-: 101: }
-: 102:
-: 103: /* optimization:
-: 104: *
-: 105: * if contig_count == 1 store it as a blockindexed with one
-: 106: * element rather than as a lot of individual blocks.
-: 107: */
27552: 108: if (contig_count == 1)
-: 109: {
-: 110: /* adjust count and blklen and drop through */
3222: 111: blklen *= count;
3222: 112: count = 1;
3222: 113: iblklen *= icount;
3222: 114: icount = 1;
-: 115: }
-: 116:
-: 117: /* optimization:
-: 118: *
-: 119: * if displacements start at zero and result in a fixed stride,
-: 120: * store it as a vector rather than a blockindexed dataloop.
-: 121: */
27552: 122: eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
-: 123: (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);
-: 124:
27552: 125: if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
-: 126: {
17252: 127: eff_disp1 = (dispinbytes) ?
-: 128: ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
-: 129: (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
17252: 130: last_stride = eff_disp1 - eff_disp0;
-: 131:
82314820: 132: for (i=2; i < count; i++) {
82297572: 133: eff_disp0 = eff_disp1;
82297572: 134: eff_disp1 = (dispinbytes) ?
-: 135: ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
-: 136: (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
82297572: 137: if (eff_disp1 - eff_disp0 != last_stride) {
4: 138: is_vectorizable = 0;
4: 139: break;
-: 140: }
-: 141: }
17252: 142: if (is_vectorizable)
-: 143: {
17248: 144: err = PREPEND_PREFIX(Dataloop_create_vector)(count,
-: 145: blklen,
-: 146: last_stride,
-: 147: 1, /* strideinbytes */
-: 148: oldtype,
-: 149: dlp_p,
-: 150: dlsz_p,
-: 151: dldepth_p,
-: 152: flag);
17248: 153: return err;
-: 154: }
-: 155: }
-: 156:
-: 157: /* TODO: optimization:
-: 158: *
-: 159: * if displacements result in a fixed stride, but first displacement
-: 160: * is not zero, store it as a blockindexed (blklen == 1) of a vector.
-: 161: */
-: 162:
-: 163: /* TODO: optimization:
-: 164: *
-: 165: * if a blockindexed of a contig, absorb the contig into the blocklen
-: 166: * parameter and keep the same overall depth
-: 167: */
-: 168:
-: 169: /* otherwise storing as a blockindexed dataloop */
-: 170:
-: 171: /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
-: 172: * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
-: 173: */
-: 174:
10304: 175: if (is_builtin)
-: 176: {
9278: 177: PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,
-: 178: count,
-: 179: &new_dlp,
-: 180: &new_loop_sz);
|
-: 181: /* --BEGIN ERROR HANDLING-- */
9278: 182: if (!new_dlp) return -1;
-: 183: /* --END ERROR HANDLING-- */
-: 184:
|
9278: 185: new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;
-: 186:
9278: 187: if (flag == DLOOP_DATALOOP_ALL_BYTES)
-: 188: {
|
#####: 189: blklen *= old_extent;
#####: 190: new_dlp->el_size = 1;
#####: 191: new_dlp->el_extent = 1;
#####: 192: new_dlp->el_type = MPI_BYTE;
-: 193: }
-: 194: else
-: 195: {
|
9278: 196: new_dlp->el_size = old_extent;
9278: 197: new_dlp->el_extent = old_extent;
9278: 198: new_dlp->el_type = oldtype;
-: 199: }
-: 200: }
-: 201: else
-: 202: {
1026: 203: DLOOP_Dataloop *old_loop_ptr = NULL;
1026: 204: int old_loop_sz = 0;
-: 205:
1026: 206: DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
1026: 207: DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
-: 208:
1026: 209: PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
-: 210: count,
-: 211: old_loop_ptr,
-: 212: old_loop_sz,
-: 213: &new_dlp,
-: 214: &new_loop_sz);
|
-: 215: /* --BEGIN ERROR HANDLING-- */
1026: 216: if (!new_dlp) return -1;
-: 217: /* --END ERROR HANDLING-- */
-: 218:
|
1026: 219: new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;
-: 220:
1026: 221: DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
1026: 222: DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
1026: 223: DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
-: 224: }
-: 225:
10304: 226: new_dlp->loop_params.bi_t.count = count;
10304: 227: new_dlp->loop_params.bi_t.blocksize = blklen;
-: 228:
-: 229: /* copy in displacement parameters
-: 230: *
-: 231: * regardless of dispinbytes, we store displacements in bytes in loop.
-: 232: */
10304: 233: DLOOP_Type_blockindexed_array_copy(count,
-: 234: disp_array,
-: 235: new_dlp->loop_params.bi_t.offset_array,
-: 236: dispinbytes,
-: 237: old_extent);
-: 238:
10304: 239: *dlp_p = new_dlp;
10304: 240: *dlsz_p = new_loop_sz;
10304: 241: *dldepth_p = old_loop_depth + 1;
-: 242:
10304: 243: return 0;
-: 244:}
-: 245:
-: 246:/* DLOOP_Type_blockindexed_array_copy
-: 247: *
-: 248: * Unlike the indexed version, this one does not compact adjacent
-: 249: * blocks, because that would really mess up the blockindexed type!
-: 250: */
-: 251:static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
-: 252: void *in_disp_array,
-: 253: DLOOP_Offset *out_disp_array,
-: 254: int dispinbytes,
-: 255: DLOOP_Offset old_extent)
10304: 256:{
-: 257: int i;
10304: 258: if (!dispinbytes)
-: 259: {
13618: 260: for (i=0; i < count; i++)
-: 261: {
8870: 262: out_disp_array[i] =
-: 263: ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
-: 264: }
-: 265: }
-: 266: else
-: 267: {
575579: 268: for (i=0; i < count; i++)
-: 269: {
570023: 270: out_disp_array[i] =
-: 271: ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);
-: 272: }
-: 273: }
-: 274: return;
-: 275:}
-: 276:
-: 277:DLOOP_Count PREPEND_PREFIX(Type_blockindexed_count_contig)(DLOOP_Count count,
-: 278: DLOOP_Count blklen,
-: 279: void *disp_array,
-: 280: int dispinbytes,
-: 281: DLOOP_Offset old_extent)
29525: 282:{
29525: 283: int i, contig_count = 1;
-: 284:
29525: 285: if (!dispinbytes)
-: 286: {
-: 287: /* this is from the MPI type, is of type int */
22473: 288: DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];
-: 289:
82343312: 290: for (i=1; i < count; i++)
-: 291: {
82320839: 292: DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];
-: 293:
82320839: 294: if (cur_tdisp + (DLOOP_Offset) blklen != next_tdisp)
-: 295: {
82318281: 296: contig_count++;
-: 297: }
82320839: 298: cur_tdisp = next_tdisp;
-: 299: }
-: 300: }
-: 301: else
-: 302: {
-: 303: /* this is from the MPI type, is of type MPI_Aint */
7052: 304: DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];
-: 305:
572261: 306: for (i=1; i < count; i++)
-: 307: {
-: 308: DLOOP_Offset next_bdisp =
565209: 309: (DLOOP_Offset) ((MPI_Aint *) disp_array)[i];
-: 310:
565209: 311: if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)
-: 312: {
565209: 313: contig_count++;
-: 314: }
565209: 315: cur_bdisp = next_bdisp;
-: 316: }
-: 317: }
29525: 318: return contig_count;
-: 319:}
|