-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop_create_indexed.c
-: 0:Graph:dataloop_create_indexed.gcno
-: 0:Data:dataloop_create_indexed.gcda
-: 0:Runs:3459
-: 0:Programs:899
-: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
-: 2:
-: 3:/*
-: 4: * (C) 2001 by Argonne National Laboratory.
-: 5: * See COPYRIGHT in top-level directory.
-: 6: */
-: 7:
-: 8:#include <stdlib.h>
-: 9:
-: 10:#include "./dataloop.h"
-: 11:
-: 12:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
-: 13: DLOOP_Count contig_count,
-: 14: int *input_blocklength_array,
-: 15: void *input_displacement_array,
-: 16: DLOOP_Count *output_blocklength_array,
-: 17: DLOOP_Offset *out_disp_array,
-: 18: int dispinbytes,
-: 19: DLOOP_Offset old_extent);
-: 20:
-: 21:/*@
-: 22: DLOOP_Dataloop_create_indexed
-: 23:
-: 24: Arguments:
-: 25:+ int icount
-: 26:. int *iblocklength_array
-: 27:. void *displacement_array (either ints or MPI_Aints)
-: 28:. int dispinbytes
-: 29:. MPI_Datatype oldtype
-: 30:. DLOOP_Dataloop **dlp_p
-: 31:. int *dlsz_p
-: 32:. int *dldepth_p
-: 33:- int flag
-: 34:
-: 35:.N Errors
-: 36:.N Returns 0 on success, -1 on error.
-: 37:@*/
-: 38:
-: 39:int PREPEND_PREFIX(Dataloop_create_indexed)(int icount,
-: 40: int *blocklength_array,
-: 41: void *displacement_array,
-: 42: int dispinbytes,
-: 43: MPI_Datatype oldtype,
-: 44: DLOOP_Dataloop **dlp_p,
-: 45: int *dlsz_p,
-: 46: int *dldepth_p,
-: 47: int flag)
73589: 48:{
-: 49: int err, is_builtin;
-: 50: int i, new_loop_sz, old_loop_depth, blksz;
-: 51: DLOOP_Count first;
-: 52:
73589: 53: DLOOP_Count old_type_count = 0, contig_count, count;
-: 54: DLOOP_Offset old_extent;
-: 55: struct DLOOP_Dataloop *new_dlp;
-: 56:
73589: 57: count = (DLOOP_Count) icount; /* avoid subsequent casting */
-: 58:
-: 59:
-: 60: /* if count is zero, handle with contig code, call it an int */
73589: 61: if (count == 0)
-: 62: {
|
#####: 63: err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
-: 64: MPI_INT,
-: 65: dlp_p,
-: 66: dlsz_p,
-: 67: dldepth_p,
-: 68: flag);
#####: 69: return err;
-: 70: }
-: 71:
-: 72: /* Skip any initial zero-length blocks */
|
78893: 73: for (first = 0; first < count; first++)
77003: 74: if ((DLOOP_Count) blocklength_array[first])
71699: 75: break;
-: 76:
-: 77:
73589: 78: is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
-: 79:
73589: 80: if (is_builtin)
-: 81: {
72231: 82: DLOOP_Handle_get_extent_macro(oldtype, old_extent);
72231: 83: old_loop_depth = 0;
-: 84: }
-: 85: else
-: 86: {
1358: 87: DLOOP_Handle_get_extent_macro(oldtype, old_extent);
1358: 88: DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
-: 89: }
-: 90:
-: 92: {
-: 94: }
-: 95:
73589: 96: contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
-: 97: blocklength_array,
-: 98: displacement_array,
-: 99: dispinbytes,
-: 100: old_extent);
-: 101:
-: 102: /* if contig_count is zero (no data), handle with contig code */
73589: 103: if (contig_count == 0)
-: 104: {
1890: 105: err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
-: 106: MPI_INT,
-: 107: dlp_p,
-: 108: dlsz_p,
-: 109: dldepth_p,
-: 110: flag);
1890: 111: return err;
-: 112: }
-: 113:
-: 114: /* optimization:
-: 115: *
-: 116: * if contig_count == 1 and block starts at displacement 0,
-: 117: * store it as a contiguous rather than an indexed dataloop.
-: 118: */
71699: 119: if ((contig_count == 1) &&
-: 120: ((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
-: 121: (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
-: 122: {
30270: 123: err = PREPEND_PREFIX(Dataloop_create_contiguous)((int) old_type_count,
-: 124: oldtype,
-: 125: dlp_p,
-: 126: dlsz_p,
-: 127: dldepth_p,
-: 128: flag);
30270: 129: return err;
-: 130: }
-: 131:
-: 132: /* optimization:
-: 133: *
-: 134: * if contig_count == 1 (and displacement != 0), store this as
-: 135: * a single element blockindexed rather than a lot of individual
-: 136: * blocks.
-: 137: */
41429: 138: if (contig_count == 1)
-: 139: {
1714: 140: err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
-: 141: (int) old_type_count,
-: 142: &(((int *)displacement_array)[first]),
-: 143: dispinbytes,
-: 144: oldtype,
-: 145: dlp_p,
-: 146: dlsz_p,
-: 147: dldepth_p,
-: 148: flag);
-: 149:
1714: 150: return err;
-: 151: }
-: 152:
-: 153: /* optimization:
-: 154: *
-: 155: * if block length is the same for all blocks, store it as a
-: 156: * blockindexed rather than an indexed dataloop.
-: 157: */
39715: 158: blksz = blocklength_array[first];
82920878: 159: for (i = first+1; i < count; i++)
-: 160: {
82897272: 161: if (blocklength_array[i] != blksz)
-: 162: {
16109: 163: blksz--;
16109: 164: break;
-: 165: }
-: 166: }
39715: 167: if (blksz == blocklength_array[first])
-: 168: {
23606: 169: err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
-: 170: blksz,
-: 171: &(((int *)displacement_array)[first]),
-: 172: dispinbytes,
-: 173: oldtype,
-: 174: dlp_p,
-: 175: dlsz_p,
-: 176: dldepth_p,
-: 177: flag);
-: 178:
23606: 179: return err;
-: 180: }
-: 181:
-: 182: /* note: blockindexed looks for the vector optimization */
-: 183:
-: 184: /* TODO: optimization:
-: 185: *
-: 186: * if an indexed of a contig, absorb the contig into the blocklen array
-: 187: * and keep the same overall depth
-: 188: */
-: 189:
-: 190: /* otherwise storing as an indexed dataloop */
-: 191:
16109: 192: if (is_builtin)
-: 193: {
16105: 194: PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
-: 195: count,
-: 196: &new_dlp,
-: 197: &new_loop_sz);
|
-: 198: /* --BEGIN ERROR HANDLING-- */
16105: 199: if (!new_dlp) return -1;
-: 200: /* --END ERROR HANDLING-- */
-: 201:
|
16105: 202: new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;
-: 203:
16105: 204: if (flag == DLOOP_DATALOOP_ALL_BYTES)
-: 205: {
-: 206: /* blocklengths are modified below */
|
#####: 207: new_dlp->el_size = 1;
#####: 208: new_dlp->el_extent = 1;
#####: 209: new_dlp->el_type = MPI_BYTE;
-: 210: }
-: 211: else
-: 212: {
|
16105: 213: new_dlp->el_size = old_extent;
16105: 214: new_dlp->el_extent = old_extent;
16105: 215: new_dlp->el_type = oldtype;
-: 216: }
-: 217: }
-: 218: else
-: 219: {
4: 220: DLOOP_Dataloop *old_loop_ptr = NULL;
4: 221: int old_loop_sz = 0;
-: 222:
4: 223: DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
4: 224: DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
-: 225:
4: 226: PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
-: 227: contig_count,
-: 228: old_loop_ptr,
-: 229: old_loop_sz,
-: 230: &new_dlp,
-: 231: &new_loop_sz);
|
-: 232: /* --BEGIN ERROR HANDLING-- */
4: 233: if (!new_dlp) return -1;
-: 234: /* --END ERROR HANDLING-- */
-: 235:
|
4: 236: new_dlp->kind = DLOOP_KIND_INDEXED;
-: 237:
4: 238: DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
4: 239: DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
4: 240: DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
-: 241: }
-: 242:
16109: 243: new_dlp->loop_params.i_t.count = contig_count;
16109: 244: new_dlp->loop_params.i_t.total_blocks = old_type_count;
-: 245:
-: 246: /* copy in blocklength and displacement parameters (in that order)
-: 247: *
-: 248: * regardless of dispinbytes, we store displacements in bytes in loop.
-: 249: */
16109: 250: DLOOP_Type_indexed_array_copy(count,
-: 251: contig_count,
-: 252: blocklength_array,
-: 253: displacement_array,
-: 254: new_dlp->loop_params.i_t.blocksize_array,
-: 255: new_dlp->loop_params.i_t.offset_array,
-: 256: dispinbytes,
-: 257: old_extent);
-: 258:
16109: 259: if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
-: 260: {
-: 261: DLOOP_Count *tmp_blklen_array =
|
#####: 262: new_dlp->loop_params.i_t.blocksize_array;
-: 263:
#####: 264: for (i=0; i < contig_count; i++)
-: 265: {
-: 266: /* increase block lengths so they are in bytes */
#####: 267: tmp_blklen_array[i] *= old_extent;
-: 268: }
-: 269:
#####: 270: new_dlp->loop_params.i_t.total_blocks *= old_extent;
-: 271: }
-: 272:
|
16109: 273: *dlp_p = new_dlp;
16109: 274: *dlsz_p = new_loop_sz;
16109: 275: *dldepth_p = old_loop_depth + 1;
-: 276:
16109: 277: return MPI_SUCCESS;
-: 278:}
-: 279:
-: 280:/* DLOOP_Type_indexed_array_copy()
-: 281: *
-: 282: * Copies arrays into place, combining adjacent contiguous regions and
-: 283: * dropping zero-length regions.
-: 284: *
-: 285: * Extent passed in is for the original type.
-: 286: *
-: 287: * Output displacements are always output in bytes, while block
-: 288: * lengths are always output in terms of the base type.
-: 289: */
-: 290:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
-: 291: DLOOP_Count contig_count,
-: 292: int *in_blklen_array,
-: 293: void *in_disp_array,
-: 294: DLOOP_Count *out_blklen_array,
-: 295: DLOOP_Offset *out_disp_array,
-: 296: int dispinbytes,
-: 297: DLOOP_Offset old_extent)
16109: 298:{
16109: 299: DLOOP_Count i, first, cur_idx = 0;
-: 300:
-: 301: /* Skip any initial zero-length blocks */
16109: 302: for (first = 0; first < count; ++first)
16109: 303: if ((DLOOP_Count) in_blklen_array[first])
16109: 304: break;
-: 305:
16109: 306: out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first];
-: 307:
16109: 308: if (!dispinbytes)
-: 309: {
5034: 310: out_disp_array[0] = (DLOOP_Offset)
-: 311: ((int *) in_disp_array)[first] * old_extent;
-: 312:
2103134: 313: for (i = first+1; i < count; ++i)
-: 314: {
2098100: 315: if (in_blklen_array[i] == 0)
-: 316: {
|
#####: 317: continue;
-: 318: }
|
2098100: 319: else if (out_disp_array[cur_idx] +
-: 320: ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
-: 321: ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent)
-: 322: {
-: 323: /* adjacent to current block; add to block */
2052: 324: out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
-: 325: }
-: 326: else
-: 327: {
2096048: 328: cur_idx++;
2096048: 329: DLOOP_Assert(cur_idx < contig_count);
2096048: 330: out_disp_array[cur_idx] =
-: 331: ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
2096048: 332: out_blklen_array[cur_idx] = in_blklen_array[i];
-: 333: }
-: 334: }
-: 335: }
-: 336: else /* input displacements already in bytes */
-: 337: {
11075: 338: out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first];
-: 339:
112789: 340: for (i = first+1; i < count; ++i)
-: 341: {
101714: 342: if (in_blklen_array[i] == 0)
-: 343: {
2: 344: continue;
-: 345: }
101712: 346: else if (out_disp_array[cur_idx] +
-: 347: ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
-: 348: ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]))
-: 349: {
-: 350: /* adjacent to current block; add to block */
21848: 351: out_blklen_array[cur_idx] += in_blklen_array[i];
-: 352: }
-: 353: else
-: 354: {
79864: 355: cur_idx++;
79864: 356: DLOOP_Assert(cur_idx < contig_count);
79864: 357: out_disp_array[cur_idx] =
-: 358: (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
79864: 359: out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
-: 360: }
-: 361: }
-: 362: }
-: 363:
16109: 364: DLOOP_Assert(cur_idx == contig_count - 1);
-: 365: return;
-: 366:}
-: 367:
-: 368:/* DLOOP_Type_indexed_count_contig()
-: 369: *
-: 370: * Determines the actual number of contiguous blocks represented by the
-: 371: * blocklength/displacement arrays. This might be less than count (as
-: 372: * few as 1).
-: 373: *
-: 374: * Extent passed in is for the original type.
-: 375: */
-: 376:DLOOP_Count PREPEND_PREFIX(Type_indexed_count_contig)(DLOOP_Count count,
-: 377: int *blocklength_array,
-: 378: void *displacement_array,
-: 379: int dispinbytes,
-: 380: DLOOP_Offset old_extent)
92706: 381:{
92706: 382: DLOOP_Count i, contig_count = 1;
-: 383: DLOOP_Count cur_blklen, first;
-: 384:
92706: 385: if (count)
-: 386: {
-: 387: /* Skip any initial zero-length blocks */
98852: 388: for (first = 0; first < count; ++first)
96962: 389: if ((DLOOP_Count) blocklength_array[first])
90816: 390: break;
-: 391:
92706: 392: if (first == count) { /* avoid invalid reads later on */
1890: 393: contig_count = 0;
1890: 394: return contig_count;
-: 395: }
-: 396:
90816: 397: cur_blklen = (DLOOP_Count) blocklength_array[first];
90816: 398: if (!dispinbytes)
-: 399: {
-: 400: DLOOP_Offset cur_tdisp =
53741: 401: (DLOOP_Offset) ((int *) displacement_array)[first];
-: 402:
-: 404: {
-: 406: {
2714: 407: continue;
-: 408: }
-: 410: (DLOOP_Offset) ((int *) displacement_array)[i])
-: 411: {
-: 412: /* adjacent to current block; add to block */
7905630: 413: cur_blklen += (DLOOP_Count) blocklength_array[i];
-: 414: }
-: 415: else
-: 416: {
-: 420: }
-: 421: }
-: 422: }
-: 423: else
-: 424: {
-: 425: DLOOP_Offset cur_bdisp =
37075: 426: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[first];
-: 427:
14135187: 428: for (i = first+1; i < count; ++i)
-: 429: {
14098112: 430: if (blocklength_array[i] == 0)
-: 431: {
3: 432: continue;
-: 433: }
14098109: 434: else if (cur_bdisp + (DLOOP_Offset) cur_blklen * old_extent ==
-: 435: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i])
-: 436: {
-: 437: /* adjacent to current block; add to block */
13172873: 438: cur_blklen += (DLOOP_Count) blocklength_array[i];
-: 439: }
-: 440: else
-: 441: {
925236: 442: cur_bdisp =
-: 443: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i];
925236: 444: cur_blklen = (DLOOP_Count) blocklength_array[i];
925236: 445: contig_count++;
-: 446: }
-: 447: }
-: 448: }
-: 449: }
90816: 450: return contig_count;
-: 451:}
|