Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
/* Source identification string for this file (path, revision, timestamp); the "@(#)" prefix is the marker that what(1)-style tools search for. */
00037 static const char USMID[] = "@(#) libfi/array/spread.c 92.0 10/08/98 14:37:14";
00038
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
/*
 * INCREMENT() - step the multi-dimensional source/result cursors by one
 * source element.  Used by the rank >= 2 copy loops of _SPREAD.
 *
 * The macro behaves like an odometer over curdim[0..5]:
 *   - the lowest dimension is advanced first;
 *   - when a dimension reaches its extent (src_ext[d]) the offset it had
 *     accumulated (src_indx[d] / res_indx[d]) is subtracted back out of the
 *     running offsets, its counters are reset to zero, and the carry moves
 *     on to the next dimension;
 *   - the last dimension (index 5) is advanced unconditionally, i.e. it is
 *     never range-checked or wrapped.
 *
 * It takes no arguments and instead operates on the following variables,
 * which must be in scope where it is expanded:
 *   curdim[], src_ext[], src_indx[], src_strd[], res_indx[], res_strd[]
 *       arrays with at least 6 elements; entries for unused dimensions
 *       must hold 0 so that a carry falls straight through them;
 *   sindx      running offset into the source;
 *   res_tmp2   running offset into the result (without the spread
 *              dimension's contribution).
 *
 * The body is wrapped in do { ... } while (0) so that "INCREMENT();" is a
 * single statement and stays correct inside an unbraced if/else or loop.
 */
#define INCREMENT() \
do { \
	curdim[0]++; \
	if (curdim[0] < src_ext[0]) { \
		src_indx[0] += src_strd[0]; \
		sindx += src_strd[0]; \
		res_indx[0] += res_strd[0]; \
		res_tmp2 += res_strd[0]; \
	} else { \
		sindx -= src_indx[0]; \
		res_tmp2 -= res_indx[0]; \
		curdim[0] = 0; \
		src_indx[0] = 0; \
		res_indx[0] = 0; \
		curdim[1]++; \
		if (curdim[1] < src_ext[1]) { \
			src_indx[1] += src_strd[1]; \
			sindx += src_strd[1]; \
			res_indx[1] += res_strd[1]; \
			res_tmp2 += res_strd[1]; \
		} else { \
			sindx -= src_indx[1]; \
			res_tmp2 -= res_indx[1]; \
			curdim[1] = 0; \
			src_indx[1] = 0; \
			res_indx[1] = 0; \
			curdim[2]++; \
			if (curdim[2] < src_ext[2]) { \
				src_indx[2] += src_strd[2]; \
				sindx += src_strd[2]; \
				res_indx[2] += res_strd[2]; \
				res_tmp2 += res_strd[2]; \
			} else { \
				sindx -= src_indx[2]; \
				res_tmp2 -= res_indx[2]; \
				curdim[2] = 0; \
				src_indx[2] = 0; \
				res_indx[2] = 0; \
				curdim[3]++; \
				if (curdim[3] < src_ext[3]) { \
					src_indx[3] += src_strd[3]; \
					sindx += src_strd[3]; \
					res_indx[3] += res_strd[3]; \
					res_tmp2 += res_strd[3]; \
				} else { \
					sindx -= src_indx[3]; \
					res_tmp2 -= res_indx[3]; \
					curdim[3] = 0; \
					src_indx[3] = 0; \
					res_indx[3] = 0; \
					curdim[4]++; \
					if (curdim[4] < src_ext[4]) { \
						src_indx[4] += src_strd[4]; \
						sindx += src_strd[4]; \
						res_indx[4] += res_strd[4]; \
						res_tmp2 += res_strd[4]; \
					} else { \
						sindx -= src_indx[4]; \
						res_tmp2 -= res_indx[4]; \
						curdim[4] = 0; \
						src_indx[4] = 0; \
						res_indx[4] = 0; \
						curdim[5]++; \
						src_indx[5] += src_strd[5]; \
						sindx += src_strd[5]; \
						res_indx[5] += res_strd[5]; \
						res_tmp2 += res_strd[5]; \
					} \
				} \
			} \
		} \
	} \
} while (0)
00128
00129 #ifdef _UNICOS
00130 #pragma _CRI duplicate _SPREAD as SPREAD@
00131 #endif
00132 void
00133 _SPREAD ( DopeVectorType * result,
00134 DopeVectorType * source,
00135 _f_int *dimp,
00136 _f_int *ncopiesp)
00137 {
00138 char *cs;
00139 char *cr;
00140 char * restrict cptr1;
00141 char * restrict cptr2;
00142 _f_int8 * restrict uptr1;
00143 _f_int8 * restrict uptr2;
00144 _f_int * restrict fptr1;
00145 _f_int * restrict fptr2;
00146 _f_real16 * restrict dptr1;
00147 _f_real16 * restrict dptr2;
00148 #ifdef _F_COMP16
00149 dblcmplx * restrict xptr1;
00150 dblcmplx * restrict xptr2;
00151 #endif
00152 _f_int4 * restrict hptr1;
00153 _f_int4 * restrict hptr2;
00154 void * restrict sptr;
00155 void * restrict rptr;
00156 _f_int dim;
00157 long ncopies;
00158 _f_int rank;
00159 _f_int type;
00160 _f_int subtype;
00161 _f_int arithmetic;
00162 long nbytes;
00163 long sindx;
00164 long sindx2;
00165 long rindx;
00166 _f_int bytealligned;
00167 _f_int bucketsize;
00168 long src_strd[MAXDIM];
00169 long src_ext[MAXDIM];
00170 long src_indx[MAXDIM];
00171 long res_strd[MAXDIM];
00172 long res_ext[MAXDIM];
00173 long res_indx[MAXDIM];
00174 long curdim[MAXDIM];
00175 long tot_ext;
00176 long src_tot_ext;
00177 _f_int ndim;
00178 long res_tmp1;
00179 long res_tmp2;
00180 long res_dim_strd;
00181 long cnt;
00182 long i, j, k;
00183 _f_int early_exit;
00184
00185
00186
00187 rank = source->n_dim;
00188 type = source->type_lens.type;
00189
00190
00191
00192 early_exit = 0;
00193 for (i = 0; i < rank; i++) {
00194 if (source->dimension[i].extent == 0)
00195 early_exit = 1;
00196 }
00197 if (result->assoc) {
00198 for (i = 0; i < result->n_dim; i++) {
00199 if (result->dimension[i].extent == 0)
00200 early_exit = 1;
00201 }
00202 }
00203
00204
00205
00206
00207
00208 #ifdef _UNICOS
00209 #pragma _CRI shortloop
00210 #endif
00211 for (i = 0; i < MAXDIM; i++) {
00212 curdim[i] = 0;
00213 src_strd[i] = 0;
00214 src_ext[i] = 0;
00215 src_indx[i] = 0;
00216 res_strd[i] = 0;
00217 res_ext[i] = 0;
00218 res_indx[i] = 0;
00219 }
00220
00221
00222
00223 if (*dimp < 1 || *dimp > rank+2)
00224 _lerror (_LELVL_ABORT, FESCIDIM);
00225 dim = *dimp - 1;
00226 if (*ncopiesp > 0)
00227 ncopies = *ncopiesp;
00228 else
00229 ncopies = 0;
00230
00231
00232
00233 switch (type) {
00234 case DVTYPE_ASCII :
00235 bytealligned = 1;
00236 bucketsize = _fcdlen (source->base_addr.charptr);
00237 subtype = DVSUBTYPE_CHAR;
00238 arithmetic = 0;
00239 break;
00240 case DVTYPE_DERIVEDBYTE :
00241 bytealligned = 1;
00242 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00243 subtype = DVSUBTYPE_CHAR;
00244 arithmetic = 0;
00245 break;
00246 case DVTYPE_DERIVEDWORD :
00247 bytealligned = 0;
00248 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00249 subtype = DVSUBTYPE_DERIVED;
00250 arithmetic = 0;
00251 break;
00252 default :
00253 bytealligned = 0;
00254 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00255 if (source->type_lens.int_len == 64) {
00256 subtype = DVSUBTYPE_BIT64;
00257 } else if (source->type_lens.int_len == 32) {
00258 subtype = DVSUBTYPE_BIT32;
00259 bucketsize = 1;
00260 } else if (source->type_lens.int_len == 256) {
00261 subtype = DVSUBTYPE_BIT256;
00262 } else {
00263 subtype = DVSUBTYPE_BIT128;
00264 }
00265 arithmetic = 1;
00266 }
00267
00268
00269 if (!result->assoc) {
00270 result->base_addr.a.ptr = (void *) NULL;
00271 result->orig_base = 0;
00272 result->orig_size = 0;
00273
00274 tot_ext = 1;
00275 #ifdef _UNICOS
00276 #pragma _CRI novector
00277 #endif
00278 for (i = 0; i < dim; i++) {
00279 result->dimension[i].extent = source->dimension[i].extent;
00280 result->dimension[i].stride_mult = tot_ext * bucketsize;
00281 result->dimension[i].low_bound = 1;
00282 tot_ext *= source->dimension[i].extent;
00283 }
00284 result->dimension[i].extent = ncopies;
00285 result->dimension[i].stride_mult = tot_ext * bucketsize;
00286 result->dimension[i].low_bound = 1;
00287 tot_ext *= ncopies;
00288 i++;
00289 #ifdef _UNICOS
00290 #pragma _CRI novector
00291 #endif
00292 for ( ; i < rank+1; i++) {
00293 result->dimension[i].extent = source->dimension[i-1].extent;
00294 result->dimension[i].stride_mult = tot_ext * bucketsize;
00295 result->dimension[i].low_bound = 1;
00296 tot_ext *= source->dimension[i-1].extent;
00297 }
00298
00299 if (!bytealligned) {
00300 nbytes = bucketsize * BYTES_PER_WORD;
00301 #ifdef _CRAYMPP
00302 if (subtype == DVSUBTYPE_BIT32)
00303 nbytes /= 2;
00304 #endif
00305 } else {
00306 nbytes = bucketsize;
00307 }
00308 for (i = 0; i < result->n_dim; i++)
00309 nbytes *= result->dimension[i].extent;
00310 if (ncopies > 0 && nbytes > 0) {
00311 result->base_addr.a.ptr = (void *) malloc (nbytes);
00312 if (result->base_addr.a.ptr == NULL)
00313 _lerror (_LELVL_ABORT, FENOMEMY);
00314 } else {
00315 result->base_addr.a.ptr = NULL;
00316 }
00317
00318 result->assoc = 1;
00319 result->base_addr.a.el_len = source->base_addr.a.el_len;
00320 if (type == DVTYPE_ASCII) {
00321 result->base_addr.a.ptr = (void *) result->base_addr.a.ptr;
00322 cr = (char *) result->base_addr.a.ptr;
00323 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00324 }
00325 result->orig_base = (void *) result->base_addr.a.ptr;
00326 result->orig_size = nbytes * BITS_PER_BYTE;
00327 }
00328
00329
00330
00331 if (ncopies == 0 || early_exit == 1)
00332 return;
00333
00334
00335
00336 if (!bytealligned) {
00337 sptr = (void *) source->base_addr.a.ptr;
00338 rptr = (void *) result->base_addr.a.ptr;
00339 } else {
00340 if (type == DVTYPE_ASCII) {
00341 cs = _fcdtocp (source->base_addr.charptr);
00342 cr = _fcdtocp (result->base_addr.charptr);
00343 } else {
00344 cs = (char *) source->base_addr.a.ptr;
00345 cr = (char *) result->base_addr.a.ptr;
00346 }
00347 }
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360 if (rank == 0) {
00361 if (bucketsize > 1 && arithmetic) {
00362 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00363 } else {
00364 res_strd[0] = result->dimension[0].stride_mult;
00365 }
00366 switch (subtype) {
00367 case DVSUBTYPE_BIT64 :
00368 uptr1 = (_f_int8 *) sptr;
00369 uptr2 = (_f_int8 *) rptr;
00370 for (i = 0; i < ncopies; i++) {
00371 rindx = i * res_strd[0];
00372 uptr2[rindx] = uptr1[0];
00373 }
00374 break;
00375
00376 case DVSUBTYPE_BIT32 :
00377 hptr1 = (_f_int4 *) sptr;
00378 hptr2 = (_f_int4 *) rptr;
00379 for (i = 0; i < ncopies; i++) {
00380 rindx = i * res_strd[0];
00381 hptr2[rindx] = hptr1[0];
00382 }
00383 break;
00384
00385 case DVSUBTYPE_BIT128 :
00386 dptr1 = (_f_real16 *) sptr;
00387 dptr2 = (_f_real16 *) rptr;
00388 for (i = 0; i < ncopies; i++) {
00389 rindx = i * res_strd[0];
00390 dptr2[rindx] = dptr1[0];
00391 }
00392 break;
00393
00394 case DVSUBTYPE_CHAR :
00395 cptr1 = (char *) cs;
00396 for (i = 0; i < ncopies; i++) {
00397 cptr2 = (char *) cr + (i * res_strd[0]);
00398 (void) memcpy (cptr2, cptr1, bucketsize);
00399 }
00400 break;
00401
00402 case DVSUBTYPE_DERIVED :
00403 for (i = 0; i < bucketsize; i++) {
00404 fptr1 = (_f_int *) sptr + i;
00405 fptr2 = (_f_int *) rptr + i;
00406 for (j = 0; j < ncopies; j++) {
00407 rindx = j * res_strd[0];
00408 fptr2[rindx] = fptr1[0];
00409 }
00410 }
00411 break;
00412
00413 #ifdef _F_COMP16
00414 case DVSUBTYPE_BIT256 :
00415 xptr1 = (dblcmplx *) sptr;
00416 xptr2 = (dblcmplx *) rptr;
00417 for (i = 0; i < ncopies; i++) {
00418 rindx = i * res_strd[0];
00419 xptr2[rindx].re = xptr1[0].re;
00420 xptr2[rindx].im = xptr1[0].im;
00421 }
00422 break;
00423 #endif
00424
00425 default :
00426 _lerror (_LELVL_ABORT, FEINTDTY);
00427 }
00428 } else if (rank == 1) {
00429 src_ext[0] = source->dimension[0].extent;
00430 if (bucketsize > 1 && arithmetic) {
00431 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00432 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00433 res_strd[1] = result->dimension[1].stride_mult / bucketsize;
00434 } else {
00435 src_strd[0] = source->dimension[0].stride_mult;
00436 res_strd[0] = result->dimension[0].stride_mult;
00437 res_strd[1] = result->dimension[1].stride_mult;
00438 }
00439
00440 if (dim == 0)
00441 ndim = 1;
00442 else
00443 ndim = 0;
00444
00445 switch (subtype) {
00446 case DVSUBTYPE_BIT64 :
00447 uptr1 = (_f_int8 *) sptr;
00448 uptr2 = (_f_int8 *) rptr;
00449 for (i = 0; i < src_ext[0]; i++) {
00450 res_tmp1 = i * res_strd[ndim];
00451 sindx = i * src_strd[0];
00452 for (j = 0; j < ncopies; j++) {
00453 rindx = res_tmp1 + (j * res_strd[dim]);
00454 uptr2[rindx] = uptr1[sindx];
00455 }
00456 }
00457 break;
00458
00459 case DVSUBTYPE_BIT32 :
00460 hptr1 = (_f_int4 *) sptr;
00461 hptr2 = (_f_int4 *) rptr;
00462 for (i = 0; i < src_ext[0]; i++) {
00463 res_tmp1 = i * res_strd[ndim];
00464 sindx = i * src_strd[0];
00465 for (j = 0; j < ncopies; j++) {
00466 rindx = res_tmp1 + (j * res_strd[dim]);
00467 hptr2[rindx] = hptr1[sindx];
00468 }
00469 }
00470 break;
00471
00472 case DVSUBTYPE_BIT128 :
00473 dptr1 = (_f_real16 *) sptr;
00474 dptr2 = (_f_real16 *) rptr;
00475 for (i = 0; i < src_ext[0]; i++) {
00476 res_tmp1 = i * res_strd[ndim];
00477 sindx = i * src_strd[0];
00478 for (j = 0; j < ncopies; j++) {
00479 rindx = res_tmp1 + (j * res_strd[dim]);
00480 dptr2[rindx] = dptr1[sindx];
00481 }
00482 }
00483 break;
00484
00485 case DVSUBTYPE_CHAR :
00486 for (i = 0; i < src_ext[0]; i++) {
00487 res_tmp1 = i * res_strd[ndim];
00488 sindx = i * src_strd[0];
00489 cptr1 = (char *) cs + sindx;
00490 for (j = 0; j < ncopies; j++) {
00491 rindx = res_tmp1 + (j * res_strd[dim]);
00492 cptr2 = (char *) cr + rindx;
00493 (void) memcpy (cptr2, cptr1, bucketsize);
00494 }
00495 }
00496 break;
00497
00498 case DVSUBTYPE_DERIVED :
00499 for (i = 0; i < bucketsize; i++) {
00500 fptr1 = (_f_int *) sptr + i;
00501 fptr2 = (_f_int *) rptr + i;
00502 for (j = 0; j < src_ext[0]; j++) {
00503 res_tmp1 = j * res_strd[ndim];
00504 sindx = j * src_strd[0];
00505 for (k = 0; k < ncopies; k++) {
00506 rindx = res_tmp1 + (k * res_strd[dim]);
00507 fptr2[rindx] = fptr1[sindx];
00508 }
00509 }
00510 }
00511 break;
00512
00513 #ifdef _F_COMP16
00514 case DVSUBTYPE_BIT256 :
00515 xptr1 = (dblcmplx *) sptr;
00516 xptr2 = (dblcmplx *) rptr;
00517 for (i = 0; i < src_ext[0]; i++) {
00518 res_tmp1 = i * res_strd[ndim];
00519 sindx = i * src_strd[0];
00520 for (j = 0; j < ncopies; j++) {
00521 rindx = res_tmp1 + (j * res_strd[dim]);
00522 xptr2[rindx].re = xptr1[sindx].re;
00523 xptr2[rindx].im = xptr1[sindx].im;
00524 }
00525 }
00526 break;
00527 #endif
00528
00529 default :
00530 _lerror (_LELVL_ABORT, FEINTDTY);
00531 }
00532 } else {
00533 src_tot_ext = 1;
00534 #ifdef _UNICOS
00535 #pragma _CRI shortloop
00536 #endif
00537 for (i = 0; i < rank; i++) {
00538 if (bucketsize > 1 && arithmetic) {
00539 src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00540 } else {
00541 src_strd[i] = source->dimension[i].stride_mult;
00542 }
00543 src_ext[i] = source->dimension[i].extent;
00544 src_tot_ext *= src_ext[i];
00545 src_indx[i] = 0;
00546 curdim[i] = 0;
00547 }
00548
00549 cnt = 0;
00550 tot_ext = 1;
00551 #ifdef _UNICOS
00552 #pragma _CRI shortloop
00553 #endif
00554 for (i = 0; i <= rank; i++) {
00555 res_indx[i] = 0;
00556 if (i != dim) {
00557 if (type == DVTYPE_ASCII || type == DVTYPE_DERIVEDBYTE ||
00558 type == DVTYPE_DERIVEDWORD)
00559 res_strd[cnt] = tot_ext * bucketsize;
00560 else
00561 res_strd[cnt] = tot_ext;
00562 res_ext[cnt] = result->dimension[i].extent;
00563 tot_ext *= res_ext[cnt];
00564 cnt++;
00565 } else {
00566 if (type == DVTYPE_ASCII || type == DVTYPE_DERIVEDBYTE ||
00567 type == DVTYPE_DERIVEDWORD)
00568 res_dim_strd = tot_ext * bucketsize;
00569 else
00570 res_dim_strd = tot_ext;
00571 tot_ext *= ncopies;
00572 }
00573 }
00574
00575 switch (subtype) {
00576 case DVSUBTYPE_BIT64 :
00577 uptr1 = (_f_int8 *) sptr;
00578 uptr2 = (_f_int8 *) rptr;
00579 sindx = 0;
00580 res_tmp2 = 0;
00581 for (i = 0; i < src_tot_ext; i++) {
00582 for (j = 0; j < ncopies; j++) {
00583 rindx = res_tmp2 + (j * res_dim_strd);
00584 uptr2[rindx] = uptr1[sindx];
00585 }
00586 INCREMENT();
00587 }
00588 break;
00589
00590 case DVSUBTYPE_BIT32 :
00591 hptr1 = (_f_int4 *) sptr;
00592 hptr2 = (_f_int4 *) rptr;
00593 sindx = 0;
00594 res_tmp2 = 0;
00595 for (i = 0; i < src_tot_ext; i++) {
00596 for (j = 0; j < ncopies; j++) {
00597 rindx = res_tmp2 + (j * res_dim_strd);
00598 hptr2[rindx] = hptr1[sindx];
00599 }
00600 INCREMENT();
00601 }
00602 break;
00603
00604 case DVSUBTYPE_BIT128 :
00605 dptr1 = (_f_real16 *) sptr;
00606 dptr2 = (_f_real16 *) rptr;
00607 sindx = 0;
00608 res_tmp2 = 0;
00609 for (i = 0; i < src_tot_ext; i++) {
00610 for (j = 0; j < ncopies; j++) {
00611 rindx = res_tmp2 + (j * res_dim_strd);
00612 dptr2[rindx] = dptr1[sindx];
00613 }
00614 INCREMENT();
00615 }
00616 break;
00617
00618 case DVSUBTYPE_CHAR :
00619 sindx = 0;
00620 res_tmp2 = 0;
00621 for (i = 0; i < src_tot_ext; i++) {
00622 cptr1 = (char *) cs + sindx;
00623 for (j = 0; j < ncopies; j++) {
00624 rindx = res_tmp2 + (j * res_dim_strd);
00625 cptr2 = (char *) cr + rindx;
00626 (void) memcpy (cptr2, cptr1, bucketsize);
00627 }
00628 INCREMENT();
00629 }
00630 break;
00631
00632 case DVSUBTYPE_DERIVED :
00633 for (i = 0; i < bucketsize; i++) {
00634 fptr1 = (_f_int *) sptr + i;
00635 fptr2 = (_f_int *) rptr + i;
00636 sindx = 0;
00637 res_tmp2 = 0;
00638 for (j = 0; j < src_tot_ext; j++) {
00639 for (k = 0; k < ncopies; k++) {
00640 rindx = res_tmp2 + (k * res_dim_strd);
00641 fptr2[rindx] = fptr1[sindx];
00642 }
00643 INCREMENT();
00644 }
00645 }
00646 break;
00647
00648 #ifdef _F_COMP16
00649 case DVSUBTYPE_BIT256 :
00650 xptr1 = (dblcmplx *) sptr;
00651 xptr2 = (dblcmplx *) rptr;
00652 sindx = 0;
00653 res_tmp2 = 0;
00654 for (i = 0; i < src_tot_ext; i++) {
00655 for (j = 0; j < ncopies; j++) {
00656 rindx = res_tmp2 + (j * res_dim_strd);
00657 xptr2[rindx].re = xptr1[sindx].re;
00658 xptr2[rindx].im = xptr1[sindx].im;
00659 }
00660 INCREMENT();
00661 }
00662 break;
00663 #endif
00664
00665 default :
00666 _lerror (_LELVL_ABORT, FEINTDTY);
00667 }
00668 }
00669 }