00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #pragma ident "@(#) libfi/array/cshift.c 92.1 07/07/99 15:52:02"
00038
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #ifdef _UNICOS
00056 #pragma _CRI duplicate _CSHIFT as CSHIFT@
00057 #endif
00058 void
00059 _CSHIFT (DopeVectorType * result,
00060 DopeVectorType * source,
00061 DopeVectorType * shift,
00062 _f_int *dimp)
00063 {
00064 char *cs;
00065 char *cr;
00066 char * restrict cptr1;
00067 char * restrict cptr2;
00068 _f_int8 * restrict uptr1;
00069 _f_int8 * restrict uptr2;
00070 _f_int * restrict wptr1;
00071 _f_int * restrict wptr2;
00072 _f_real16 * restrict dptr1;
00073 _f_real16 * restrict dptr2;
00074 #ifdef _F_COMP16
00075 dblcmplx * restrict xptr1;
00076 dblcmplx * restrict xptr2;
00077 #endif
00078 _f_int4 * restrict hptr1;
00079 _f_int4 * restrict hptr2;
00080 void * restrict sptr;
00081 void * restrict rptr;
00082 long * restrict shptr;
00083 _f_int8 * restrict save_uptr1;
00084 _f_int8 * restrict save_uptr2a;
00085 _f_int8 * restrict save_uptr2b;
00086 _f_int * restrict save_wptr1;
00087 _f_int * restrict save_wptr2a;
00088 _f_int * restrict save_wptr2b;
00089 _f_int4 * restrict iptr4;
00090 _f_int8 * restrict iptr8;
00091 _f_int bucketsize;
00092 _f_int shft_size;
00093 long nbytes;
00094 _f_int num;
00095 _f_int bytealligned;
00096 long sindx;
00097 long sindx2;
00098 long shindx;
00099 long rindx;
00100 long rindx2;
00101 _f_int shft;
00102 _f_int dim;
00103 _f_int non_dim;
00104 long curdim[MAXDIM-1];
00105 long src_strd[MAXDIM-1];
00106 long src_ext[MAXDIM-1];
00107 long src_off[MAXDIM-1];
00108 long res_strd[MAXDIM-1];
00109 long res_off[MAXDIM-1];
00110 long shft_strd[MAXDIM-1];
00111 long shft_off[MAXDIM-1];
00112 _f_int rank;
00113 _f_int type;
00114 _f_int subtype;
00115 _f_int arithmetic;
00116 long extent;
00117 long extlow;
00118 long exthi;
00119 long src_dim_strd;
00120 long res_dim_strd;
00121 long shft_dim;
00122 long shft_dim_strd;
00123 long tot_ext;
00124 long i, j, k, l;
00125
00126
00127
00128 rank = source->n_dim;
00129 type = source->type_lens.type;
00130
00131
00132
00133 switch (type) {
00134 case DVTYPE_ASCII :
00135 bytealligned = 1;
00136 bucketsize = _fcdlen (source->base_addr.charptr);
00137 subtype = DVSUBTYPE_CHAR;
00138 arithmetic = 0;
00139 break;
00140 case DVTYPE_DERIVEDBYTE :
00141 bytealligned = 1;
00142 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00143 subtype = DVSUBTYPE_CHAR;
00144 arithmetic = 0;
00145 break;
00146 case DVTYPE_DERIVEDWORD :
00147 bytealligned = 0;
00148 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00149 subtype = DVSUBTYPE_DERIVED;
00150 arithmetic = 0;
00151 break;
00152 default :
00153 bytealligned = 0;
00154 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00155 if (source->type_lens.int_len == 64) {
00156 subtype = DVSUBTYPE_BIT64;
00157 } else if (source->type_lens.int_len == 32) {
00158 subtype = DVSUBTYPE_BIT32;
00159 bucketsize = 1;
00160 } else if (source->type_lens.int_len == 256) {
00161 subtype = DVSUBTYPE_BIT256;
00162 } else {
00163 subtype = DVSUBTYPE_BIT128;
00164 }
00165 arithmetic = 1;
00166 }
00167 shft_size = shift->base_addr.a.el_len / BITS_PER_WORD;
00168 #ifdef _CRAYMPP
00169 if (shft_size == 0)
00170 shft_size = 1;
00171 #endif
00172
00173
00174
00175 if (!result->assoc) {
00176 result->base_addr.a.ptr = (void *) NULL;
00177 result->orig_base = 0;
00178 result->orig_size = 0;
00179 #ifdef _UNICOS
00180 #pragma _CRI shortloop
00181 #endif
00182 for (i = 0, tot_ext = bucketsize; i < rank; i++) {
00183 result->dimension[i].extent = source->dimension[i].extent;
00184 result->dimension[i].low_bound = 1;
00185 result->dimension[i].stride_mult = tot_ext;
00186 tot_ext *= result->dimension[i].extent;
00187 }
00188
00189
00190
00191 if (!bytealligned) {
00192 nbytes = bucketsize * BYTES_PER_WORD;
00193 #ifdef _CRAYMPP
00194 if (subtype == DVSUBTYPE_BIT32)
00195 nbytes /= 2;
00196 #endif
00197 } else {
00198 nbytes = bucketsize;
00199 }
00200 #ifdef _UNICOS
00201 #pragma _CRI shortloop
00202 #endif
00203 for (i = 0; i < rank; i++)
00204 nbytes *= result->dimension[i].extent;
00205 if (nbytes > 0) {
00206 result->base_addr.a.ptr = (void *) malloc(nbytes);
00207 if (result->base_addr.a.ptr == NULL)
00208 _lerror(_LELVL_ABORT, FENOMEMY);
00209 }
00210
00211 result->assoc = 1;
00212 result->base_addr.a.el_len = source->base_addr.a.el_len;
00213 if (type == DVTYPE_ASCII) {
00214 cr = (char *) result->base_addr.a.ptr;
00215 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00216 }
00217 result->orig_base = result->base_addr.a.ptr;
00218 result->orig_size = nbytes * BITS_PER_BYTE;
00219 }
00220
00221
00222
00223
00224
00225
00226 #ifdef _UNICOS
00227 #pragma _CRI shortloop
00228 #endif
00229 for (i = 0; i < rank; i++) {
00230 if (!source->dimension[i].extent)
00231 return;
00232 }
00233 if (result->assoc) {
00234 #ifdef _UNICOS
00235 #pragma _CRI shortloop
00236 #endif
00237 for (i = 0; i < rank; i++)
00238 if (!result->dimension[i].extent)
00239 return;
00240 }
00241 if (shift->n_dim > 1) {
00242 #ifdef _UNICOS
00243 #pragma _CRI shortloop
00244 #endif
00245 for (i = 0; i < rank-1; i++)
00246 if (!shift->dimension[i].extent)
00247 return;
00248 }
00249
00250
00251
00252 if (!bytealligned) {
00253 sptr = (void *) source->base_addr.a.ptr;
00254 rptr = (void *) result->base_addr.a.ptr;
00255 } else {
00256 if (type == DVTYPE_ASCII) {
00257 cs = _fcdtocp (source->base_addr.charptr);
00258 cr = _fcdtocp (result->base_addr.charptr);
00259 } else {
00260 cs = (char *) source->base_addr.a.ptr;
00261 cr = (char *) result->base_addr.a.ptr;
00262 }
00263 }
00264 shptr = (void *) shift->base_addr.a.ptr;
00265
00266
00267
00268 if (dimp == NULL)
00269 dim = 0;
00270 else {
00271 if (*dimp < 1 || *dimp > rank)
00272 _lerror (_LELVL_ABORT, FESCIDIM);
00273 dim = *dimp - 1;
00274 }
00275
00276
00277
00278 if (rank == 1) {
00279
00280
00281
00282
00283
00284
00285
00286
00287 if (shift->base_addr.a.el_len == 64) {
00288 iptr8 = (_f_int8 *) shptr;
00289 shft = *iptr8 % source->dimension[0].extent;
00290 } else {
00291 iptr4 = (_f_int4 *) shptr;
00292 shft = *iptr4 % source->dimension[0].extent;
00293 }
00294 if (shft < 0)
00295 shft += source->dimension[0].extent;
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308 if (bucketsize > 1 && arithmetic) {
00309 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00310 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00311 } else {
00312 src_strd[0] = source->dimension[0].stride_mult;
00313 res_strd[0] = result->dimension[0].stride_mult;
00314 }
00315
00316
00317
00318
00319
00320
00321 extent = source->dimension[0].extent;
00322
00323
00324
00325
00326
00327 switch (subtype) {
00328 case DVSUBTYPE_CHAR :
00329 cptr1 = (char *) cs;
00330 cptr2 = (char *) cr + (extent - shft) * bucketsize;
00331 src_dim_strd = source->dimension[0].stride_mult;
00332 res_dim_strd = result->dimension[0].stride_mult;
00333 for (i = 0; i < shft; i++) {
00334 (void) memcpy (cptr2, cptr1, bucketsize);
00335 cptr1 += src_dim_strd;
00336 cptr2 += res_dim_strd;
00337 }
00338 cptr2 = (char *) cr;
00339 for ( ; i < extent; i++) {
00340 (void) memcpy (cptr2, cptr1, bucketsize);
00341 cptr1 += src_dim_strd;
00342 cptr2 += res_dim_strd;
00343 }
00344 break;
00345
00346 case DVSUBTYPE_DERIVED :
00347 src_dim_strd = source->dimension[0].stride_mult;
00348 res_dim_strd = result->dimension[0].stride_mult;
00349 for (i = 0; i < bucketsize; i++) {
00350 wptr1 = (_f_int *) sptr;
00351 wptr2 = (_f_int *) rptr + (extent-shft) *
00352 bucketsize;
00353 for (j = 0; j < shft; j++) {
00354 sindx = i + (j * src_dim_strd);
00355 rindx = i + (j * res_dim_strd);
00356 wptr2[rindx] = wptr1[sindx];
00357 }
00358 wptr2 = (_f_int *) rptr;
00359 for (k = 0 ; j < extent; j++, k++) {
00360 sindx = i + (j * src_dim_strd);
00361 rindx = i + (k * res_dim_strd);
00362 wptr2[rindx] = wptr1[sindx];
00363 }
00364 }
00365 break;
00366
00367 case DVSUBTYPE_BIT64 :
00368 uptr1 = (_f_int8 *) sptr;
00369 uptr2 = (_f_int8 *) rptr + ((extent - shft) *
00370 res_strd[0]);
00371 src_dim_strd = src_strd[0];
00372 res_dim_strd = res_strd[0];
00373 #ifndef CRAY2
00374 for (i = 0; i < shft; i++) {
00375 sindx = i * src_dim_strd;
00376 rindx = i * res_dim_strd;
00377 uptr2[rindx] = uptr1[sindx];
00378 }
00379 uptr2 = (_f_int8 *) rptr;
00380 for (j = 0 ; i < extent; i++, j++) {
00381 sindx = i * src_dim_strd;
00382 rindx = j * res_dim_strd;
00383 uptr2[rindx] = uptr1[sindx];
00384 }
00385 #else
00386 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft);
00387 uptr2 = (_f_int8 *) rptr;
00388 uptr1 = (_f_int8 *) sptr + (shft * src_dim_strd);
00389 shft = extent - shft;
00390 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft);
00391 #endif
00392 break;
00393
00394 case DVSUBTYPE_BIT32 :
00395 hptr1 = (_f_int4 *) sptr;
00396 hptr2 = (_f_int4 *) rptr + ((extent - shft) * res_strd[0]);
00397 src_dim_strd = src_strd[0];
00398 res_dim_strd = res_strd[0];
00399 for (i = 0; i < shft; i++) {
00400 sindx = i * src_dim_strd;
00401 rindx = i * res_dim_strd;
00402 hptr2[rindx] = hptr1[sindx];
00403 }
00404 hptr2 = (_f_int4 *) rptr;
00405 for (j = 0 ; i < extent; i++, j++) {
00406 sindx = i * src_dim_strd;
00407 rindx = j * res_dim_strd;
00408 hptr2[rindx] = hptr1[sindx];
00409 }
00410 break;
00411
00412 case DVSUBTYPE_BIT128 :
00413 dptr1 = (_f_real16 *) sptr;
00414 dptr2 = (_f_real16 *) rptr +
00415 ((extent - shft) * res_strd[0]);
00416 src_dim_strd = src_strd[0];
00417 res_dim_strd = res_strd[0];
00418 for (i = 0; i < shft; i++) {
00419 sindx = i * src_dim_strd;
00420 rindx = i * res_dim_strd;
00421 dptr2[rindx] = dptr1[sindx];
00422 }
00423 dptr2 = (_f_real16 *) rptr;
00424 for (j = 0 ; i < extent; i++, j++) {
00425 sindx = i * src_dim_strd;
00426 rindx = j * res_dim_strd;
00427 dptr2[rindx] = dptr1[sindx];
00428 }
00429 break;
00430
00431 #ifdef _F_COMP16
00432 case DVSUBTYPE_BIT256 :
00433 xptr1 = (dblcmplx *) sptr;
00434 xptr2 = (dblcmplx *) rptr + ((extent - shft) * res_strd[0]);
00435 src_dim_strd = src_strd[0];
00436 res_dim_strd = res_strd[0];
00437 for (i = 0; i < shft; i++) {
00438 sindx = i * src_dim_strd;
00439 rindx = i * res_dim_strd;
00440 xptr2[rindx].re = xptr1[sindx].re;
00441 xptr2[rindx].im = xptr1[sindx].im;
00442 }
00443 xptr2 = (dblcmplx *) rptr;
00444 for (j = 0 ; i < extent; i++, j++) {
00445 sindx = i * src_dim_strd;
00446 rindx = j * res_dim_strd;
00447 xptr2[rindx].re = xptr1[sindx].re;
00448 xptr2[rindx].im = xptr1[sindx].im;
00449 }
00450 break;
00451 #endif
00452
00453 default :
00454 _lerror (_LELVL_ABORT, FEINTDTY);
00455 }
00456
00457
00458
00459 } else if (rank == 2) {
00460
00461
00462
00463 if (dim == 0)
00464 non_dim = 1;
00465 else
00466 non_dim = 0;
00467
00468
00469
00470 if (bucketsize > 1 && arithmetic) {
00471 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00472 src_strd[1] = source->dimension[1].stride_mult / bucketsize;
00473 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00474 res_strd[1] = result->dimension[1].stride_mult / bucketsize;
00475 } else {
00476 src_strd[0] = source->dimension[0].stride_mult;
00477 src_strd[1] = source->dimension[1].stride_mult;
00478 res_strd[0] = result->dimension[0].stride_mult;
00479 res_strd[1] = result->dimension[1].stride_mult;
00480 }
00481 shft_strd[0] = shift->dimension[0].stride_mult / shft_size;
00482
00483
00484
00485 src_dim_strd = src_strd[dim];
00486 res_dim_strd = res_strd[dim];
00487
00488
00489
00490 extent = source->dimension[dim].extent;
00491 extlow = -extent;
00492 exthi = 2 * extent;
00493
00494
00495
00496 if (shift->base_addr.a.el_len == 64)
00497 iptr8 = (_f_int8 *) shptr;
00498 else
00499 iptr4 = (_f_int4 *) shptr;
00500 if (shift->n_dim == 1) {
00501 shft_dim = 1;
00502 shft_dim_strd = shift->dimension[0].stride_mult / shft_size;
00503 shindx = 0;
00504 } else {
00505 shft_dim = 0;
00506 if (shift->base_addr.a.el_len == 64)
00507 shft = iptr8[0];
00508 else
00509 shft = iptr4[0];
00510 if (shft >= extent) {
00511 if (shft < exthi)
00512 shft -= extent;
00513 else
00514 shft %= extent;
00515 } else if (shft < 0) {
00516 if (shft >= extlow)
00517 shft += extent;
00518 else {
00519 shft %= extent;
00520 if (shft < 0)
00521 shft += extent;
00522 }
00523 }
00524 }
00525
00526
00527
00528 for (i = 0; i < source->dimension[non_dim].extent; i++) {
00529
00530
00531
00532 if (shft_dim == 1) {
00533 if (shift->base_addr.a.el_len == 64)
00534 shft = iptr8[shindx];
00535 else
00536 shft = iptr4[shindx];
00537 if (shft >= extent) {
00538 if (shft < exthi)
00539 shft -= extent;
00540 else
00541 shft %= extent;
00542 } else if (shft < 0) {
00543 if (shft >= extlow)
00544 shft += extent;
00545 else {
00546 shft %= extent;
00547 if (shft < 0)
00548 shft += extent;
00549 }
00550 }
00551 shindx += shft_dim_strd;
00552 }
00553
00554 switch (subtype) {
00555 case DVSUBTYPE_CHAR :
00556 cptr1 = (char *) cs + (i * src_strd[non_dim]);
00557 cptr2 = (char *) cr + (i * res_strd[non_dim]) +
00558 ((extent - shft) * res_dim_strd);
00559 for (j = 0; j < shft; j++) {
00560 (void) memcpy (cptr2, cptr1, bucketsize);
00561 cptr1 += src_dim_strd;
00562 cptr2 += src_dim_strd;
00563 }
00564 cptr2 = (char *) cr + (i * res_strd[non_dim]);
00565 for ( ; j < extent; j++) {
00566 (void) memcpy (cptr2, cptr1, bucketsize);
00567 cptr1 += src_dim_strd;
00568 cptr2 += src_dim_strd;
00569 }
00570 break;
00571
00572 case DVSUBTYPE_DERIVED :
00573 wptr1 = (_f_int *) sptr +
00574 (i * src_strd[non_dim]);
00575 save_wptr1 = wptr1;
00576 wptr2 = (_f_int *) rptr +
00577 (i * res_strd[non_dim]) +
00578 ((extent - shft) * res_dim_strd);
00579 save_wptr2a = wptr2;
00580 wptr2 = (_f_int *) rptr +
00581 (i * res_strd[non_dim]);
00582 save_wptr2b = wptr2;
00583 for (j = 0; j < bucketsize; j++) {
00584 wptr1 = save_wptr1;
00585 wptr2 = save_wptr2a;
00586 for (k = 0; k < shft; k++) {
00587 sindx = j + (k * src_dim_strd);
00588 rindx = j + (k * res_dim_strd);
00589 wptr2[rindx] = wptr1[sindx];
00590 }
00591 wptr2 = save_wptr2b;
00592 for (l = 0; k < extent; k++, l++) {
00593 sindx = j + (k * src_dim_strd);
00594 rindx = j + (l * res_dim_strd);
00595 wptr2[rindx] = wptr1[sindx];
00596 }
00597 }
00598 break;
00599
00600 case DVSUBTYPE_BIT64 :
00601 uptr1 = (_f_int8 *) sptr +
00602 (i * src_strd[non_dim]);
00603 save_uptr1 = uptr1;
00604 uptr2 = (_f_int8 *) rptr +
00605 (i * res_strd[non_dim]) +
00606 ((extent - shft) * res_dim_strd);
00607 #ifndef CRAY2
00608 for (j = 0; j < shft; j++) {
00609 sindx = j * src_dim_strd;
00610 rindx = j * res_dim_strd;
00611 uptr2[rindx] = uptr1[sindx];
00612 }
00613 uptr2 = (_f_int8 *) rptr +
00614 (i * res_strd[non_dim]);
00615 for (k = 0 ; j < extent; j++, k++) {
00616 sindx = j * src_dim_strd;
00617 rindx = k * res_dim_strd;
00618 uptr2[rindx] = uptr1[sindx];
00619 }
00620 #else
00621 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00622 shft);
00623 uptr2 = (_f_int8 *) rptr +
00624 (i * res_strd[non_dim]);
00625 uptr1 = save_uptr1 + (shft * src_dim_strd);
00626 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00627 extent-shft);
00628 #endif
00629 break;
00630
00631 case DVSUBTYPE_BIT32 :
00632 hptr1 = (_f_int4 *) sptr + (i * src_strd[non_dim]);
00633 hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]) +
00634 ((extent - shft) * res_dim_strd);
00635 for (j = 0; j < shft; j++) {
00636 sindx = j * src_dim_strd;
00637 rindx = j * res_dim_strd;
00638 hptr2[rindx] = hptr1[sindx];
00639 }
00640 hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]);
00641 for (k = 0 ; j < extent; j++, k++) {
00642 sindx = j * src_dim_strd;
00643 rindx = k * res_dim_strd;
00644 hptr2[rindx] = hptr1[sindx];
00645 }
00646 break;
00647
00648 case DVSUBTYPE_BIT128 :
00649 dptr1 = (_f_real16 *) sptr + (i * src_strd[non_dim]);
00650 dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]) +
00651 ((extent - shft) * res_dim_strd);
00652 for (j = 0; j < shft; j++) {
00653 sindx = j * src_dim_strd;
00654 rindx = j * res_dim_strd;
00655 dptr2[rindx] = dptr1[sindx];
00656 }
00657 dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]);
00658 for (k = 0 ; j < extent; j++, k++) {
00659 sindx = j * src_dim_strd;
00660 rindx = k * res_dim_strd;
00661 dptr2[rindx] = dptr1[sindx];
00662 }
00663 break;
00664
00665 #ifdef _F_COMP16
00666 case DVSUBTYPE_BIT256 :
00667 xptr1 = (dblcmplx *) sptr + (i * src_strd[non_dim]);
00668 xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]) +
00669 ((extent - shft) * res_dim_strd);
00670 for (j = 0; j < shft; j++) {
00671 sindx = j * src_dim_strd;
00672 rindx = j * res_dim_strd;
00673 xptr2[rindx].re = xptr1[sindx].re;
00674 xptr2[rindx].im = xptr1[sindx].im;
00675 }
00676 xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]);
00677 for (k = 0 ; j < extent; j++, k++) {
00678 sindx = j * src_dim_strd;
00679 rindx = k * res_dim_strd;
00680 xptr2[rindx].re = xptr1[sindx].re;
00681 xptr2[rindx].im = xptr1[sindx].im;
00682 }
00683 break;
00684 #endif
00685
00686 default :
00687 _lerror (_LELVL_ABORT, FEINTDTY);
00688 }
00689 }
00690
00691
00692
00693 } else {
00694
00695 if (dim == 0) {
00696 i = 0;
00697 tot_ext = 1;
00698 } else
00699 #ifdef _UNICOS
00700 #pragma _CRI shortloop
00701 #endif
00702 for (i = 0, tot_ext = 1; i < dim; i++) {
00703 tot_ext *= source->dimension[i].extent;
00704 src_ext[i] = source->dimension[i].extent;
00705 if (bucketsize > 1 && arithmetic) {
00706 src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00707 res_strd[i] = result->dimension[i].stride_mult / bucketsize;
00708 } else {
00709 src_strd[i] = source->dimension[i].stride_mult;
00710 res_strd[i] = result->dimension[i].stride_mult;
00711 }
00712 }
00713 if (i < (rank - 1))
00714 #ifdef _UNICOS
00715 #pragma _CRI shortloop
00716 #endif
00717 for ( ; i < rank-1; i++) {
00718 tot_ext *= source->dimension[i+1].extent;
00719 src_ext[i] = source->dimension[i+1].extent;
00720 if (bucketsize > 1 && arithmetic) {
00721 src_strd[i] = source->dimension[i+1].stride_mult/bucketsize;
00722 res_strd[i] = result->dimension[i+1].stride_mult/bucketsize;
00723 } else {
00724 src_strd[i] = source->dimension[i+1].stride_mult;
00725 res_strd[i] = result->dimension[i+1].stride_mult;
00726 }
00727 }
00728
00729
00730
00731 extent = source->dimension[dim].extent;
00732 extlow = -extent;
00733 exthi = 2 * extent;
00734
00735 if (bucketsize > 1 && arithmetic) {
00736 src_dim_strd = source->dimension[dim].stride_mult / bucketsize;
00737 res_dim_strd = result->dimension[dim].stride_mult / bucketsize;
00738 } else {
00739 src_dim_strd = source->dimension[dim].stride_mult;
00740 res_dim_strd = result->dimension[dim].stride_mult;
00741 }
00742
00743
00744
00745 if (shift->base_addr.a.el_len == 64)
00746 iptr8 = (_f_int8 *) shptr;
00747 else
00748 iptr4 = (_f_int4 *) shptr;
00749 if (shift->n_dim == 0) {
00750 shft_dim = 0;
00751 if (shift->base_addr.a.el_len == 64)
00752 shft = iptr8[0];
00753 else
00754 shft = iptr4[0];
00755 if (shft >= extent) {
00756 if (shft < exthi)
00757 shft -= extent;
00758 else
00759 shft %= extent;
00760 } else if (shft < 0) {
00761 if (shft >= extlow)
00762 shft += extent;
00763 else {
00764 shft %= extent;
00765 if (shft < 0)
00766 shft += extent;
00767 }
00768 }
00769 } else {
00770 shft_dim = 1;
00771 shindx = 0;
00772 #ifdef _UNICOS
00773 #pragma _CRI shortloop
00774 #endif
00775 for (i = 0; i < rank-1; i++) {
00776 shft_strd[i] = shift->dimension[i].stride_mult;
00777 }
00778 }
00779
00780
00781
00782 for (i = 0; i < rank-1; i++) {
00783 curdim[i] = 0;
00784 src_off[i] = 0;
00785 res_off[i] = 0;
00786 shft_off[i] = 0;
00787 }
00788
00789
00790
00791
00792
00793
00794 for (i = 0; i < tot_ext; i++) {
00795
00796
00797
00798 if (shft_dim) {
00799 switch (rank) {
00800 case 3 :
00801 shindx = shft_off[0] + shft_off[1];
00802 break;
00803 case 4 :
00804 shindx = shft_off[0] + shft_off[1] + shft_off[2];
00805 break;
00806 case 5 :
00807 shindx = shft_off[0] + shft_off[1] +
00808 shft_off[2] + shft_off[3];
00809 break;
00810 case 6 :
00811 shindx = shft_off[0] + shft_off[1] + shft_off[2] +
00812 shft_off[3] + shft_off[4];
00813 break;
00814 default :
00815 shindx = shft_off[0] + shft_off[1] + shft_off[2] +
00816 shft_off[3] + shft_off[4] + shft_off[5];
00817 }
00818 shft = shptr[shindx];
00819 if (shft >= extent) {
00820 if (shft < exthi)
00821 shft -= extent;
00822 else
00823 shft %= extent;
00824 } else if (shft < 0) {
00825 if (shft >= extlow)
00826 shft += extent;
00827 else {
00828 shft %= extent;
00829 if (shft < 0)
00830 shft += extent;
00831 }
00832 }
00833 }
00834
00835 switch (rank) {
00836 case 3 :
00837 sindx = src_off[0] + src_off[1];
00838 rindx = res_off[0] + res_off[1];
00839 break;
00840 case 4 :
00841 sindx = src_off[0] + src_off[1] + src_off[2];
00842 rindx = res_off[0] + res_off[1] + res_off[2];
00843 break;
00844 case 5 :
00845 sindx = src_off[0] + src_off[1] +
00846 src_off[2] + src_off[3];
00847 rindx = res_off[0] + res_off[1] +
00848 res_off[2] + res_off[3];
00849 break;
00850 case 6 :
00851 sindx = src_off[0] + src_off[1] + src_off[2] +
00852 src_off[3] + src_off[4];
00853 rindx = res_off[0] + res_off[1] + res_off[2] +
00854 res_off[3] + res_off[4];
00855 break;
00856 default :
00857 sindx = src_off[0] + src_off[1] + src_off[2] +
00858 src_off[3] + src_off[4] + src_off[5];
00859 rindx = res_off[0] + res_off[1] + res_off[2] +
00860 res_off[3] + res_off[4] + res_off[5];
00861 }
00862
00863 switch (subtype) {
00864 case DVSUBTYPE_CHAR :
00865 cptr1 = (char *) cs + sindx;
00866 cptr2 = (char *) cr + rindx +
00867 ((extent - shft) * res_dim_strd);
00868 for (j = 0; j < shft; j++) {
00869 (void) memcpy (cptr2, cptr1, bucketsize);
00870 cptr1 += src_dim_strd;
00871 cptr2 += res_dim_strd;
00872 }
00873 cptr2 = (char *) cr + rindx;
00874 for ( ; j < extent; j++) {
00875 (void) memcpy (cptr2, cptr1, bucketsize);
00876 cptr1 += src_dim_strd;
00877 cptr2 += res_dim_strd;
00878 }
00879 break;
00880
00881 case DVSUBTYPE_DERIVED :
00882 wptr1 = (_f_int *) sptr + sindx;
00883 save_wptr1 = wptr1;
00884 wptr2 = (_f_int *) rptr + rindx +
00885 ((extent - shft) * res_dim_strd);
00886 save_wptr2a = wptr2;
00887 wptr2 = (_f_int *) rptr + rindx;
00888 save_wptr2b = wptr2;
00889 for (j = 0; j < bucketsize; j++) {
00890 wptr1 = save_wptr1;
00891 wptr2 = save_wptr2a;
00892 for (k = 0; k < shft; k++) {
00893 sindx2 = j + (k * src_dim_strd);
00894 rindx2 = j + (k * res_dim_strd);
00895 wptr2[rindx2] = wptr1[sindx2];
00896 }
00897 wptr2 = save_wptr2b;
00898 for (l = 0; k < extent; k++, l++) {
00899 sindx2 = j + (k * src_dim_strd);
00900 rindx2 = j + (l * res_dim_strd);
00901 wptr2[rindx2] = wptr1[sindx2];
00902 }
00903 }
00904 break;
00905
00906 case DVSUBTYPE_BIT64 :
00907 uptr1 = (_f_int8 *) sptr + sindx;
00908 save_uptr1 = uptr1;
00909 uptr2 = (_f_int8 *) rptr + rindx +
00910 ((extent - shft) * res_dim_strd);
00911 #ifndef CRAY2
00912 for (j = 0; j < shft; j++) {
00913 sindx2 = j * src_dim_strd;
00914 rindx2 = j * res_dim_strd;
00915 uptr2[rindx2] = uptr1[sindx2];
00916 }
00917 uptr2 = (_f_int8 *) rptr + rindx;
00918 for (k = 0 ; j < extent; j++, k++) {
00919 sindx2 = j * src_dim_strd;
00920 rindx2 = k * res_dim_strd;
00921 uptr2[rindx2] = uptr1[sindx2];
00922 }
00923 #else
00924 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00925 shft);
00926 uptr2 = (_f_int8 *) rptr + rindx;
00927 uptr1 = save_uptr1 + (shft * src_dim_strd);
00928 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00929 extent-shft);
00930 #endif
00931 break;
00932
00933 case DVSUBTYPE_BIT32 :
00934 hptr1 = (_f_int4 *) sptr + sindx;
00935 hptr2 = (_f_int4 *) rptr + rindx +
00936 ((extent - shft) * res_dim_strd);
00937 for (j = 0; j < shft; j++) {
00938 sindx2 = j * src_dim_strd;
00939 rindx2 = j * res_dim_strd;
00940 hptr2[rindx2] = hptr1[sindx2];
00941 }
00942 hptr2 = (_f_int4 *) rptr + rindx;
00943 for (k = 0; j < extent; j++, k++) {
00944 sindx2 = j * src_dim_strd;
00945 rindx2 = k * res_dim_strd;
00946 hptr2[rindx2] = hptr1[sindx2];
00947 }
00948 break;
00949
00950 case DVSUBTYPE_BIT128 :
00951 dptr1 = (_f_real16 *) sptr + sindx;
00952 dptr2 = (_f_real16 *) rptr + rindx +
00953 ((extent - shft) * res_dim_strd);
00954 for (j = 0; j < shft; j++) {
00955 sindx2 = j * src_dim_strd;
00956 rindx2 = j * res_dim_strd;
00957 dptr2[rindx2] = dptr1[sindx2];
00958 }
00959 dptr2 = (_f_real16 *) rptr + rindx;
00960 for (k = 0; j < extent; j++, k++) {
00961 sindx2 = j * src_dim_strd;
00962 rindx2 = k * res_dim_strd;
00963 dptr2[rindx2] = dptr1[sindx2];
00964 }
00965 break;
00966
00967 #ifdef _F_COMP16
00968 case DVSUBTYPE_BIT256 :
00969 xptr1 = (dblcmplx *) sptr + sindx;
00970 xptr2 = (dblcmplx *) rptr + rindx +
00971 ((extent - shft) * res_dim_strd);
00972 for (j = 0; j < shft; j++) {
00973 sindx2 = j * src_dim_strd;
00974 rindx2 = j * res_dim_strd;
00975 xptr2[rindx2].re = xptr1[sindx2].re;
00976 xptr2[rindx2].im = xptr1[sindx2].im;
00977 }
00978 xptr2 = (dblcmplx *) rptr + rindx;
00979 for (k = 0; j < extent; j++, k++) {
00980 sindx2 = j * src_dim_strd;
00981 rindx2 = k * res_dim_strd;
00982 xptr2[rindx2].re = xptr1[sindx2].re;
00983 xptr2[rindx2].im = xptr1[sindx2].im;
00984 }
00985 break;
00986 #endif
00987
00988 default :
00989 _lerror (_LELVL_ABORT, FEINTDTY);
00990 }
00991
00992
00993
00994 curdim[0]++;
00995 if (curdim[0] < src_ext[0]) {
00996 src_off[0] += src_strd[0];
00997 res_off[0] += res_strd[0];
00998 shft_off[0] += shft_strd[0];
00999 } else {
01000 curdim[0] = 0;
01001 src_off[0] = 0;
01002 res_off[0] = 0;
01003 shft_off[0] = 0;
01004 curdim[1]++;
01005 if (curdim[1] < src_ext[1]) {
01006 src_off[1] += src_strd[1];
01007 res_off[1] += res_strd[1];
01008 shft_off[1] += shft_strd[1];
01009 } else {
01010 curdim[1] = 0;
01011 src_off[1] = 0;
01012 res_off[1] = 0;
01013 shft_off[1] = 0;
01014 curdim[2]++;
01015 if (curdim[2] < src_ext[2]) {
01016 src_off[2] += src_strd[2];
01017 res_off[2] += res_strd[2];
01018 shft_off[2] += shft_strd[2];
01019 } else {
01020 curdim[2] = 0;
01021 src_off[2] = 0;
01022 res_off[2] = 0;
01023 shft_off[2] = 0;
01024 curdim[3]++;
01025 if (curdim[3] < src_ext[3]) {
01026 src_off[3] += src_strd[3];
01027 res_off[3] += res_strd[3];
01028 shft_off[3] += shft_strd[3];
01029 } else {
01030 curdim[3] = 0;
01031 src_off[3] = 0;
01032 res_off[3] = 0;
01033 shft_off[3] = 0;
01034 curdim[4]++;
01035 if (curdim[4] < src_ext[4]) {
01036 src_off[4] += src_strd[4];
01037 res_off[4] += res_strd[4];
01038 shft_off[4] += shft_strd[4];
01039 } else {
01040 curdim[4] = 0;
01041 src_off[4] = 0;
01042 res_off[4] = 0;
01043 shft_off[4] = 0;
01044 curdim[5]++;
01045 if (curdim[5] < src_ext[5]) {
01046 src_off[5] += src_strd[5];
01047 res_off[5] += res_strd[5];
01048 shft_off[5] += shft_strd[5];
01049 }
01050 }
01051 }
01052 }
01053 }
01054 }
01055 }
01056 }
01057 }