Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/eoshift.c 92.1 07/07/99 15:52:02" 00038 00039 #include <stddef.h> 00040 #include <liberrno.h> 00041 #include <cray/dopevec.h> 00042 #include <cray/portdefs.h> 00043 #include "arraydefs.h" 00044 00045 /* 00046 * EOSHIFT routine. Perform an end-off shift of an array expression of 00047 * rank one or perform end-off shifts on all the complete rank-one 00048 * sections along a given dimension of an array expression of rank two 00049 * or greater. Elements are shifted off at one end of a section and 00050 * copies of a boundary value are shifted in at the other end. Different 00051 * sections may have different boundary values and may be shifted by 00052 * different amounts and in different directions. 00053 */ 00054 00055 /* Declare constants used for default boundary values */ 00056 00057 const _f_int1 defaulti1 = 0; 00058 const _f_int2 defaulti2 = 0; 00059 const _f_int4 defaulti4 = 0; 00060 const _f_int8 defaulti8 = 0; 00061 const _f_log1 defaultl1 = _btol(0); 00062 const _f_log2 defaultl2 = _btol(0); 00063 const _f_log4 defaultl4 = _btol(0); 00064 const _f_log8 defaultl8 = _btol(0); 00065 const _f_real4 defaultr4 = 0.0; 00066 const _f_real8 defaultr8 = 0.0; 00067 00068 #if defined _F_REAL16 && _F_REAL16 != (-1) 00069 #if defined(_WORD32) || defined(__mips) 00070 const _f_comp8 defaultr16 = {0.0,0.0}; 00071 #else 00072 const _f_comp8 defaultr16 = 0.0 + 0.0i; 00073 #endif /* _WORD32 or __mips */ 00074 #endif 00075 00076 #ifdef _F_COMP4 00077 #if _F_COMP4 > 0 00078 const _f_comp4 defaultc4 = 0.0 + 0.0i; 00079 #else 00080 const _f_comp4 defaultc4 = {0.0, 0.0}; 00081 #endif 00082 #endif 00083 00084 #ifdef _F_COMP8 00085 #if _F_COMP8 > 0 00086 const _f_comp8 defaultc8 = 0.0 + 0.0i; 00087 #else 00088 const _f_comp8 defaultc8 = {0.0, 0.0}; 00089 #endif 00090 #endif 00091 00092 #ifdef _F_COMP4 00093 #define BIT64_DEFAULT() \ 00094 if (type == DVTYPE_INTEGER) \ 00095 bnd64 = *(_f_int8 *) &defaulti8; \ 00096 else if (type == DVTYPE_REAL) \ 00097 bnd64 = *(_f_int8 *) &defaultr8; \ 00098 else if (type == DVTYPE_COMPLEX) { \ 00099 bnd64 = *(_f_int8 *) &defaultc4; \ 00100 } else \ 00101 bnd64 = *(_f_int8 *) &defaultl8; 00102 #else 00103 #define BIT64_DEFAULT() \ 00104 if (type == DVTYPE_INTEGER) \ 00105 bnd64 = *(_f_int8 *) &defaulti8; \ 00106 else if (type == DVTYPE_REAL) \ 00107 bnd64 = *(_f_int8 *) &defaultr8; \ 00108 else \ 00109 bnd64 = *(_f_int8 *) &defaultl8; 00110 #endif 00111 00112 #if defined _F_REAL16 00113 #if _F_REAL16 > 1 00114 #define BIT128_DEFAULT() \ 00115 if (type == DVTYPE_REAL) \ 00116 bnd128 = *(_f_comp8 *) &defaultr16; \ 00117 else \ 00118 bnd128 = *(_f_comp8 *) &defaultc8; 00119 #else 00120 #define BIT128_DEFAULT() \ 00121 bnd128 = *(_f_comp8 *) &defaultc8; 00122 #endif 00123 #endif 00124 00125 #ifdef _F_COMP16 00126 #define BIT256_DEFAULT() \ 00127 bnd256.re = *(_f_real16 *) &defaultr16; \ 00128 bnd256.im = *(_f_real16 *) &defaultr16; 00129 #endif 00130 00131 #define BIT32_DEFAULT() \ 00132 if (type == DVTYPE_INTEGER) { \ 00133 bnd32 = *(_f_int4 *) &defaulti4; \ 00134 } else if (type == DVTYPE_REAL) { \ 00135 bnd32 = *(_f_int4 *) &defaultr4; \ 00136 } else { \ 00137 bnd32 = *(_f_int4 *) &defaultl4; \ 00138 } 00139 00140 #define BIT16_DEFAULT() \ 00141 if (type == DVTYPE_INTEGER) { \ 00142 bnd16 = *(_f_int2 *) &defaulti2; \ 00143 } else { \ 00144 bnd16 = *(_f_int2 *) &defaultl2; \ 00145 } 00146 00147 #define BIT8_DEFAULT() \ 00148 if (type == DVTYPE_INTEGER) { \ 00149 bnd8 = *(_f_int1 *) &defaulti1; \ 00150 } else { \ 00151 bnd8 = *(_f_int1 *) &defaultl1; \ 00152 } 00153 00154 00155 #ifdef _UNICOS 00156 #pragma _CRI duplicate _EOSHIFT as EOSHIFT@ 00157 #endif 00158 void 00159 _EOSHIFT (DopeVectorType * result, 00160 DopeVectorType * source, 00161 DopeVectorType * shift, 00162 DopeVectorType * boundary, 00163 _f_int *dimp) 00164 { 00165 char *cs; /* char ptr to source array */ 00166 char *cr; /* char ptr to result array */ 00167 char *cb; /* char ptr to boundary array */ 00168 char * restrict cptr1; /* char */ 00169 char * restrict cptr2; /* char */ 00170 char * restrict cptr3; /* char */ 00171 _f_int8 * restrict uptr1; /* 64-bit */ 00172 _f_int8 * restrict uptr2; /* 64-bit */ 00173 _f_int8 * restrict uptr3; /* 64-bit */ 00174 _f_comp8 * restrict xptr1; /* 128-bit */ 00175 _f_comp8 * restrict xptr2; /* 128-bit */ 00176 _f_comp8 * restrict xptr3; /* 128-bit */ 00177 _f_int * restrict fptr1; /* default word */ 00178 _f_int * restrict fptr2; /* default word */ 00179 _f_int * restrict fptr3; /* default word */ 00180 #ifdef _F_COMP16 00181 dblcmplx * restrict dxptr1; /* 256-bit */ 00182 dblcmplx * restrict dxptr2; /* 256-bit */ 00183 dblcmplx * restrict dxptr3; /* 256-bit */ 00184 #endif 00185 _f_int4 * restrict hptr1; /* 32-bit */ 00186 _f_int4 * restrict hptr2; /* 32-bit */ 00187 _f_int4 * restrict hptr3; /* 32-bit */ 00188 void * restrict sptr; /* ptr to src data area */ 00189 void * restrict rptr; /* ptr to res data area */ 00190 _f_int * restrict shptr; /* ptr to shift data area */ 00191 _f_int * restrict bptr; /* ptr to boundary area */ 00192 _f_int8 * restrict save_uptr1; /* save copy of uptr1 */ 00193 _f_int8 * restrict save_uptr2a; /* save copy of uptr2 */ 00194 _f_int8 * restrict save_uptr2b; /* save copy of fptr2 */ 00195 _f_int * restrict save_fptr1; /* save copy of fptr1 */ 00196 _f_int * restrict save_fptr2a; /* save copy of fptr2 */ 00197 _f_int * restrict save_fptr2b; /* save copy of uptr2 */ 00198 char * restrict save_cptr1; /* save copy of cptr1 */ 00199 char * restrict save_cptr2a; /* save copy of cptr2 */ 00200 char * restrict save_cptr2b; /* save copy of cptr2 */ 00201 _f_comp8 * restrict save_xptr; /* save copy of xptr2 */ 00202 #ifdef _F_COMP16 00203 dblcmplx * restrict save_dxptr; /* save copy of dxptr2 */ 00204 #endif 00205 _f_int4 * restrict save_hptr; /* save copy of hptr2 */ 00206 _f_int4 * restrict save_hptr1; /* save copy of hptr2 */ 00207 _f_int4 * restrict save_hptr2; /* save copy of hptr2 */ 00208 _f_int4 * restrict i4ptr; /* ptr to shift data area */ 00209 _f_int8 * restrict i8ptr; /* ptr to shift data area */ 00210 _f_int bucketsize; /* size of each data element */ 00211 _f_int shft_size; /* size of each strd element */ 00212 _f_int nbytes; /* # of bytes in data area */ 00213 _f_int bytealligned; /* byte alligned flag */ 00214 long sindx; /* source index */ 00215 long sindx2; /* source index */ 00216 long shindx; /* shift index */ 00217 long rindx; /* result array index */ 00218 long rindx2; /* result array index */ 00219 long bindx; /* boundary array index */ 00220 long shft; /* shift value */ 00221 _f_int dim; /* dim value */ 00222 _f_int non_dim; /* non-shift dim for 2x2 */ 00223 long curdim[MAXDIM-1 ]; /* current indices */ 00224 long src_strd[MAXDIM-1]; /* index stride */ 00225 long src_ext[MAXDIM-1]; /* extents for source array */ 00226 long src_off[MAXDIM-1]; /* source offset */ 00227 long res_strd[MAXDIM-1]; /* index stride */ 00228 long res_off[MAXDIM-1]; /* result offset */ 00229 long shft_strd[MAXDIM-1]; /* stride for shift array */ 00230 long shft_off[MAXDIM-1]; /* shift offset */ 00231 long bnd_strd[MAXDIM-1]; /* stride for shift array */ 00232 long bnd_off[MAXDIM-1]; /* shift offset */ 00233 _f_int rank; /* rank of source matrix */ 00234 _f_int type; /* type */ 00235 _f_int subtype; /* sub-type */ 00236 _f_int arithmetic; /* arithmetic data type */ 00237 long extent; /* extent temporary */ 00238 long src_dim_strd; /* stride for source index */ 00239 long res_dim_strd; /* stride for result index */ 00240 long src_use_strd; /* stride for source index */ 00241 long res_use_strd; /* stride for result index */ 00242 long shft_dim; /* shift dimension */ 00243 long shft_dim_strd; /* shift stride */ 00244 _f_int bnd_dim; /* boundary dimension */ 00245 long bnd_dim_strd; /* boundary stride */ 00246 long tot_ext; /* total extent */ 00247 long shft_cnt; /* shift count index */ 00248 long src_tmp; /* temporary value for src */ 00249 long res_tmp; /* temporary value for res */ 00250 _f_int1 bnd8; /* boundary value holder */ 00251 _f_int2 bnd16; /* boundary value holder */ 00252 _f_int4 bnd32; /* boundary value holder */ 00253 _f_int8 bnd64; /* boundary value holder */ 00254 _f_comp8 bnd128; /* boundary value holder */ 00255 #ifdef _F_COMP16 00256 dblcmplx bnd256; /* boundary value holder */ 00257 #endif 00258 long i, j, k, l; /* index variables */ 00259 00260 /* Set type and dimension global variables */ 00261 00262 rank = source->n_dim; 00263 type = source->type_lens.type; 00264 00265 /* Size calculation is based on variable type */ 00266 00267 switch (type) { 00268 case DVTYPE_ASCII : 00269 bytealligned = 1; 00270 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */ 00271 subtype = DVSUBTYPE_CHAR; 00272 arithmetic = 0; 00273 break; 00274 case DVTYPE_DERIVEDBYTE : 00275 bytealligned = 1; 00276 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE; 00277 subtype = DVSUBTYPE_CHAR; 00278 arithmetic = 0; 00279 break; 00280 case DVTYPE_DERIVEDWORD : 00281 bytealligned = 0; 00282 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD; 00283 subtype = DVSUBTYPE_DERIVED; 00284 arithmetic = 0; 00285 break; 00286 default : 00287 bytealligned = 0; 00288 bucketsize = source->type_lens.int_len / BITS_PER_WORD; 00289 if (source->type_lens.int_len == 64) { 00290 subtype = DVSUBTYPE_BIT64; 00291 } else if (source->type_lens.int_len == 32) { 00292 subtype = DVSUBTYPE_BIT32; 00293 bucketsize = 1; 00294 } else if (source->type_lens.int_len == 256) { 00295 subtype = DVSUBTYPE_BIT256; 00296 } else { 00297 subtype = DVSUBTYPE_BIT128; 00298 } 00299 arithmetic = 1; 00300 } 00301 shft_size = shift->type_lens.int_len / BITS_PER_WORD; 00302 #ifdef _CRAYMPP 00303 if (shft_size == 0) 00304 shft_size = 1; 00305 #endif 00306 00307 /* If necessary, fill result dope vector */ 00308 00309 if (!result->assoc) { 00310 result->base_addr.a.ptr = (void *) NULL; 00311 result->orig_base = 0; 00312 result->orig_size = 0; 00313 00314 #ifdef _UNICOS 00315 #pragma _CRI shortloop 00316 #endif 00317 for (i = 0, tot_ext = bucketsize; i < rank; i++) { 00318 result->dimension[i].extent = source->dimension[i].extent; 00319 result->dimension[i].low_bound = 1; 00320 result->dimension[i].stride_mult = tot_ext; 00321 tot_ext *= result->dimension[i].extent; 00322 } 00323 00324 /* Determine size of space to allocate */ 00325 00326 if (!bytealligned) { 00327 nbytes = bucketsize * BYTES_PER_WORD; 00328 #ifdef _CRAYMPP 00329 if (subtype == DVSUBTYPE_BIT32) 00330 nbytes /= 2; 00331 #endif 00332 } else 00333 nbytes = bucketsize; 00334 #ifdef _UNICOS 00335 #pragma _CRI shortloop 00336 #endif 00337 for (i = 0; i < rank; i++) 00338 nbytes *= result->dimension[i].extent; 00339 if (nbytes > 0) { 00340 result->base_addr.a.ptr = (void *) malloc(nbytes); 00341 if (result->base_addr.a.ptr == NULL) 00342 _lerror(_LELVL_ABORT, FENOMEMY); 00343 } 00344 result->orig_base = (void *) result->base_addr.a.ptr; 00345 result->orig_size = nbytes * BITS_PER_BYTE; 00346 00347 result->assoc = 1; 00348 result->base_addr.a.el_len = source->base_addr.a.el_len; 00349 if (type == DVTYPE_ASCII) { 00350 cr = (char *) result->base_addr.a.ptr; 00351 result->base_addr.charptr = _cptofcd (cr, bucketsize); 00352 } 00353 } 00354 00355 /* 00356 * Check to see if any of the matrices have size 0. If any do, 00357 * return without doing anything. 00358 */ 00359 00360 #ifdef _UNICOS 00361 #pragma _CRI shortloop 00362 #endif 00363 for (i = 0; i < rank; i++) { 00364 if (!source->dimension[i].extent) 00365 return; 00366 } 00367 if (result->assoc) { 00368 #ifdef _UNICOS 00369 #pragma _CRI shortloop 00370 #endif 00371 for (i = 0; i < rank; i++) { 00372 if (!result->dimension[i].extent) 00373 return; 00374 } 00375 } 00376 if (shift->n_dim > 1) { 00377 #ifdef _UNICOS 00378 #pragma _CRI shortloop 00379 #endif 00380 for (i = 0; i < rank-1; i++) { 00381 if (!shift->dimension[i].extent) 00382 return; 00383 } 00384 } 00385 if (boundary) { 00386 if (boundary->n_dim > 1) { 00387 #ifdef _UNICOS 00388 #pragma _CRI shortloop 00389 #endif 00390 for (i = 0; i < rank-1; i++) { 00391 if (!boundary->dimension[i].extent) 00392 return; 00393 } 00394 } 00395 } 00396 00397 /* Set up scalar pointers to all of the argument data areas */ 00398 00399 if (!bytealligned) { 00400 sptr = (void *) source->base_addr.a.ptr; 00401 rptr = (void *) result->base_addr.a.ptr; 00402 if (boundary) 00403 bptr = (void *) boundary->base_addr.a.ptr; 00404 } else { 00405 if (type == DVTYPE_ASCII) { 00406 cs = _fcdtocp (source->base_addr.charptr); 00407 cr = _fcdtocp (result->base_addr.charptr); 00408 if (boundary) 00409 cb = _fcdtocp (boundary->base_addr.charptr); 00410 } else { 00411 cs = (char *) source->base_addr.a.ptr; 00412 cr = (char *) result->base_addr.a.ptr; 00413 if (boundary) 00414 cb = (char *) boundary->base_addr.a.ptr; 00415 } 00416 } 00417 shptr = (void *) shift->base_addr.a.ptr; 00418 00419 /* If dim argument is not present, set it to 1 (0 for C) */ 00420 00421 if (dimp == NULL) 00422 dim = 0; 00423 else { 00424 if (*dimp < 1 || *dimp > rank) 00425 _lerror (_LELVL_ABORT, FESCIDIM); 00426 dim = *dimp - 1; 00427 } 00428 00429 /* If source is a 1-dimensional array */ 00430 00431 if (rank == 1) { 00432 00433 /* 00434 * In order to assist vectorization, the following value has been 00435 * taken out of the array variable, and put into a scalar. 00436 */ 00437 00438 extent = source->dimension[0].extent; 00439 00440 /* 00441 * Get shift value which will be used for each loop iteration. 00442 * Once the value is obtained, make sure that it is positive. This 00443 * will ensure that the final calculated value can only be positive, 00444 * thus eliminating a test from the inner loop. 00445 */ 00446 00447 if (shift->type_lens.int_len == 64) { 00448 i8ptr = (_f_int8 *) shptr; 00449 shft = *i8ptr; 00450 } else { 00451 i4ptr = (_f_int4 *) shptr; 00452 shft = *i4ptr; 00453 } 00454 if (shft > extent) 00455 shft = extent; 00456 else if (shft < -extent) 00457 shft = -extent; 00458 00459 /* 00460 * The index calculation for each element of the source and result 00461 * matrices make use of some 'shortcuts'. These involved the use 00462 * of some variables which contain information about indices. One 00463 * of these variables exists for each dimension of the source and 00464 * result matrices. 00465 * 00466 * strd - stride value based in terms of elements rather than 00467 * words. 00468 */ 00469 00470 if (bucketsize > 1 && arithmetic) { 00471 src_dim_strd = source->dimension[0].stride_mult / bucketsize; 00472 res_dim_strd = result->dimension[0].stride_mult / bucketsize; 00473 } else { 00474 src_dim_strd = source->dimension[0].stride_mult; 00475 res_dim_strd = result->dimension[0].stride_mult; 00476 } 00477 00478 /* 00479 * Each data type will be handled with its own inner loop. 00480 */ 00481 00482 switch (subtype) { 00483 case DVSUBTYPE_CHAR : 00484 if (shft >= 0) { 00485 cptr1 = (char *) cs; 00486 cptr2 = (char *) cr + (extent - shft) * bucketsize; 00487 save_cptr2a = (char *) cr; 00488 src_use_strd = src_dim_strd; 00489 res_use_strd = res_dim_strd; 00490 } else { 00491 shft = -shft; 00492 cptr1 = (char *) cs + ((extent - 1) * src_dim_strd); 00493 cptr2 = (char *) cr + ((shft - 1) * res_dim_strd); 00494 save_cptr2a = (char *) cr + 00495 ((extent - 1) * res_dim_strd); 00496 src_use_strd = -src_dim_strd; 00497 res_use_strd = -res_dim_strd; 00498 } 00499 if (boundary) 00500 cptr3 = (char *) cb; 00501 if (shft < extent) 00502 shft_cnt = shft; 00503 else 00504 shft_cnt = extent; 00505 for (i = 0; i < shft_cnt; i++) { 00506 if (boundary) 00507 (void) memcpy (cptr2, cptr3, bucketsize); 00508 else 00509 (void) memset (cptr2, ' ', bucketsize); 00510 cptr1 += src_use_strd; 00511 cptr2 += res_use_strd; 00512 } 00513 cptr2 = save_cptr2a; 00514 for ( ; i < extent; i++) { 00515 (void) memcpy (cptr2, cptr1, bucketsize); 00516 cptr1 += src_use_strd; 00517 cptr2 += res_use_strd; 00518 } 00519 00520 break; 00521 00522 case DVSUBTYPE_DERIVED : 00523 if (shft >= 0) { 00524 save_fptr1 = (_f_int *) sptr; 00525 save_fptr2a = (_f_int *) rptr + 00526 (extent-shft) * bucketsize; 00527 save_fptr2b = (_f_int *) rptr; 00528 src_use_strd = src_dim_strd; 00529 res_use_strd = res_dim_strd; 00530 } else { 00531 shft = -shft; 00532 save_fptr1 = (_f_int *) sptr + 00533 ((extent - 1) * src_dim_strd); 00534 save_fptr2a = (_f_int *) rptr + 00535 ((shft - 1) * res_dim_strd); 00536 save_fptr2b = (_f_int *) rptr + 00537 ((extent - 1) * res_dim_strd); 00538 src_use_strd = -src_dim_strd; 00539 res_use_strd = -res_dim_strd; 00540 } 00541 if (boundary) 00542 fptr3 = (_f_int *) bptr; 00543 if (shft < extent) 00544 shft_cnt = shft; 00545 else 00546 shft_cnt = extent; 00547 for (i = 0; i < bucketsize; i++) { 00548 fptr1 = save_fptr1; 00549 fptr2 = save_fptr2a; 00550 for (j = 0; j < shft_cnt; j++) { 00551 rindx = (j * res_use_strd) + i; 00552 fptr2[rindx] = fptr3[i]; 00553 } 00554 fptr2 = save_fptr2b; 00555 for (k = 0; j < extent; j++, k++) { 00556 sindx = (j * src_use_strd) + i; 00557 rindx = (k * res_use_strd) + i; 00558 fptr2[rindx] = fptr1[sindx]; 00559 } 00560 } 00561 break; 00562 00563 case DVSUBTYPE_BIT64 : 00564 if (shft >= 0) { 00565 uptr1 = (_f_int8 *) sptr; 00566 save_uptr1 = uptr1; 00567 uptr2 = (_f_int8 *) rptr + 00568 ((extent - shft) * res_dim_strd); 00569 save_uptr2b = (_f_int8 *) rptr; 00570 src_use_strd = src_dim_strd; 00571 res_use_strd = res_dim_strd; 00572 } else { 00573 shft = -shft; 00574 uptr1 = (_f_int8 *) sptr + 00575 ((extent - 1) * src_dim_strd); 00576 save_uptr1 = uptr1; 00577 uptr2 = (_f_int8 *) rptr + 00578 ((shft - 1) * res_dim_strd); 00579 save_uptr2b = (_f_int8 *) rptr + 00580 ((extent - 1) * res_dim_strd); 00581 src_use_strd = -src_dim_strd; 00582 res_use_strd = -res_dim_strd; 00583 } 00584 if (shft < extent) 00585 shft_cnt = shft; 00586 else 00587 shft_cnt = extent; 00588 00589 if (boundary) { 00590 uptr3 = (_f_int8 *) bptr; 00591 bnd64 = uptr3[0]; 00592 } else { 00593 BIT64_DEFAULT(); 00594 } 00595 00596 for (i = 0; i < shft_cnt; i++) { 00597 rindx = i * res_use_strd; 00598 uptr2[rindx] = bnd64; 00599 } 00600 uptr2 = save_uptr2b; 00601 #ifndef CRAY2 00602 for (j = 0; i < extent; i++, j++) { 00603 sindx = i * src_use_strd; 00604 rindx = j * res_use_strd; 00605 uptr2[rindx] = uptr1[sindx]; 00606 } 00607 #else 00608 uptr1 = save_uptr1 + (shft_cnt * res_use_strd); 00609 shft_cnt = extent - shft_cnt; 00610 memstride ( uptr2, res_use_strd, 00611 uptr1, src_use_strd, shft_cnt); 00612 #endif 00613 break; 00614 00615 case DVSUBTYPE_BIT32 : 00616 if (shft >= 0) { 00617 hptr1 = (_f_int4 *) sptr; 00618 hptr2 = (_f_int4 *) rptr + 00619 ((extent - shft) * res_dim_strd); 00620 save_hptr = (_f_int4 *) rptr; 00621 src_use_strd = src_dim_strd; 00622 res_use_strd = res_dim_strd; 00623 } else { 00624 shft = -shft; 00625 hptr1 = (_f_int4 *) sptr + 00626 ((extent - 1) * src_dim_strd); 00627 hptr2 = (_f_int4 *) rptr + 00628 ((shft - 1) * res_dim_strd); 00629 save_hptr = (_f_int4 *) rptr + 00630 ((extent - 1) * res_dim_strd); 00631 src_use_strd = -src_dim_strd; 00632 res_use_strd = -res_dim_strd; 00633 } 00634 if (shft < extent) 00635 shft_cnt = shft; 00636 else 00637 shft_cnt = extent; 00638 00639 if (boundary) { 00640 hptr3 = (_f_int4 *) bptr; 00641 bnd32 = hptr3[0]; 00642 } else { 00643 BIT32_DEFAULT(); 00644 } 00645 00646 for (i = 0; i < shft_cnt; i++) { 00647 rindx = i * res_use_strd; 00648 hptr2[rindx] = bnd32; 00649 } 00650 hptr2 = save_hptr; 00651 for (j = 0; i < extent ; i++, j++) { 00652 sindx = i * src_use_strd; 00653 rindx = j * res_use_strd; 00654 hptr2[rindx] = hptr1[sindx]; 00655 } 00656 break; 00657 00658 case DVSUBTYPE_BIT128 : 00659 if (shft >= 0) { 00660 xptr1 = (_f_comp8 *) sptr; 00661 xptr2 = (_f_comp8 *) rptr + 00662 ((extent - shft) * res_dim_strd); 00663 save_xptr = (_f_comp8 *) rptr; 00664 src_use_strd = src_dim_strd; 00665 res_use_strd = res_dim_strd; 00666 } else { 00667 shft = -shft; 00668 xptr1 = (_f_comp8 *) sptr + 00669 ((extent - 1) * src_dim_strd); 00670 xptr2 = (_f_comp8 *) rptr + 00671 ((shft - 1) * res_dim_strd); 00672 save_xptr = (_f_comp8 *) rptr + 00673 ((extent - 1) * res_dim_strd); 00674 src_use_strd = -src_dim_strd; 00675 res_use_strd = -res_dim_strd; 00676 } 00677 if (shft < extent) 00678 shft_cnt = shft; 00679 else 00680 shft_cnt = extent; 00681 00682 if (boundary) { 00683 xptr3 = (_f_comp8 *) bptr; 00684 bnd128 = xptr3[0]; 00685 } else { 00686 BIT128_DEFAULT(); 00687 } 00688 00689 for (i = 0; i < shft_cnt; i++) { 00690 rindx = i * res_use_strd; 00691 xptr2[rindx] = bnd128; 00692 } 00693 xptr2 = save_xptr; 00694 for (j = 0; i < extent ; i++, j++) { 00695 sindx = i * src_use_strd; 00696 rindx = j * res_use_strd; 00697 xptr2[rindx] = xptr1[sindx]; 00698 } 00699 break; 00700 00701 #ifdef _F_COMP16 00702 case DVSUBTYPE_BIT256 : 00703 if (shft >= 0) { 00704 dxptr1 = (dblcmplx *) sptr; 00705 dxptr2 = (dblcmplx *) rptr + 00706 ((extent - shft) * res_dim_strd); 00707 save_dxptr = (dblcmplx *) rptr; 00708 src_use_strd = src_dim_strd; 00709 res_use_strd = res_dim_strd; 00710 } else { 00711 shft = -shft; 00712 dxptr1 = (dblcmplx *) sptr + 00713 ((extent - 1) * src_dim_strd); 00714 dxptr2 = (dblcmplx *) rptr + 00715 ((shft - 1) * res_dim_strd); 00716 save_dxptr = (dblcmplx *) rptr + 00717 ((extent - 1) * res_dim_strd); 00718 src_use_strd = -src_dim_strd; 00719 res_use_strd = -res_dim_strd; 00720 } 00721 if (shft < extent) 00722 shft_cnt = shft; 00723 else 00724 shft_cnt = extent; 00725 00726 if (boundary) { 00727 dxptr3 = (dblcmplx *) bptr; 00728 bnd256.re = dxptr3[0].re; 00729 bnd256.im = dxptr3[0].im; 00730 } else { 00731 BIT256_DEFAULT(); 00732 } 00733 for (i = 0; i < shft_cnt; i++) { 00734 rindx = i * res_use_strd; 00735 dxptr2[rindx].re = bnd256.re; 00736 dxptr2[rindx].im = bnd256.im; 00737 } 00738 dxptr2 = save_dxptr; 00739 for (j = 0; i < extent ; i++, j++) { 00740 sindx = i * src_use_strd; 00741 rindx = j * res_use_strd; 00742 dxptr2[rindx].re = dxptr1[sindx].re; 00743 dxptr2[rindx].im = dxptr1[sindx].im; 00744 } 00745 break; 00746 #endif 00747 00748 default: 00749 _lerror(_LELVL_ABORT, FEINTDTY); 00750 } 00751 00752 /* Arrays with rank of 2 */ 00753 00754 } else if (rank == 2) { 00755 00756 /* Set dimension and non_dimension indices */ 00757 00758 if (dim == 0) 00759 non_dim = 1; 00760 else 00761 non_dim = 0; 00762 00763 /* Set up shortcut variables for both array dimensions */ 00764 00765 if (bucketsize > 1 && arithmetic) { 00766 src_strd[0] = source->dimension[0].stride_mult / bucketsize; 00767 src_strd[1] = source->dimension[1].stride_mult / bucketsize; 00768 res_strd[0] = result->dimension[0].stride_mult / bucketsize; 00769 res_strd[1] = result->dimension[1].stride_mult / bucketsize; 00770 if (boundary) 00771 bnd_strd[0] = boundary->dimension[0].stride_mult/bucketsize; 00772 } else { 00773 src_strd[0] = source->dimension[0].stride_mult; 00774 src_strd[1] = source->dimension[1].stride_mult; 00775 res_strd[0] = result->dimension[0].stride_mult; 00776 res_strd[1] = result->dimension[1].stride_mult; 00777 if (boundary) 00778 bnd_strd[0] = boundary->dimension[0].stride_mult; 00779 } 00780 shft_strd[0] = shift->dimension[0].stride_mult / shft_size; 00781 00782 /* Put information about DIM index in scalars for vectorization */ 00783 00784 src_dim_strd = src_strd[dim]; 00785 res_dim_strd = res_strd[dim]; 00786 if (boundary) 00787 bnd_dim_strd = bnd_strd[0]; 00788 00789 /* Set up extent temporary variables */ 00790 00791 extent = source->dimension[dim].extent; 00792 00793 /* 00794 * The following expressions are used repeatedly in this section, and 00795 * can be put into scalar variables so that they do not have to be 00796 * recalculated every time they are used. 00797 */ 00798 00799 src_tmp = (extent - 1) * src_strd[dim]; 00800 res_tmp = (extent - 1) * res_strd[dim]; 00801 00802 /* Set up shift variables */ 00803 00804 if (shift->n_dim == 1) { 00805 shft_dim = 1; 00806 shft_dim_strd = shift->dimension[0].stride_mult / shft_size; 00807 shindx = 0; 00808 } else { /* scalar value, only get it once */ 00809 shft_dim = 0; 00810 } 00811 if (shift->type_lens.int_len == 64) { 00812 i8ptr = (_f_int8 *) shptr; 00813 shft = i8ptr[0]; 00814 } else { 00815 i4ptr = (_f_int4 *) shptr; 00816 shft = i4ptr[0]; 00817 } 00818 00819 /* Set up boundary index */ 00820 00821 if (boundary) { 00822 bindx = 0; 00823 if (boundary->n_dim > 0) 00824 bnd_dim = 1; 00825 else 00826 bnd_dim = 0; 00827 } 00828 00829 /* Outer loop is for dimension not being shifted */ 00830 00831 for (i = 0; i < source->dimension[non_dim].extent; i++) { 00832 00833 /* Get shift value if it is not a scalar */ 00834 00835 if (shift->type_lens.int_len == 64) { 00836 if (shft_dim == 1) { 00837 shft = i8ptr[shindx]; 00838 shindx += shft_dim_strd; 00839 } else 00840 shft = i8ptr[0]; 00841 } else { 00842 if (shft_dim == 1) { 00843 shft = i4ptr[shindx]; 00844 shindx += shft_dim_strd; 00845 } else 00846 shft = i4ptr[0]; 00847 } 00848 if (shft > extent) 00849 shft = extent; 00850 else if (shft < -extent) 00851 shft = -extent; 00852 00853 switch (subtype) { 00854 case DVSUBTYPE_CHAR : 00855 if (shft >= 0) { 00856 cptr1 = (char *) cs + (i * src_strd[non_dim]); 00857 cptr2 = (char *) cr + (i * res_strd[non_dim]) + 00858 ((extent - shft) * res_strd[dim]); 00859 save_cptr2a = (char *) cr + (i * res_strd[non_dim]); 00860 src_use_strd = src_dim_strd; 00861 res_use_strd = res_dim_strd; 00862 } else { 00863 shft = -shft; 00864 cptr1 = (char *) cs + (i * src_strd[non_dim]) + 00865 src_tmp; 00866 cptr2 = ( char *) cr + 00867 (i * res_strd[non_dim]) + 00868 ((shft - 1) * res_strd[dim]); 00869 save_cptr2a = (char *) cr + (i * res_strd[non_dim]) 00870 + res_tmp; 00871 src_use_strd = -src_dim_strd; 00872 res_use_strd = -res_dim_strd; 00873 } 00874 if (boundary) 00875 cptr3 = (char *) cb + bindx; 00876 00877 if (shft < extent) 00878 shft_cnt = shft; 00879 else 00880 shft_cnt = extent; 00881 for (j = 0; j < shft_cnt; j++) { 00882 if (boundary) 00883 (void) memcpy (cptr2, cptr3, bucketsize); 00884 else 00885 (void) memset (cptr2, ' ', bucketsize); 00886 cptr1 += src_use_strd; 00887 cptr2 += res_use_strd; 00888 } 00889 cptr2 = save_cptr2a; 00890 for ( ; j < extent; j++) { 00891 (void) memcpy (cptr2, cptr1, bucketsize); 00892 cptr1 += src_use_strd; 00893 cptr2 += res_use_strd; 00894 } 00895 break; 00896 00897 case DVSUBTYPE_DERIVED : 00898 if (shft >= 0) { 00899 save_fptr1 = (_f_int *) sptr + 00900 (i * src_strd[non_dim]); 00901 save_fptr2a = (_f_int *) rptr + 00902 (i * res_strd[non_dim]) + 00903 ((extent - shft) * res_dim_strd); 00904 save_fptr2b = (_f_int *) rptr + 00905 (i * res_strd[non_dim]); 00906 src_use_strd = src_dim_strd; 00907 res_use_strd = res_dim_strd; 00908 } else { 00909 shft = -shft; 00910 save_fptr1 = (_f_int *) sptr + 00911 (i * src_strd[non_dim]) + src_tmp; 00912 save_fptr2a = (_f_int *) rptr + 00913 (i * res_strd[non_dim]) + 00914 ((shft - 1) * res_strd[dim]); 00915 save_fptr2b = (_f_int *) rptr + 00916 (i * src_strd[non_dim]) + res_tmp; 00917 src_use_strd = -src_dim_strd; 00918 res_use_strd = -res_dim_strd; 00919 } 00920 fptr3 = (_f_int *) bptr + bindx; 00921 00922 if (shft < extent) 00923 shft_cnt = shft; 00924 else 00925 shft_cnt = extent; 00926 00927 for (j = 0; j < bucketsize; j++) { 00928 fptr1 = save_fptr1; 00929 fptr2 = save_fptr2a; 00930 for (k = 0; k < shft_cnt; k++) { 00931 rindx = (k * res_use_strd) + j; 00932 fptr2[rindx] = fptr3[j]; 00933 } 00934 fptr2 = save_fptr2b; 00935 for (l = 0; k < extent; k++, l++) { 00936 sindx = (k * src_use_strd) + j; 00937 rindx = (l * res_use_strd) + j; 00938 fptr2[rindx] = fptr1[sindx]; 00939 } 00940 } 00941 break; 00942 00943 case DVSUBTYPE_BIT64 : 00944 if (shft >= 0) { 00945 uptr1 = (_f_int8 *) sptr + 00946 (i * src_strd[non_dim]); 00947 save_uptr1 = uptr1; 00948 uptr2 = (_f_int8 *) rptr + 00949 (i * res_strd[non_dim]) + 00950 ((extent - shft) * res_dim_strd); 00951 save_uptr2b = (_f_int8 *) rptr + 00952 (i * res_strd[non_dim]); 00953 src_use_strd = src_dim_strd; 00954 res_use_strd = res_dim_strd; 00955 } else { 00956 shft = -shft; 00957 uptr1 = (_f_int8 *) sptr + 00958 (i * src_strd[non_dim]) + src_tmp; 00959 save_uptr1 = uptr1; 00960 uptr2 = (_f_int8 *) rptr + 00961 (i * res_strd[non_dim]) + 00962 ((shft - 1) * res_strd[dim]); 00963 save_uptr2b = (_f_int8 *) rptr + 00964 (i * res_strd[non_dim]) + res_tmp; 00965 src_use_strd = -src_dim_strd; 00966 res_use_strd = -res_dim_strd; 00967 } 00968 00969 if (shft < extent) 00970 shft_cnt = shft; 00971 else 00972 shft_cnt = extent; 00973 00974 if (boundary) { 00975 uptr3 = (_f_int8 *) bptr + bindx; 00976 bnd64 = uptr3[0]; 00977 } else { 00978 BIT64_DEFAULT(); 00979 } 00980 00981 for (j = 0; j < shft_cnt; j++) { 00982 rindx = (j * res_use_strd); 00983 uptr2[rindx] = bnd64; 00984 } 00985 uptr2 = save_uptr2b; 00986 #ifndef CRAY2 00987 for (k = 0; j < extent; j++, k++) { 00988 sindx = (j * src_use_strd); 00989 rindx = (k * res_use_strd); 00990 uptr2[rindx] = uptr1[sindx]; 00991 } 00992 #else 00993 uptr1 = save_uptr1 + (shft_cnt * src_use_strd); 00994 shft_cnt = extent - shft_cnt; 00995 memstride ( uptr2, res_use_strd, 00996 uptr1, src_use_strd, shft_cnt); 00997 #endif 00998 break; 00999 01000 case DVSUBTYPE_BIT32 : 01001 if (shft >= 0) { 01002 hptr1 = (_f_int4 *) sptr + 01003 (i * src_strd[non_dim]); 01004 hptr2 = (_f_int4 *) rptr + 01005 (i * res_strd[non_dim]) + 01006 ((extent - shft) * res_dim_strd); 01007 save_hptr = (_f_int4 *) rptr + 01008 (i * res_strd[non_dim]); 01009 src_use_strd = src_dim_strd; 01010 res_use_strd = res_dim_strd; 01011 } else { 01012 shft = -shft; 01013 hptr1 = (_f_int4 *) sptr + 01014 (i * src_strd[non_dim]) + src_tmp; 01015 hptr2 = (_f_int4 *) rptr + 01016 (i * res_strd[non_dim]) + 01017 ((shft - 1) * res_strd[dim]); 01018 save_hptr = (_f_int4 *) rptr + 01019 (i * res_strd[non_dim]) + res_tmp; 01020 src_use_strd = -src_dim_strd; 01021 res_use_strd = -res_dim_strd; 01022 } 01023 01024 if (shft < extent) 01025 shft_cnt = shft; 01026 else 01027 shft_cnt = extent; 01028 01029 if (boundary) { 01030 hptr3 = (_f_int4 *) bptr + bindx; 01031 bnd32 = hptr3[0]; 01032 } else { 01033 BIT32_DEFAULT(); 01034 } 01035 01036 for (j = 0; j < shft_cnt; j++) { 01037 rindx = j * res_use_strd; 01038 hptr2[rindx] = bnd32; 01039 } 01040 hptr2 = save_hptr; 01041 for (k = 0; j < extent; j++, k++) { 01042 sindx = j * src_use_strd; 01043 rindx = k * res_use_strd; 01044 hptr2[rindx] = hptr1[sindx]; 01045 } 01046 break; 01047 01048 case DVSUBTYPE_BIT128 : 01049 if (shft >= 0) { 01050 xptr1 = (_f_comp8 *) sptr + 01051 (i * src_strd[non_dim]); 01052 xptr2 = (_f_comp8 *) rptr + 01053 (i * res_strd[non_dim]) + 01054 ((extent - shft) * res_dim_strd); 01055 save_xptr = (_f_comp8 *) rptr + 01056 (i * res_strd[non_dim]); 01057 src_use_strd = src_dim_strd; 01058 res_use_strd = res_dim_strd; 01059 } else { 01060 shft = -shft; 01061 xptr1 = (_f_comp8 *) sptr + 01062 (i * src_strd[non_dim]) + src_tmp; 01063 xptr2 = (_f_comp8 *) rptr + 01064 (i * res_strd[non_dim]) + 01065 ((shft - 1) * res_strd[dim]); 01066 save_xptr = (_f_comp8 *) rptr + 01067 (i * res_strd[non_dim]) + res_tmp; 01068 src_use_strd = -src_dim_strd; 01069 res_use_strd = -res_dim_strd; 01070 } 01071 01072 if (shft < extent) 01073 shft_cnt = shft; 01074 else 01075 shft_cnt = extent; 01076 01077 if (boundary) { 01078 xptr3 = (_f_comp8 *) bptr + bindx; 01079 bnd128 = xptr3[0]; 01080 } else { 01081 BIT128_DEFAULT(); 01082 } 01083 01084 for (j = 0; j < shft_cnt; j++) { 01085 rindx = j * res_use_strd; 01086 xptr2[rindx] = bnd128; 01087 } 01088 xptr2 = save_xptr; 01089 for (k = 0; j < extent; j++, k++) { 01090 sindx = j * src_use_strd; 01091 rindx = k * res_use_strd; 01092 xptr2[rindx] = xptr1[sindx]; 01093 } 01094 break; 01095 01096 #ifdef _F_COMP16 01097 case DVSUBTYPE_BIT256 : 01098 if (shft >= 0) { 01099 dxptr1 = (dblcmplx *) sptr + 01100 (i * src_strd[non_dim]); 01101 dxptr2 = (dblcmplx *) rptr + 01102 (i * res_strd[non_dim]) + 01103 ((extent - shft) * res_dim_strd); 01104 save_dxptr = (dblcmplx *) rptr + 01105 (i * res_strd[non_dim]); 01106 src_use_strd = src_dim_strd; 01107 res_use_strd = res_dim_strd; 01108 } else { 01109 shft = -shft; 01110 dxptr1 = (dblcmplx *) sptr + 01111 (i * src_strd[non_dim]) + src_tmp; 01112 dxptr2 = (dblcmplx *) rptr + 01113 (i * res_strd[non_dim]) + 01114 ((shft - 1) * res_strd[dim]); 01115 save_dxptr = (dblcmplx *) rptr + 01116 (i * res_strd[non_dim]) + res_tmp; 01117 src_use_strd = -src_dim_strd; 01118 res_use_strd = -res_dim_strd; 01119 } 01120 01121 if (shft < extent) 01122 shft_cnt = shft; 01123 else 01124 shft_cnt = extent; 01125 01126 if (boundary) { 01127 dxptr3 = (dblcmplx *) bptr + bindx; 01128 bnd256.re = dxptr3[0].re; 01129 bnd256.im = dxptr3[0].im; 01130 } else { 01131 BIT256_DEFAULT(); 01132 } 01133 01134 for (j = 0; j < shft_cnt; j++) { 01135 rindx = j * res_use_strd; 01136 dxptr2[rindx].re = bnd256.re; 01137 dxptr2[rindx].im = bnd256.im; 01138 } 01139 dxptr2 = save_dxptr; 01140 for (k = 0; j < extent; j++, k++) { 01141 sindx = j * src_use_strd; 01142 rindx = k * res_use_strd; 01143 dxptr2[rindx].re = dxptr1[sindx].re; 01144 dxptr2[rindx].im = dxptr1[sindx].im; 01145 } 01146 break; 01147 #endif 01148 01149 default : 01150 _lerror(_LELVL_ABORT, FEINTDTY); 01151 } 01152 01153 /* Increment boundary stride index */ 01154 01155 if (bnd_dim) 01156 bindx += bnd_dim_strd; 01157 } 01158 01159 /* Arrays with rank of 3-7 */ 01160 01161 } else { 01162 01163 if (dim == 0) { 01164 i = 0; 01165 tot_ext = 1; 01166 } else 01167 #ifdef _UNICOS 01168 #pragma _CRI shortloop 01169 #endif 01170 for (i = 0, tot_ext = 1; i < dim; i++) { 01171 tot_ext *= source->dimension[i].extent; 01172 src_ext[i] = source->dimension[i].extent; 01173 if (bucketsize > 1 && arithmetic) { 01174 src_strd[i] = source->dimension[i].stride_mult / bucketsize; 01175 res_strd[i] = result->dimension[i].stride_mult / bucketsize; 01176 if (boundary) 01177 bnd_strd[i] = 01178 boundary->dimension[i].stride_mult / bucketsize; 01179 else 01180 bnd_strd[i] = 0; 01181 } else { 01182 src_strd[i] = source->dimension[i].stride_mult; 01183 res_strd[i] = result->dimension[i].stride_mult; 01184 if (boundary) 01185 bnd_strd[i] = boundary->dimension[i].stride_mult; 01186 else 01187 bnd_strd[i] = 0; 01188 } 01189 } 01190 if (i < (rank - 1)) 01191 #ifdef _UNICOS 01192 #pragma _CRI shortloop 01193 #endif 01194 for ( ; i < rank-1; i++) { 01195 tot_ext *= source->dimension[i+1].extent; 01196 src_ext[i] = source->dimension[i+1].extent; 01197 if (bucketsize > 1 && arithmetic) { 01198 src_strd[i] = source->dimension[i+1].stride_mult/bucketsize; 01199 res_strd[i] = result->dimension[i+1].stride_mult/bucketsize; 01200 if (boundary) 01201 bnd_strd[i] = 01202 boundary->dimension[i].stride_mult / bucketsize; 01203 else 01204 bnd_strd[i] = 0; 01205 } else { 01206 src_strd[i] = source->dimension[i+1].stride_mult; 01207 res_strd[i] = result->dimension[i+1].stride_mult; 01208 if (boundary) 01209 bnd_strd[i] = boundary->dimension[i].stride_mult; 01210 else 01211 bnd_strd[i] = 0; 01212 } 01213 } 01214 01215 /* Initialize all counters to 0 */ 01216 01217 #ifdef _UNICOS 01218 #pragma _CRI shortloop 01219 #endif 01220 for (i = 0; i < MAXDIM-1; i++) { 01221 src_off[i] = 0; 01222 res_off[i] = 0; 01223 bnd_off[i] = 0; 01224 shft_off[i] = 0; 01225 shft_strd[i] = 0; 01226 curdim[i] = 0; 01227 } 01228 01229 /* Set up some scalars which contain information about DIM */ 01230 01231 extent = source->dimension[dim].extent; 01232 01233 if (bucketsize > 1 && arithmetic) { 01234 src_dim_strd = source->dimension[dim].stride_mult / bucketsize; 01235 res_dim_strd = result->dimension[dim].stride_mult / bucketsize; 01236 } else { 01237 src_dim_strd = source->dimension[dim].stride_mult; 01238 res_dim_strd = result->dimension[dim].stride_mult; 01239 } 01240 01241 /* 01242 * The following expressions are used throughout this loop. They can 01243 * be put into scalars to avoid recalculating them every time they are 01244 * used. 01245 */ 01246 01247 src_tmp = (extent - 1) * src_dim_strd; 01248 res_tmp = (extent - 1) * res_dim_strd; 01249 01250 /* Set up the shift variables */ 01251 01252 if (shift->n_dim == 0) { 01253 shft_dim = 0; 01254 } else { 01255 shft_dim = 1; 01256 shindx = 0; 01257 #ifdef _UNICOS 01258 #pragma _CRI shortloop 01259 #endif 01260 for (i = 0; i < rank-1; i++) { 01261 shft_strd[i] = shift->dimension[i].stride_mult / shft_size; 01262 } 01263 } 01264 01265 /* Initialize correct pointer type to shift data */ 01266 01267 if (shift->type_lens.int_len == 64) { 01268 i8ptr = (_f_int8 *) shptr; 01269 shft = i8ptr[0]; 01270 } else { 01271 i4ptr = (_f_int4 *) shptr; 01272 shft = i4ptr[0]; 01273 } 01274 01275 /* Set up boundary values */ 01276 01277 if (boundary) { 01278 bindx = 0; 01279 if (boundary->n_dim > 0) 01280 bnd_dim = 1; 01281 else 01282 bnd_dim = 0; 01283 } 01284 01285 /* 01286 * The outer loop will be executed once for each combination of 01287 * indices not including the DIM dimension. 01288 */ 01289 01290 for (i = 0; i < tot_ext; i++) { 01291 01292 /* Calculate the shift value used throughout the inner loop */ 01293 01294 if (shft_dim) { 01295 switch (rank) { 01296 case 3 : 01297 shindx = shft_off[0] + shft_off[1]; 01298 break; 01299 case 4 : 01300 shindx = shft_off[0] + shft_off[1] + shft_off[2]; 01301 break; 01302 case 5 : 01303 shindx = shft_off[0] + shft_off[1] + 01304 shft_off[2] + shft_off[3]; 01305 break; 01306 case 6 : 01307 shindx = shft_off[0] + shft_off[1] + shft_off[2] + 01308 shft_off[3] + shft_off[4]; 01309 break; 01310 default : 01311 shindx = shft_off[0] + shft_off[1] + shft_off[2] + 01312 shft_off[3] + shft_off[4] + shft_off[5]; 01313 } 01314 if (shift->type_lens.int_len == 64) 01315 shft = i8ptr[shindx]; 01316 else 01317 shft = i4ptr[shindx]; 01318 } else 01319 if (shift->type_lens.int_len == 64) 01320 shft = i8ptr[0]; 01321 else 01322 shft = i4ptr[0]; 01323 if (shft > extent) 01324 shft = extent; 01325 else if (shft < -extent) 01326 shft = -extent; 01327 01328 /* If necessary, get boundary index */ 01329 01330 if (bnd_dim) { 01331 switch (rank) { 01332 case 3 : 01333 bindx = bnd_off[0] + bnd_off[1]; 01334 break; 01335 case 4 : 01336 bindx = bnd_off[0] + bnd_off[1] + bnd_off[2]; 01337 break; 01338 case 5 : 01339 bindx = bnd_off[0] + bnd_off[1] + 01340 bnd_off[2] + bnd_off[3]; 01341 break; 01342 case 6 : 01343 bindx = bnd_off[0] + bnd_off[1] + bnd_off[2] + 01344 bnd_off[3] + bnd_off[4]; 01345 break; 01346 default : 01347 bindx = bnd_off[0] + bnd_off[1] + bnd_off[2] + 01348 bnd_off[3] + bnd_off[4] + bnd_off[5]; 01349 } 01350 } 01351 01352 /* Calculate source and result index values */ 01353 01354 switch (rank) { 01355 case 3 : 01356 sindx = src_off[0] + src_off[1]; 01357 rindx = res_off[0] + res_off[1]; 01358 break; 01359 case 4 : 01360 sindx = src_off[0] + src_off[1] + src_off[2]; 01361 rindx = res_off[0] + res_off[1] + res_off[2]; 01362 break; 01363 case 5 : 01364 sindx = src_off[0] + src_off[1] + 01365 src_off[2] + src_off[3]; 01366 rindx = res_off[0] + res_off[1] + 01367 res_off[2] + res_off[3]; 01368 break; 01369 case 6 : 01370 sindx = src_off[0] + src_off[1] + src_off[2] + 01371 src_off[3] + src_off[4]; 01372 rindx = res_off[0] + res_off[1] + res_off[2] + 01373 res_off[3] + res_off[4]; 01374 break; 01375 default : 01376 sindx = src_off[0] + src_off[1] + src_off[2] + 01377 src_off[3] + src_off[4] + src_off[5]; 01378 rindx = res_off[0] + res_off[1] + res_off[2] + 01379 res_off[3] + res_off[4] + res_off[5]; 01380 } 01381 01382 switch (subtype) { 01383 case DVSUBTYPE_CHAR : 01384 if (shft >= 0) { 01385 cptr1 = (char *) cs + sindx; 01386 save_cptr1 = cptr1; 01387 cptr2 = (char *) cr + rindx + 01388 ((extent - shft) * res_dim_strd); 01389 save_cptr2a = cptr2; 01390 save_cptr2b = (char *) cr + rindx; 01391 src_use_strd = src_dim_strd; 01392 res_use_strd = res_dim_strd; 01393 } else { 01394 shft = -shft; 01395 cptr1 = (char *) cs + sindx + src_tmp; 01396 save_cptr1 = cptr1; 01397 cptr2 = (char *) cr + rindx + 01398 ((shft - 1) * res_dim_strd); 01399 save_cptr2a = cptr2; 01400 save_cptr2b = (char *) cr + rindx + res_tmp; 01401 src_use_strd = -src_dim_strd; 01402 res_use_strd = -res_dim_strd; 01403 } 01404 01405 if (boundary) 01406 cptr3 = (char *) cb + bindx; 01407 01408 if (shft < extent) 01409 shft_cnt = shft; 01410 else 01411 shft_cnt = extent; 01412 for (j = 0; j < shft_cnt; j++) { 01413 cptr2 = save_cptr2a + (j * res_use_strd); 01414 if (boundary) 01415 (void) memcpy (cptr2, cptr3, bucketsize); 01416 else 01417 (void) memset (cptr2, ' ', bucketsize); 01418 } 01419 cptr2 = save_cptr2b; 01420 for (k = 0; j < extent; j++, k++) { 01421 cptr1 = save_cptr1 + (j * src_use_strd); 01422 cptr2 = save_cptr2b + (k * res_use_strd); 01423 (void) memcpy (cptr2, cptr1, bucketsize); 01424 } 01425 break; 01426 01427 case DVSUBTYPE_DERIVED : 01428 if (shft >= 0) { 01429 save_fptr1 = (_f_int *) sptr + sindx; 01430 save_fptr2a = (_f_int *) rptr + rindx + 01431 ((extent - shft) * res_dim_strd); 01432 save_fptr2b = (_f_int *) rptr + rindx; 01433 src_use_strd = src_dim_strd; 01434 res_use_strd = res_dim_strd; 01435 } else { 01436 shft = -shft; 01437 save_fptr1 = (_f_int *) sptr + sindx + 01438 src_tmp; 01439 save_fptr2a = (_f_int *) rptr + rindx + 01440 ((shft - 1) * res_dim_strd); 01441 save_fptr2b = (_f_int *) rptr + rindx + 01442 res_tmp; 01443 src_use_strd = -src_dim_strd; 01444 res_use_strd = -res_dim_strd; 01445 } 01446 fptr3 = (_f_int *) bptr + bindx; 01447 01448 if (shft < extent) 01449 shft_cnt = shft; 01450 else 01451 shft_cnt = extent; 01452 01453 for (j = 0; j < bucketsize; j++) { 01454 fptr1 = save_fptr1; 01455 fptr2 = save_fptr2a; 01456 for (k = 0; k < shft_cnt; k++) { 01457 rindx2 = (k * res_use_strd) + j; 01458 fptr2[rindx2] = fptr3[j]; 01459 } 01460 fptr2 = save_fptr2b; 01461 for (l = 0; k < extent; k++, l++) { 01462 sindx2 = (k * src_use_strd) + j; 01463 rindx2 = (l * res_use_strd) + j; 01464 fptr2[rindx2] = fptr1[sindx2]; 01465 } 01466 } 01467 break; 01468 01469 case DVSUBTYPE_BIT64 : 01470 if (shft >= 0) { 01471 uptr1 = (_f_int8 *) sptr + sindx; 01472 save_uptr1 = uptr1; 01473 uptr2 = (_f_int8 *) rptr + rindx + 01474 ((extent - shft) * res_dim_strd); 01475 save_uptr2b = (_f_int8 *) rptr + rindx; 01476 src_use_strd = src_dim_strd; 01477 res_use_strd = res_dim_strd; 01478 } else { 01479 shft = -shft; 01480 uptr1 = (_f_int8 *) sptr + sindx + src_tmp; 01481 save_uptr1 = uptr1; 01482 uptr2 = (_f_int8 *) rptr + rindx + 01483 ((shft - 1) * res_dim_strd); 01484 save_uptr2b = (_f_int8 *) rptr + rindx + 01485 res_tmp; 01486 src_use_strd = -src_dim_strd; 01487 res_use_strd = -res_dim_strd; 01488 } 01489 01490 if (shft < extent) 01491 shft_cnt = shft; 01492 else 01493 shft_cnt = extent; 01494 01495 if (boundary) { 01496 uptr3 = (_f_int8 *) bptr + bindx; 01497 bnd64 = uptr3[0]; 01498 } else { 01499 BIT64_DEFAULT(); 01500 } 01501 01502 for (j = 0; j < shft_cnt; j++) { 01503 rindx2 = j * res_use_strd; 01504 uptr2[rindx2] = bnd64; 01505 } 01506 uptr2 = save_uptr2b; 01507 #ifndef CRAY2 01508 for (k = 0; j < extent; j++, k++) { 01509 sindx2 = j * src_use_strd; 01510 rindx2 = k * res_use_strd; 01511 uptr2[rindx2] = uptr1[sindx2]; 01512 } 01513 #else 01514 uptr1 = save_uptr1 + (shft_cnt * src_use_strd); 01515 shft_cnt = extent - shft_cnt; 01516 memstride ( uptr2, res_use_strd, 01517 uptr1, src_use_strd, shft_cnt); 01518 #endif 01519 break; 01520 01521 case DVSUBTYPE_BIT32 : 01522 if (shft >= 0) { 01523 hptr1 = (_f_int4 *) sptr + sindx; 01524 hptr2 = (_f_int4 *) rptr + rindx + 01525 ((extent - shft) * res_dim_strd); 01526 save_hptr = (_f_int4 *) rptr + rindx; 01527 src_use_strd = src_dim_strd; 01528 res_use_strd = res_dim_strd; 01529 } else { 01530 shft = -shft; 01531 hptr1 = (_f_int4 *) sptr + sindx + src_tmp; 01532 hptr2 = (_f_int4 *) rptr + rindx + 01533 ((shft - 1) * res_dim_strd); 01534 save_hptr = (_f_int4 *) rptr + rindx + res_tmp; 01535 src_use_strd = -src_dim_strd; 01536 res_use_strd = -res_dim_strd; 01537 } 01538 01539 if (shft < extent) 01540 shft_cnt = shft; 01541 else 01542 shft_cnt = extent; 01543 01544 if (boundary) { 01545 hptr3 = (_f_int4 *) bptr + bindx; 01546 bnd32 = hptr3[0]; 01547 } else { 01548 BIT32_DEFAULT(); 01549 } 01550 01551 for (j = 0; j < shft_cnt; j++) { 01552 rindx2 = j * res_use_strd; 01553 hptr2[rindx2] = bnd32; 01554 } 01555 hptr2 = save_hptr; 01556 for (k = 0; j < extent; j++, k++) { 01557 sindx2 = j * src_use_strd; 01558 rindx2 = k * res_use_strd; 01559 hptr2[rindx2] = hptr1[sindx2]; 01560 } 01561 break; 01562 01563 case DVSUBTYPE_BIT128 : 01564 if (shft >= 0) { 01565 xptr1 = (_f_comp8 *) sptr + sindx; 01566 xptr2 = (_f_comp8 *) rptr + rindx + 01567 ((extent - shft) * res_dim_strd); 01568 save_xptr = (_f_comp8 *) rptr + rindx; 01569 src_use_strd = src_dim_strd; 01570 res_use_strd = res_dim_strd; 01571 } else { 01572 shft = -shft; 01573 xptr1 = (_f_comp8 *) sptr + sindx + src_tmp; 01574 xptr2 = (_f_comp8 *) rptr + rindx + 01575 ((shft - 1) * res_dim_strd); 01576 save_xptr = (_f_comp8 *) rptr + rindx + res_tmp; 01577 src_use_strd = -src_dim_strd; 01578 res_use_strd = -res_dim_strd; 01579 } 01580 01581 if (shft < extent) 01582 shft_cnt = shft; 01583 else 01584 shft_cnt = extent; 01585 01586 if (boundary) { 01587 xptr3 = (_f_comp8 *) bptr + bindx; 01588 bnd128 = xptr3[0]; 01589 } else { 01590 BIT128_DEFAULT(); 01591 } 01592 01593 for (j = 0; j < shft_cnt; j++) { 01594 rindx2 = j * res_use_strd; 01595 xptr2[rindx2] = bnd128; 01596 } 01597 xptr2 = save_xptr; 01598 for (k = 0; j < extent; j++, k++) { 01599 sindx2 = j * src_use_strd; 01600 rindx2 = k * res_use_strd; 01601 xptr2[rindx2] = xptr1[sindx2]; 01602 } 01603 break; 01604 01605 #ifdef _F_COMP16 01606 case DVSUBTYPE_BIT256 : 01607 if (shft >= 0) { 01608 dxptr1 = (dblcmplx *) sptr + sindx; 01609 dxptr2 = (dblcmplx *) rptr + rindx + 01610 ((extent - shft) * res_dim_strd); 01611 save_dxptr = (dblcmplx *) rptr + rindx; 01612 src_use_strd = src_dim_strd; 01613 res_use_strd = res_dim_strd; 01614 } else { 01615 shft = -shft; 01616 dxptr1 = (dblcmplx *) sptr + sindx + src_tmp; 01617 dxptr2 = (dblcmplx *) rptr + rindx + 01618 ((shft - 1) * res_dim_strd); 01619 save_dxptr = (dblcmplx *) rptr + rindx + res_tmp; 01620 src_use_strd = -src_dim_strd; 01621 res_use_strd = -res_dim_strd; 01622 } 01623 01624 if (shft < extent) 01625 shft_cnt = shft; 01626 else 01627 shft_cnt = extent; 01628 01629 if (boundary) { 01630 dxptr3 = (dblcmplx *) bptr + bindx; 01631 bnd256.re = dxptr3[0].re; 01632 bnd256.im = dxptr3[0].im; 01633 } else { 01634 BIT256_DEFAULT(); 01635 } 01636 01637 for (j = 0; j < shft_cnt; j++) { 01638 rindx2 = j * res_use_strd; 01639 dxptr2[rindx2].re = bnd256.re; 01640 dxptr2[rindx2].im = bnd256.im; 01641 } 01642 dxptr2 = save_dxptr; 01643 for (k = 0; j < extent; j++, k++) { 01644 sindx2 = j * src_use_strd; 01645 rindx2 = k * res_use_strd; 01646 dxptr2[rindx2].re = dxptr1[sindx2].re; 01647 dxptr2[rindx2].im = dxptr1[sindx2].im; 01648 } 01649 break; 01650 #endif 01651 01652 default : 01653 _lerror(_LELVL_ABORT, FEINTDTY); 01654 } 01655 01656 /* Increment the current dimension counter. */ 01657 01658 curdim[0]++; 01659 if (curdim[0] < src_ext[0]) { 01660 src_off[0] += src_strd[0]; 01661 res_off[0] += res_strd[0]; 01662 shft_off[0] += shft_strd[0]; 01663 bnd_off[0] += bnd_strd[0]; 01664 } else { 01665 curdim[0] = 0; 01666 src_off[0] = 0; 01667 res_off[0] = 0; 01668 shft_off[0] = 0; 01669 bnd_off[0] = 0; 01670 curdim[1]++; 01671 if (curdim[1] < src_ext[1]) { 01672 src_off[1] += src_strd[1]; 01673 res_off[1] += res_strd[1]; 01674 shft_off[1] += shft_strd[1]; 01675 bnd_off[1] += bnd_strd[1]; 01676 } else { 01677 curdim[1] = 0; 01678 src_off[1] = 0; 01679 res_off[1] = 0; 01680 shft_off[1] = 0; 01681 bnd_off[1] = 0; 01682 curdim[2]++; 01683 if (curdim[2] < src_ext[2]) { 01684 src_off[2] += src_strd[2]; 01685 res_off[2] += res_strd[2]; 01686 shft_off[2] += shft_strd[2]; 01687 bnd_off[2] += bnd_strd[2]; 01688 } else { 01689 curdim[2] = 0; 01690 src_off[2] = 0; 01691 res_off[2] = 0; 01692 shft_off[2] = 0; 01693 bnd_off[2] = 0; 01694 curdim[3]++; 01695 if (curdim[3] < src_ext[3]) { 01696 src_off[3] += src_strd[3]; 01697 res_off[3] += res_strd[3]; 01698 shft_off[3] += shft_strd[3]; 01699 bnd_off[3] += bnd_strd[3]; 01700 } else { 01701 curdim[3] = 0; 01702 src_off[3] = 0; 01703 res_off[3] = 0; 01704 shft_off[3] = 0; 01705 bnd_off[3] = 0; 01706 curdim[4]++; 01707 if (curdim[4] < src_ext[4]) { 01708 src_off[4] += src_strd[4]; 01709 res_off[4] += res_strd[4]; 01710 shft_off[4] += shft_strd[4]; 01711 bnd_off[4] += bnd_strd[4]; 01712 } else { 01713 curdim[4] = 0; 01714 src_off[4] = 0; 01715 res_off[4] = 0; 01716 shft_off[4] = 0; 01717 bnd_off[4] = 0; 01718 curdim[5]++; 01719 if (curdim[5] < src_ext[5]) { 01720 src_off[5] += src_strd[5]; 01721 res_off[5] += res_strd[5]; 01722 shft_off[5] += shft_strd[5]; 01723 bnd_off[5] += bnd_strd[5]; 01724 } 01725 } 01726 } 01727 } 01728 } 01729 } 01730 } 01731 } 01732 }