Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/cshift.c 92.1 07/07/99 15:52:02" 00038 00039 #include <stddef.h> 00040 #include <liberrno.h> 00041 #include <cray/dopevec.h> 00042 #include <cray/portdefs.h> 00043 #include "arraydefs.h" 00044 00045 /* 00046 * Cshift function. Perform a circular shift of an array 00047 * expression of rank one or perform circular shifts on all 00048 * complete rank one sections along a given dimension of an array 00049 * expression of rank two or greater. Elements shifted out at one 00050 * end of a section are shifted in at the other end. Different 00051 * sections may be shifted by different amounts in different 00052 * directions. 00053 */ 00054 00055 #ifdef _UNICOS 00056 #pragma _CRI duplicate _CSHIFT as CSHIFT@ 00057 #endif 00058 void 00059 _CSHIFT (DopeVectorType * result, 00060 DopeVectorType * source, 00061 DopeVectorType * shift, 00062 _f_int *dimp) 00063 { 00064 char *cs; /* char ptr to source array */ 00065 char *cr; /* char ptr to result array */ 00066 char * restrict cptr1; /* char */ 00067 char * restrict cptr2; /* char */ 00068 _f_int8 * restrict uptr1; /* 64-bit */ 00069 _f_int8 * restrict uptr2; /* 64-bit */ 00070 _f_int * restrict wptr1; /* generic integer */ 00071 _f_int * restrict wptr2; /* generic integer */ 00072 _f_real16 * restrict dptr1; /* 128-bit */ 00073 _f_real16 * restrict dptr2; /* 128-bit */ 00074 #ifdef _F_COMP16 00075 dblcmplx * restrict xptr1; /* 256-bit */ 00076 dblcmplx * restrict xptr2; /* 256-bit */ 00077 #endif 00078 _f_int4 * restrict hptr1; /* 32-bit */ 00079 _f_int4 * restrict hptr2; /* 32-bit */ 00080 void * restrict sptr; /* ptr to src data area */ 00081 void * restrict rptr; /* ptr to res data area */ 00082 long * restrict shptr; /* ptr to shift area */ 00083 _f_int8 * restrict save_uptr1; /* save copy of uptr1 */ 00084 _f_int8 * restrict save_uptr2a; /* save copy of uptr1 */ 00085 _f_int8 * restrict save_uptr2b; /* save copy of uptr1 */ 00086 _f_int * restrict save_wptr1; /* save copy of wptr1 */ 00087 _f_int * restrict save_wptr2a; /* save copy of wptr1 */ 00088 _f_int * restrict save_wptr2b; /* save copy of wptr1 */ 00089 _f_int4 * restrict iptr4; /* ptr to shift data area */ 00090 _f_int8 * restrict iptr8; /* ptr to shift data area */ 00091 _f_int bucketsize; /* size of each data element */ 00092 _f_int shft_size; /* size of stride elements */ 00093 long nbytes; /* # of bytes in data area */ 00094 _f_int num; /* index value */ 00095 _f_int bytealligned; /* byte alligned flag */ 00096 long sindx; /* source index */ 00097 long sindx2; /* source index */ 00098 long shindx; /* shift index */ 00099 long rindx; /* result array index */ 00100 long rindx2; /* result array index */ 00101 _f_int shft; /* shift value */ 00102 _f_int dim; /* dim value */ 00103 _f_int non_dim; /* non-shift dim for 2x2 */ 00104 long curdim[MAXDIM-1]; /* current indices */ 00105 long src_strd[MAXDIM-1]; /* index stride */ 00106 long src_ext[MAXDIM-1]; /* extents for source array */ 00107 long src_off[MAXDIM-1]; /* source offset */ 00108 long res_strd[MAXDIM-1]; /* index stride */ 00109 long res_off[MAXDIM-1]; /* result offset */ 00110 long shft_strd[MAXDIM-1];/* stride for shift array */ 00111 long shft_off[MAXDIM-1]; /* shift offset */ 00112 _f_int rank; /* rank of source matrix */ 00113 _f_int type; /* type */ 00114 _f_int subtype; /* sub-type */ 00115 _f_int arithmetic; /* arithmetic data type */ 00116 long extent; /* extent temporary */ 00117 long extlow; /* lower bound of extent */ 00118 long exthi; /* upper bound of extent */ 00119 long src_dim_strd; /* stride for source index */ 00120 long res_dim_strd; /* stride for result index */ 00121 long shft_dim; /* shift dimension */ 00122 long shft_dim_strd; /* shift stride */ 00123 long tot_ext; /* total extent */ 00124 long i, j, k, l; /* index variables */ 00125 00126 /* Set type and dimension global variables */ 00127 00128 rank = source->n_dim; 00129 type = source->type_lens.type; 00130 00131 /* Size calculation is based on variable type. */ 00132 00133 switch (type) { 00134 case DVTYPE_ASCII : 00135 bytealligned = 1; 00136 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */ 00137 subtype = DVSUBTYPE_CHAR; 00138 arithmetic = 0; 00139 break; 00140 case DVTYPE_DERIVEDBYTE : 00141 bytealligned = 1; 00142 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE; 00143 subtype = DVSUBTYPE_CHAR; 00144 arithmetic = 0; 00145 break; 00146 case DVTYPE_DERIVEDWORD : 00147 bytealligned = 0; 00148 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD; 00149 subtype = DVSUBTYPE_DERIVED; 00150 arithmetic = 0; 00151 break; 00152 default : 00153 bytealligned = 0; 00154 bucketsize = source->type_lens.int_len / BITS_PER_WORD; 00155 if (source->type_lens.int_len == 64) { 00156 subtype = DVSUBTYPE_BIT64; 00157 } else if (source->type_lens.int_len == 32) { 00158 subtype = DVSUBTYPE_BIT32; 00159 bucketsize = 1; 00160 } else if (source->type_lens.int_len == 256) { 00161 subtype = DVSUBTYPE_BIT256; 00162 } else { 00163 subtype = DVSUBTYPE_BIT128; 00164 } 00165 arithmetic = 1; 00166 } 00167 shft_size = shift->base_addr.a.el_len / BITS_PER_WORD; 00168 #ifdef _CRAYMPP 00169 if (shft_size == 0) 00170 shft_size = 1; 00171 #endif 00172 00173 /* If necessary, fill result dope vector */ 00174 00175 if (!result->assoc) { 00176 result->base_addr.a.ptr = (void *) NULL; 00177 result->orig_base = 0; 00178 result->orig_size = 0; 00179 #ifdef _UNICOS 00180 #pragma _CRI shortloop 00181 #endif 00182 for (i = 0, tot_ext = bucketsize; i < rank; i++) { 00183 result->dimension[i].extent = source->dimension[i].extent; 00184 result->dimension[i].low_bound = 1; 00185 result->dimension[i].stride_mult = tot_ext; 00186 tot_ext *= result->dimension[i].extent; 00187 } 00188 00189 /* Determine size of space to allocate */ 00190 00191 if (!bytealligned) { 00192 nbytes = bucketsize * BYTES_PER_WORD; 00193 #ifdef _CRAYMPP 00194 if (subtype == DVSUBTYPE_BIT32) 00195 nbytes /= 2; 00196 #endif 00197 } else { 00198 nbytes = bucketsize; 00199 } 00200 #ifdef _UNICOS 00201 #pragma _CRI shortloop 00202 #endif 00203 for (i = 0; i < rank; i++) 00204 nbytes *= result->dimension[i].extent; 00205 if (nbytes > 0) { 00206 result->base_addr.a.ptr = (void *) malloc(nbytes); 00207 if (result->base_addr.a.ptr == NULL) 00208 _lerror(_LELVL_ABORT, FENOMEMY); 00209 } 00210 00211 result->assoc = 1; 00212 result->base_addr.a.el_len = source->base_addr.a.el_len; 00213 if (type == DVTYPE_ASCII) { 00214 cr = (char *) result->base_addr.a.ptr; 00215 result->base_addr.charptr = _cptofcd (cr, bucketsize); 00216 } 00217 result->orig_base = result->base_addr.a.ptr; 00218 result->orig_size = nbytes * BITS_PER_BYTE; 00219 } 00220 00221 /* 00222 * Check to see if any of the matrices have size 0. If any do, 00223 * return without doing anything. 00224 */ 00225 00226 #ifdef _UNICOS 00227 #pragma _CRI shortloop 00228 #endif 00229 for (i = 0; i < rank; i++) { 00230 if (!source->dimension[i].extent) 00231 return; 00232 } 00233 if (result->assoc) { 00234 #ifdef _UNICOS 00235 #pragma _CRI shortloop 00236 #endif 00237 for (i = 0; i < rank; i++) 00238 if (!result->dimension[i].extent) 00239 return; 00240 } 00241 if (shift->n_dim > 1) { 00242 #ifdef _UNICOS 00243 #pragma _CRI shortloop 00244 #endif 00245 for (i = 0; i < rank-1; i++) 00246 if (!shift->dimension[i].extent) 00247 return; 00248 } 00249 00250 /* Set up scalar pointers to all of the argument data areas */ 00251 00252 if (!bytealligned) { 00253 sptr = (void *) source->base_addr.a.ptr; 00254 rptr = (void *) result->base_addr.a.ptr; 00255 } else { 00256 if (type == DVTYPE_ASCII) { 00257 cs = _fcdtocp (source->base_addr.charptr); 00258 cr = _fcdtocp (result->base_addr.charptr); 00259 } else { 00260 cs = (char *) source->base_addr.a.ptr; 00261 cr = (char *) result->base_addr.a.ptr; 00262 } 00263 } 00264 shptr = (void *) shift->base_addr.a.ptr; 00265 00266 /* If dim argument is not present, set it to 1 (0 for C) */ 00267 00268 if (dimp == NULL) 00269 dim = 0; 00270 else { 00271 if (*dimp < 1 || *dimp > rank) 00272 _lerror (_LELVL_ABORT, FESCIDIM); 00273 dim = *dimp - 1; 00274 } 00275 00276 /* If source is a 1-dimensional array */ 00277 00278 if (rank == 1) { 00279 00280 /* 00281 * Get shift value which will be used for each loop iteration. 00282 * Once the value is obtained, make sure that it is positive. This 00283 * will ensure that the final calculated value can only be positive, 00284 * thus eliminating a test from the inner loop. 00285 */ 00286 00287 if (shift->base_addr.a.el_len == 64) { 00288 iptr8 = (_f_int8 *) shptr; 00289 shft = *iptr8 % source->dimension[0].extent; 00290 } else { 00291 iptr4 = (_f_int4 *) shptr; 00292 shft = *iptr4 % source->dimension[0].extent; 00293 } 00294 if (shft < 0) 00295 shft += source->dimension[0].extent; 00296 00297 /* 00298 * The index calculation for each element of the source and result 00299 * matrices make use of some 'shortcuts'. These involved the use 00300 * of some variables which contain information about indices. One 00301 * of these variables exists for each dimension of the source and 00302 * result matrices. 00303 * 00304 * strd - stride value based in terms of elements rather than 00305 * words. 00306 */ 00307 00308 if (bucketsize > 1 && arithmetic) { 00309 src_strd[0] = source->dimension[0].stride_mult / bucketsize; 00310 res_strd[0] = result->dimension[0].stride_mult / bucketsize; 00311 } else { 00312 src_strd[0] = source->dimension[0].stride_mult; 00313 res_strd[0] = result->dimension[0].stride_mult; 00314 } 00315 00316 /* 00317 * In order to assist vectorization, the following value has been 00318 * taken out of the array variable, and put into a scalar. 00319 */ 00320 00321 extent = source->dimension[0].extent; 00322 00323 /* 00324 * Each data type will be handled with its own inner loop. 00325 */ 00326 00327 switch (subtype) { 00328 case DVSUBTYPE_CHAR : 00329 cptr1 = (char *) cs; 00330 cptr2 = (char *) cr + (extent - shft) * bucketsize; 00331 src_dim_strd = source->dimension[0].stride_mult; 00332 res_dim_strd = result->dimension[0].stride_mult; 00333 for (i = 0; i < shft; i++) { 00334 (void) memcpy (cptr2, cptr1, bucketsize); 00335 cptr1 += src_dim_strd; 00336 cptr2 += res_dim_strd; 00337 } 00338 cptr2 = (char *) cr; 00339 for ( ; i < extent; i++) { 00340 (void) memcpy (cptr2, cptr1, bucketsize); 00341 cptr1 += src_dim_strd; 00342 cptr2 += res_dim_strd; 00343 } 00344 break; 00345 00346 case DVSUBTYPE_DERIVED : 00347 src_dim_strd = source->dimension[0].stride_mult; 00348 res_dim_strd = result->dimension[0].stride_mult; 00349 for (i = 0; i < bucketsize; i++) { 00350 wptr1 = (_f_int *) sptr; 00351 wptr2 = (_f_int *) rptr + (extent-shft) * 00352 bucketsize; 00353 for (j = 0; j < shft; j++) { 00354 sindx = i + (j * src_dim_strd); 00355 rindx = i + (j * res_dim_strd); 00356 wptr2[rindx] = wptr1[sindx]; 00357 } 00358 wptr2 = (_f_int *) rptr; 00359 for (k = 0 ; j < extent; j++, k++) { 00360 sindx = i + (j * src_dim_strd); 00361 rindx = i + (k * res_dim_strd); 00362 wptr2[rindx] = wptr1[sindx]; 00363 } 00364 } 00365 break; 00366 00367 case DVSUBTYPE_BIT64 : 00368 uptr1 = (_f_int8 *) sptr; 00369 uptr2 = (_f_int8 *) rptr + ((extent - shft) * 00370 res_strd[0]); 00371 src_dim_strd = src_strd[0]; 00372 res_dim_strd = res_strd[0]; 00373 #ifndef CRAY2 00374 for (i = 0; i < shft; i++) { 00375 sindx = i * src_dim_strd; 00376 rindx = i * res_dim_strd; 00377 uptr2[rindx] = uptr1[sindx]; 00378 } 00379 uptr2 = (_f_int8 *) rptr; 00380 for (j = 0 ; i < extent; i++, j++) { 00381 sindx = i * src_dim_strd; 00382 rindx = j * res_dim_strd; 00383 uptr2[rindx] = uptr1[sindx]; 00384 } 00385 #else 00386 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft); 00387 uptr2 = (_f_int8 *) rptr; 00388 uptr1 = (_f_int8 *) sptr + (shft * src_dim_strd); 00389 shft = extent - shft; 00390 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft); 00391 #endif 00392 break; 00393 00394 case DVSUBTYPE_BIT32 : 00395 hptr1 = (_f_int4 *) sptr; 00396 hptr2 = (_f_int4 *) rptr + ((extent - shft) * res_strd[0]); 00397 src_dim_strd = src_strd[0]; 00398 res_dim_strd = res_strd[0]; 00399 for (i = 0; i < shft; i++) { 00400 sindx = i * src_dim_strd; 00401 rindx = i * res_dim_strd; 00402 hptr2[rindx] = hptr1[sindx]; 00403 } 00404 hptr2 = (_f_int4 *) rptr; 00405 for (j = 0 ; i < extent; i++, j++) { 00406 sindx = i * src_dim_strd; 00407 rindx = j * res_dim_strd; 00408 hptr2[rindx] = hptr1[sindx]; 00409 } 00410 break; 00411 00412 case DVSUBTYPE_BIT128 : 00413 dptr1 = (_f_real16 *) sptr; 00414 dptr2 = (_f_real16 *) rptr + 00415 ((extent - shft) * res_strd[0]); 00416 src_dim_strd = src_strd[0]; 00417 res_dim_strd = res_strd[0]; 00418 for (i = 0; i < shft; i++) { 00419 sindx = i * src_dim_strd; 00420 rindx = i * res_dim_strd; 00421 dptr2[rindx] = dptr1[sindx]; 00422 } 00423 dptr2 = (_f_real16 *) rptr; 00424 for (j = 0 ; i < extent; i++, j++) { 00425 sindx = i * src_dim_strd; 00426 rindx = j * res_dim_strd; 00427 dptr2[rindx] = dptr1[sindx]; 00428 } 00429 break; 00430 00431 #ifdef _F_COMP16 00432 case DVSUBTYPE_BIT256 : 00433 xptr1 = (dblcmplx *) sptr; 00434 xptr2 = (dblcmplx *) rptr + ((extent - shft) * res_strd[0]); 00435 src_dim_strd = src_strd[0]; 00436 res_dim_strd = res_strd[0]; 00437 for (i = 0; i < shft; i++) { 00438 sindx = i * src_dim_strd; 00439 rindx = i * res_dim_strd; 00440 xptr2[rindx].re = xptr1[sindx].re; 00441 xptr2[rindx].im = xptr1[sindx].im; 00442 } 00443 xptr2 = (dblcmplx *) rptr; 00444 for (j = 0 ; i < extent; i++, j++) { 00445 sindx = i * src_dim_strd; 00446 rindx = j * res_dim_strd; 00447 xptr2[rindx].re = xptr1[sindx].re; 00448 xptr2[rindx].im = xptr1[sindx].im; 00449 } 00450 break; 00451 #endif 00452 00453 default : 00454 _lerror (_LELVL_ABORT, FEINTDTY); 00455 } 00456 00457 /* Arrays with rank of 2 */ 00458 00459 } else if (rank == 2) { 00460 00461 /* Set dimension and non_dimension indices */ 00462 00463 if (dim == 0) 00464 non_dim = 1; 00465 else 00466 non_dim = 0; 00467 00468 /* Set up shortcut variables for both array dimensions */ 00469 00470 if (bucketsize > 1 && arithmetic) { 00471 src_strd[0] = source->dimension[0].stride_mult / bucketsize; 00472 src_strd[1] = source->dimension[1].stride_mult / bucketsize; 00473 res_strd[0] = result->dimension[0].stride_mult / bucketsize; 00474 res_strd[1] = result->dimension[1].stride_mult / bucketsize; 00475 } else { 00476 src_strd[0] = source->dimension[0].stride_mult; 00477 src_strd[1] = source->dimension[1].stride_mult; 00478 res_strd[0] = result->dimension[0].stride_mult; 00479 res_strd[1] = result->dimension[1].stride_mult; 00480 } 00481 shft_strd[0] = shift->dimension[0].stride_mult / shft_size; 00482 00483 /* Put information about DIM index in scalars for vectorization */ 00484 00485 src_dim_strd = src_strd[dim]; 00486 res_dim_strd = res_strd[dim]; 00487 00488 /* Set up extent temporary variables */ 00489 00490 extent = source->dimension[dim].extent; 00491 extlow = -extent; 00492 exthi = 2 * extent; 00493 00494 /* Set up shift variables */ 00495 00496 if (shift->base_addr.a.el_len == 64) 00497 iptr8 = (_f_int8 *) shptr; 00498 else 00499 iptr4 = (_f_int4 *) shptr; 00500 if (shift->n_dim == 1) { 00501 shft_dim = 1; 00502 shft_dim_strd = shift->dimension[0].stride_mult / shft_size; 00503 shindx = 0; 00504 } else { /* scalar value, only get it once */ 00505 shft_dim = 0; 00506 if (shift->base_addr.a.el_len == 64) 00507 shft = iptr8[0]; 00508 else 00509 shft = iptr4[0]; 00510 if (shft >= extent) { 00511 if (shft < exthi) 00512 shft -= extent; 00513 else 00514 shft %= extent; 00515 } else if (shft < 0) { 00516 if (shft >= extlow) 00517 shft += extent; 00518 else { 00519 shft %= extent; 00520 if (shft < 0) 00521 shft += extent; 00522 } 00523 } 00524 } 00525 00526 /* Outer loop is for dimension not being shifted */ 00527 00528 for (i = 0; i < source->dimension[non_dim].extent; i++) { 00529 00530 /* Get shift value if it is not a scalar */ 00531 00532 if (shft_dim == 1) { 00533 if (shift->base_addr.a.el_len == 64) 00534 shft = iptr8[shindx]; 00535 else 00536 shft = iptr4[shindx]; 00537 if (shft >= extent) { 00538 if (shft < exthi) 00539 shft -= extent; 00540 else 00541 shft %= extent; 00542 } else if (shft < 0) { 00543 if (shft >= extlow) 00544 shft += extent; 00545 else { 00546 shft %= extent; 00547 if (shft < 0) 00548 shft += extent; 00549 } 00550 } 00551 shindx += shft_dim_strd; 00552 } 00553 00554 switch (subtype) { 00555 case DVSUBTYPE_CHAR : 00556 cptr1 = (char *) cs + (i * src_strd[non_dim]); 00557 cptr2 = (char *) cr + (i * res_strd[non_dim]) + 00558 ((extent - shft) * res_dim_strd); 00559 for (j = 0; j < shft; j++) { 00560 (void) memcpy (cptr2, cptr1, bucketsize); 00561 cptr1 += src_dim_strd; 00562 cptr2 += src_dim_strd; 00563 } 00564 cptr2 = (char *) cr + (i * res_strd[non_dim]); 00565 for ( ; j < extent; j++) { 00566 (void) memcpy (cptr2, cptr1, bucketsize); 00567 cptr1 += src_dim_strd; 00568 cptr2 += src_dim_strd; 00569 } 00570 break; 00571 00572 case DVSUBTYPE_DERIVED : 00573 wptr1 = (_f_int *) sptr + 00574 (i * src_strd[non_dim]); 00575 save_wptr1 = wptr1; 00576 wptr2 = (_f_int *) rptr + 00577 (i * res_strd[non_dim]) + 00578 ((extent - shft) * res_dim_strd); 00579 save_wptr2a = wptr2; 00580 wptr2 = (_f_int *) rptr + 00581 (i * res_strd[non_dim]); 00582 save_wptr2b = wptr2; 00583 for (j = 0; j < bucketsize; j++) { 00584 wptr1 = save_wptr1; 00585 wptr2 = save_wptr2a; 00586 for (k = 0; k < shft; k++) { 00587 sindx = j + (k * src_dim_strd); 00588 rindx = j + (k * res_dim_strd); 00589 wptr2[rindx] = wptr1[sindx]; 00590 } 00591 wptr2 = save_wptr2b; 00592 for (l = 0; k < extent; k++, l++) { 00593 sindx = j + (k * src_dim_strd); 00594 rindx = j + (l * res_dim_strd); 00595 wptr2[rindx] = wptr1[sindx]; 00596 } 00597 } 00598 break; 00599 00600 case DVSUBTYPE_BIT64 : 00601 uptr1 = (_f_int8 *) sptr + 00602 (i * src_strd[non_dim]); 00603 save_uptr1 = uptr1; 00604 uptr2 = (_f_int8 *) rptr + 00605 (i * res_strd[non_dim]) + 00606 ((extent - shft) * res_dim_strd); 00607 #ifndef CRAY2 00608 for (j = 0; j < shft; j++) { 00609 sindx = j * src_dim_strd; 00610 rindx = j * res_dim_strd; 00611 uptr2[rindx] = uptr1[sindx]; 00612 } 00613 uptr2 = (_f_int8 *) rptr + 00614 (i * res_strd[non_dim]); 00615 for (k = 0 ; j < extent; j++, k++) { 00616 sindx = j * src_dim_strd; 00617 rindx = k * res_dim_strd; 00618 uptr2[rindx] = uptr1[sindx]; 00619 } 00620 #else 00621 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, 00622 shft); 00623 uptr2 = (_f_int8 *) rptr + 00624 (i * res_strd[non_dim]); 00625 uptr1 = save_uptr1 + (shft * src_dim_strd); 00626 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, 00627 extent-shft); 00628 #endif 00629 break; 00630 00631 case DVSUBTYPE_BIT32 : 00632 hptr1 = (_f_int4 *) sptr + (i * src_strd[non_dim]); 00633 hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]) + 00634 ((extent - shft) * res_dim_strd); 00635 for (j = 0; j < shft; j++) { 00636 sindx = j * src_dim_strd; 00637 rindx = j * res_dim_strd; 00638 hptr2[rindx] = hptr1[sindx]; 00639 } 00640 hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]); 00641 for (k = 0 ; j < extent; j++, k++) { 00642 sindx = j * src_dim_strd; 00643 rindx = k * res_dim_strd; 00644 hptr2[rindx] = hptr1[sindx]; 00645 } 00646 break; 00647 00648 case DVSUBTYPE_BIT128 : 00649 dptr1 = (_f_real16 *) sptr + (i * src_strd[non_dim]); 00650 dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]) + 00651 ((extent - shft) * res_dim_strd); 00652 for (j = 0; j < shft; j++) { 00653 sindx = j * src_dim_strd; 00654 rindx = j * res_dim_strd; 00655 dptr2[rindx] = dptr1[sindx]; 00656 } 00657 dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]); 00658 for (k = 0 ; j < extent; j++, k++) { 00659 sindx = j * src_dim_strd; 00660 rindx = k * res_dim_strd; 00661 dptr2[rindx] = dptr1[sindx]; 00662 } 00663 break; 00664 00665 #ifdef _F_COMP16 00666 case DVSUBTYPE_BIT256 : 00667 xptr1 = (dblcmplx *) sptr + (i * src_strd[non_dim]); 00668 xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]) + 00669 ((extent - shft) * res_dim_strd); 00670 for (j = 0; j < shft; j++) { 00671 sindx = j * src_dim_strd; 00672 rindx = j * res_dim_strd; 00673 xptr2[rindx].re = xptr1[sindx].re; 00674 xptr2[rindx].im = xptr1[sindx].im; 00675 } 00676 xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]); 00677 for (k = 0 ; j < extent; j++, k++) { 00678 sindx = j * src_dim_strd; 00679 rindx = k * res_dim_strd; 00680 xptr2[rindx].re = xptr1[sindx].re; 00681 xptr2[rindx].im = xptr1[sindx].im; 00682 } 00683 break; 00684 #endif 00685 00686 default : 00687 _lerror (_LELVL_ABORT, FEINTDTY); 00688 } 00689 } 00690 00691 /* Arrays with rank of 3-7 */ 00692 00693 } else { 00694 00695 if (dim == 0) { 00696 i = 0; 00697 tot_ext = 1; 00698 } else 00699 #ifdef _UNICOS 00700 #pragma _CRI shortloop 00701 #endif 00702 for (i = 0, tot_ext = 1; i < dim; i++) { 00703 tot_ext *= source->dimension[i].extent; 00704 src_ext[i] = source->dimension[i].extent; 00705 if (bucketsize > 1 && arithmetic) { 00706 src_strd[i] = source->dimension[i].stride_mult / bucketsize; 00707 res_strd[i] = result->dimension[i].stride_mult / bucketsize; 00708 } else { 00709 src_strd[i] = source->dimension[i].stride_mult; 00710 res_strd[i] = result->dimension[i].stride_mult; 00711 } 00712 } 00713 if (i < (rank - 1)) 00714 #ifdef _UNICOS 00715 #pragma _CRI shortloop 00716 #endif 00717 for ( ; i < rank-1; i++) { 00718 tot_ext *= source->dimension[i+1].extent; 00719 src_ext[i] = source->dimension[i+1].extent; 00720 if (bucketsize > 1 && arithmetic) { 00721 src_strd[i] = source->dimension[i+1].stride_mult/bucketsize; 00722 res_strd[i] = result->dimension[i+1].stride_mult/bucketsize; 00723 } else { 00724 src_strd[i] = source->dimension[i+1].stride_mult; 00725 res_strd[i] = result->dimension[i+1].stride_mult; 00726 } 00727 } 00728 00729 /* Set up some scalars which contain information about DIM */ 00730 00731 extent = source->dimension[dim].extent; 00732 extlow = -extent; 00733 exthi = 2 * extent; 00734 00735 if (bucketsize > 1 && arithmetic) { 00736 src_dim_strd = source->dimension[dim].stride_mult / bucketsize; 00737 res_dim_strd = result->dimension[dim].stride_mult / bucketsize; 00738 } else { 00739 src_dim_strd = source->dimension[dim].stride_mult; 00740 res_dim_strd = result->dimension[dim].stride_mult; 00741 } 00742 00743 /* Set up the shift variables */ 00744 00745 if (shift->base_addr.a.el_len == 64) 00746 iptr8 = (_f_int8 *) shptr; 00747 else 00748 iptr4 = (_f_int4 *) shptr; 00749 if (shift->n_dim == 0) { 00750 shft_dim = 0; 00751 if (shift->base_addr.a.el_len == 64) 00752 shft = iptr8[0]; 00753 else 00754 shft = iptr4[0]; 00755 if (shft >= extent) { 00756 if (shft < exthi) 00757 shft -= extent; 00758 else 00759 shft %= extent; 00760 } else if (shft < 0) { 00761 if (shft >= extlow) 00762 shft += extent; 00763 else { 00764 shft %= extent; 00765 if (shft < 0) 00766 shft += extent; 00767 } 00768 } 00769 } else { 00770 shft_dim = 1; 00771 shindx = 0; 00772 #ifdef _UNICOS 00773 #pragma _CRI shortloop 00774 #endif 00775 for (i = 0; i < rank-1; i++) { 00776 shft_strd[i] = shift->dimension[i].stride_mult; 00777 } 00778 } 00779 00780 /* Initialize the curdim and offset arrays to 0 */ 00781 00782 for (i = 0; i < rank-1; i++) { 00783 curdim[i] = 0; 00784 src_off[i] = 0; 00785 res_off[i] = 0; 00786 shft_off[i] = 0; 00787 } 00788 00789 /* 00790 * The outer loop will be executed once for each combination of 00791 * indices not including the DIM dimension. 00792 */ 00793 00794 for (i = 0; i < tot_ext; i++) { 00795 00796 /* Calculate the shift value used throughout the inner loop */ 00797 00798 if (shft_dim) { 00799 switch (rank) { 00800 case 3 : 00801 shindx = shft_off[0] + shft_off[1]; 00802 break; 00803 case 4 : 00804 shindx = shft_off[0] + shft_off[1] + shft_off[2]; 00805 break; 00806 case 5 : 00807 shindx = shft_off[0] + shft_off[1] + 00808 shft_off[2] + shft_off[3]; 00809 break; 00810 case 6 : 00811 shindx = shft_off[0] + shft_off[1] + shft_off[2] + 00812 shft_off[3] + shft_off[4]; 00813 break; 00814 default : 00815 shindx = shft_off[0] + shft_off[1] + shft_off[2] + 00816 shft_off[3] + shft_off[4] + shft_off[5]; 00817 } 00818 shft = shptr[shindx]; 00819 if (shft >= extent) { 00820 if (shft < exthi) 00821 shft -= extent; 00822 else 00823 shft %= extent; 00824 } else if (shft < 0) { 00825 if (shft >= extlow) 00826 shft += extent; 00827 else { 00828 shft %= extent; 00829 if (shft < 0) 00830 shft += extent; 00831 } 00832 } 00833 } 00834 00835 switch (rank) { 00836 case 3 : 00837 sindx = src_off[0] + src_off[1]; 00838 rindx = res_off[0] + res_off[1]; 00839 break; 00840 case 4 : 00841 sindx = src_off[0] + src_off[1] + src_off[2]; 00842 rindx = res_off[0] + res_off[1] + res_off[2]; 00843 break; 00844 case 5 : 00845 sindx = src_off[0] + src_off[1] + 00846 src_off[2] + src_off[3]; 00847 rindx = res_off[0] + res_off[1] + 00848 res_off[2] + res_off[3]; 00849 break; 00850 case 6 : 00851 sindx = src_off[0] + src_off[1] + src_off[2] + 00852 src_off[3] + src_off[4]; 00853 rindx = res_off[0] + res_off[1] + res_off[2] + 00854 res_off[3] + res_off[4]; 00855 break; 00856 default : 00857 sindx = src_off[0] + src_off[1] + src_off[2] + 00858 src_off[3] + src_off[4] + src_off[5]; 00859 rindx = res_off[0] + res_off[1] + res_off[2] + 00860 res_off[3] + res_off[4] + res_off[5]; 00861 } 00862 00863 switch (subtype) { 00864 case DVSUBTYPE_CHAR : 00865 cptr1 = (char *) cs + sindx; 00866 cptr2 = (char *) cr + rindx + 00867 ((extent - shft) * res_dim_strd); 00868 for (j = 0; j < shft; j++) { 00869 (void) memcpy (cptr2, cptr1, bucketsize); 00870 cptr1 += src_dim_strd; 00871 cptr2 += res_dim_strd; 00872 } 00873 cptr2 = (char *) cr + rindx; 00874 for ( ; j < extent; j++) { 00875 (void) memcpy (cptr2, cptr1, bucketsize); 00876 cptr1 += src_dim_strd; 00877 cptr2 += res_dim_strd; 00878 } 00879 break; 00880 00881 case DVSUBTYPE_DERIVED : 00882 wptr1 = (_f_int *) sptr + sindx; 00883 save_wptr1 = wptr1; 00884 wptr2 = (_f_int *) rptr + rindx + 00885 ((extent - shft) * res_dim_strd); 00886 save_wptr2a = wptr2; 00887 wptr2 = (_f_int *) rptr + rindx; 00888 save_wptr2b = wptr2; 00889 for (j = 0; j < bucketsize; j++) { 00890 wptr1 = save_wptr1; 00891 wptr2 = save_wptr2a; 00892 for (k = 0; k < shft; k++) { 00893 sindx2 = j + (k * src_dim_strd); 00894 rindx2 = j + (k * res_dim_strd); 00895 wptr2[rindx2] = wptr1[sindx2]; 00896 } 00897 wptr2 = save_wptr2b; 00898 for (l = 0; k < extent; k++, l++) { 00899 sindx2 = j + (k * src_dim_strd); 00900 rindx2 = j + (l * res_dim_strd); 00901 wptr2[rindx2] = wptr1[sindx2]; 00902 } 00903 } 00904 break; 00905 00906 case DVSUBTYPE_BIT64 : 00907 uptr1 = (_f_int8 *) sptr + sindx; 00908 save_uptr1 = uptr1; 00909 uptr2 = (_f_int8 *) rptr + rindx + 00910 ((extent - shft) * res_dim_strd); 00911 #ifndef CRAY2 00912 for (j = 0; j < shft; j++) { 00913 sindx2 = j * src_dim_strd; 00914 rindx2 = j * res_dim_strd; 00915 uptr2[rindx2] = uptr1[sindx2]; 00916 } 00917 uptr2 = (_f_int8 *) rptr + rindx; 00918 for (k = 0 ; j < extent; j++, k++) { 00919 sindx2 = j * src_dim_strd; 00920 rindx2 = k * res_dim_strd; 00921 uptr2[rindx2] = uptr1[sindx2]; 00922 } 00923 #else 00924 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, 00925 shft); 00926 uptr2 = (_f_int8 *) rptr + rindx; 00927 uptr1 = save_uptr1 + (shft * src_dim_strd); 00928 memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, 00929 extent-shft); 00930 #endif 00931 break; 00932 00933 case DVSUBTYPE_BIT32 : 00934 hptr1 = (_f_int4 *) sptr + sindx; 00935 hptr2 = (_f_int4 *) rptr + rindx + 00936 ((extent - shft) * res_dim_strd); 00937 for (j = 0; j < shft; j++) { 00938 sindx2 = j * src_dim_strd; 00939 rindx2 = j * res_dim_strd; 00940 hptr2[rindx2] = hptr1[sindx2]; 00941 } 00942 hptr2 = (_f_int4 *) rptr + rindx; 00943 for (k = 0; j < extent; j++, k++) { 00944 sindx2 = j * src_dim_strd; 00945 rindx2 = k * res_dim_strd; 00946 hptr2[rindx2] = hptr1[sindx2]; 00947 } 00948 break; 00949 00950 case DVSUBTYPE_BIT128 : 00951 dptr1 = (_f_real16 *) sptr + sindx; 00952 dptr2 = (_f_real16 *) rptr + rindx + 00953 ((extent - shft) * res_dim_strd); 00954 for (j = 0; j < shft; j++) { 00955 sindx2 = j * src_dim_strd; 00956 rindx2 = j * res_dim_strd; 00957 dptr2[rindx2] = dptr1[sindx2]; 00958 } 00959 dptr2 = (_f_real16 *) rptr + rindx; 00960 for (k = 0; j < extent; j++, k++) { 00961 sindx2 = j * src_dim_strd; 00962 rindx2 = k * res_dim_strd; 00963 dptr2[rindx2] = dptr1[sindx2]; 00964 } 00965 break; 00966 00967 #ifdef _F_COMP16 00968 case DVSUBTYPE_BIT256 : 00969 xptr1 = (dblcmplx *) sptr + sindx; 00970 xptr2 = (dblcmplx *) rptr + rindx + 00971 ((extent - shft) * res_dim_strd); 00972 for (j = 0; j < shft; j++) { 00973 sindx2 = j * src_dim_strd; 00974 rindx2 = j * res_dim_strd; 00975 xptr2[rindx2].re = xptr1[sindx2].re; 00976 xptr2[rindx2].im = xptr1[sindx2].im; 00977 } 00978 xptr2 = (dblcmplx *) rptr + rindx; 00979 for (k = 0; j < extent; j++, k++) { 00980 sindx2 = j * src_dim_strd; 00981 rindx2 = k * res_dim_strd; 00982 xptr2[rindx2].re = xptr1[sindx2].re; 00983 xptr2[rindx2].im = xptr1[sindx2].im; 00984 } 00985 break; 00986 #endif 00987 00988 default : 00989 _lerror (_LELVL_ABORT, FEINTDTY); 00990 } 00991 00992 /* Increment the current dimension counter. */ 00993 00994 curdim[0]++; 00995 if (curdim[0] < src_ext[0]) { 00996 src_off[0] += src_strd[0]; 00997 res_off[0] += res_strd[0]; 00998 shft_off[0] += shft_strd[0]; 00999 } else { 01000 curdim[0] = 0; 01001 src_off[0] = 0; 01002 res_off[0] = 0; 01003 shft_off[0] = 0; 01004 curdim[1]++; 01005 if (curdim[1] < src_ext[1]) { 01006 src_off[1] += src_strd[1]; 01007 res_off[1] += res_strd[1]; 01008 shft_off[1] += shft_strd[1]; 01009 } else { 01010 curdim[1] = 0; 01011 src_off[1] = 0; 01012 res_off[1] = 0; 01013 shft_off[1] = 0; 01014 curdim[2]++; 01015 if (curdim[2] < src_ext[2]) { 01016 src_off[2] += src_strd[2]; 01017 res_off[2] += res_strd[2]; 01018 shft_off[2] += shft_strd[2]; 01019 } else { 01020 curdim[2] = 0; 01021 src_off[2] = 0; 01022 res_off[2] = 0; 01023 shft_off[2] = 0; 01024 curdim[3]++; 01025 if (curdim[3] < src_ext[3]) { 01026 src_off[3] += src_strd[3]; 01027 res_off[3] += res_strd[3]; 01028 shft_off[3] += shft_strd[3]; 01029 } else { 01030 curdim[3] = 0; 01031 src_off[3] = 0; 01032 res_off[3] = 0; 01033 shft_off[3] = 0; 01034 curdim[4]++; 01035 if (curdim[4] < src_ext[4]) { 01036 src_off[4] += src_strd[4]; 01037 res_off[4] += res_strd[4]; 01038 shft_off[4] += shft_strd[4]; 01039 } else { 01040 curdim[4] = 0; 01041 src_off[4] = 0; 01042 res_off[4] = 0; 01043 shft_off[4] = 0; 01044 curdim[5]++; 01045 if (curdim[5] < src_ext[5]) { 01046 src_off[5] += src_strd[5]; 01047 res_off[5] += res_strd[5]; 01048 shft_off[5] += shft_strd[5]; 01049 } 01050 } 01051 } 01052 } 01053 } 01054 } 01055 } 01056 } 01057 }