Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
eoshift.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License 
00007   as published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU Lesser General Public 
00021   License along with this program; if not, write the Free Software 
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 
00023   USA.
00024 
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027 
00028   http://www.sgi.com
00029 
00030   For further information regarding this notice, see:
00031 
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033 
00034 */
00035 
00036 
00037 #pragma ident "@(#) libfi/array/eoshift.c       92.1    07/07/99 15:52:02"
00038 
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044 
00045 /*
00046  *      EOSHIFT routine.  Perform an end-off shift of an array expression of
00047  *      rank one or perform end-off shifts on all the complete rank-one
00048  *      sections along a given dimension of an array expression of rank two
00049  *      or greater.  Elements are shifted off at one end of a section and
00050  *      copies of a boundary value are shifted in at the other end.  Different
00051  *      sections may have different boundary values and may be shifted by
00052  *      different amounts and in different directions.
00053  */
00054 
00055 /*      Declare constants used for default boundary values      */
00056 
00057 const _f_int1   defaulti1 = 0;
00058 const _f_int2   defaulti2 = 0;
00059 const _f_int4   defaulti4 = 0;
00060 const _f_int8   defaulti8 = 0;
00061 const _f_log1   defaultl1 = _btol(0);
00062 const _f_log2   defaultl2 = _btol(0);
00063 const _f_log4   defaultl4 = _btol(0);
00064 const _f_log8   defaultl8 = _btol(0);
00065 const _f_real4  defaultr4 = 0.0;
00066 const _f_real8  defaultr8 = 0.0;
00067 
00068 #if defined _F_REAL16 && _F_REAL16 != (-1)
00069 #if defined(_WORD32) || defined(__mips)
00070 const _f_comp8  defaultr16 = {0.0,0.0};
00071 #else
00072 const _f_comp8  defaultr16 = 0.0 + 0.0i;
00073 #endif /* _WORD32 or __mips */
00074 #endif
00075 
00076 #ifdef _F_COMP4
00077 #if _F_COMP4 > 0
00078 const _f_comp4  defaultc4 = 0.0 + 0.0i;
00079 #else
00080 const _f_comp4  defaultc4 = {0.0, 0.0};
00081 #endif
00082 #endif
00083 
00084 #ifdef _F_COMP8
00085 #if _F_COMP8 > 0
00086 const _f_comp8  defaultc8 = 0.0 + 0.0i;
00087 #else
00088 const _f_comp8  defaultc8 = {0.0, 0.0};
00089 #endif
00090 #endif
00091 
00092 #ifdef _F_COMP4
00093 #define BIT64_DEFAULT()                                                 \
00094         if (type == DVTYPE_INTEGER)                                     \
00095             bnd64 = *(_f_int8 *) &defaulti8;                            \
00096         else if (type == DVTYPE_REAL)                                   \
00097             bnd64 = *(_f_int8 *) &defaultr8;                            \
00098         else if (type == DVTYPE_COMPLEX) {                              \
00099             bnd64 = *(_f_int8 *) &defaultc4;                            \
00100         } else                                                          \
00101             bnd64 = *(_f_int8 *) &defaultl8;
00102 #else
00103 #define BIT64_DEFAULT()                                                 \
00104         if (type == DVTYPE_INTEGER)                                     \
00105             bnd64 = *(_f_int8 *) &defaulti8;                            \
00106         else if (type == DVTYPE_REAL)                                   \
00107             bnd64 = *(_f_int8 *) &defaultr8;                            \
00108         else                                                            \
00109             bnd64 = *(_f_int8 *) &defaultl8;
00110 #endif
00111 
00112 #if defined _F_REAL16
00113 #if _F_REAL16 > 1
00114 #define BIT128_DEFAULT()                                                \
00115         if (type == DVTYPE_REAL)                                        \
00116             bnd128 = *(_f_comp8 *) &defaultr16;                         \
00117         else                                                            \
00118             bnd128 = *(_f_comp8 *) &defaultc8;
00119 #else
00120 #define BIT128_DEFAULT()                                                \
00121         bnd128 = *(_f_comp8 *) &defaultc8;
00122 #endif
00123 #endif
00124 
00125 #ifdef _F_COMP16
00126 #define BIT256_DEFAULT()                                                \
00127         bnd256.re = *(_f_real16 *) &defaultr16;                         \
00128         bnd256.im = *(_f_real16 *) &defaultr16;
00129 #endif
00130 
00131 #define BIT32_DEFAULT()                                                 \
00132         if (type == DVTYPE_INTEGER) {                                   \
00133             bnd32 = *(_f_int4 *) &defaulti4;                            \
00134         } else if (type == DVTYPE_REAL) {                               \
00135             bnd32 = *(_f_int4 *) &defaultr4;                            \
00136         } else {                                                        \
00137             bnd32 = *(_f_int4 *) &defaultl4;                            \
00138         }
00139 
00140 #define BIT16_DEFAULT()                                                 \
00141         if (type == DVTYPE_INTEGER) {                                   \
00142             bnd16 = *(_f_int2 *) &defaulti2;                            \
00143         } else {                                                        \
00144             bnd16 = *(_f_int2 *) &defaultl2;                            \
00145         }
00146 
00147 #define BIT8_DEFAULT()                                                  \
00148         if (type == DVTYPE_INTEGER) {                                   \
00149             bnd8 = *(_f_int1 *) &defaulti1;                             \
00150         } else {                                                        \
00151             bnd8 = *(_f_int1 *) &defaultl1;                             \
00152         }
00153 
00154 
00155 #ifdef _UNICOS
00156 #pragma _CRI duplicate _EOSHIFT as EOSHIFT@
00157 #endif
00158 void
00159 _EOSHIFT (DopeVectorType * result,
00160         DopeVectorType * source,
00161         DopeVectorType * shift,
00162         DopeVectorType * boundary,
00163         _f_int  *dimp)
00164 {
00165         char    *cs;                    /* char ptr to source array     */
00166         char    *cr;                    /* char ptr to result array     */
00167         char    *cb;                    /* char ptr to boundary array   */
00168         char            * restrict cptr1;       /* char                 */
00169         char            * restrict cptr2;       /* char                 */
00170         char            * restrict cptr3;       /* char                 */
00171         _f_int8         * restrict uptr1;       /* 64-bit               */
00172         _f_int8         * restrict uptr2;       /* 64-bit               */
00173         _f_int8         * restrict uptr3;       /* 64-bit               */
00174         _f_comp8        * restrict xptr1;       /* 128-bit              */
00175         _f_comp8        * restrict xptr2;       /* 128-bit              */
00176         _f_comp8        * restrict xptr3;       /* 128-bit              */
00177         _f_int          * restrict fptr1;       /* default word         */
00178         _f_int          * restrict fptr2;       /* default word         */
00179         _f_int          * restrict fptr3;       /* default word         */
00180 #ifdef _F_COMP16
00181         dblcmplx        * restrict dxptr1;      /* 256-bit              */
00182         dblcmplx        * restrict dxptr2;      /* 256-bit              */
00183         dblcmplx        * restrict dxptr3;      /* 256-bit              */
00184 #endif
00185         _f_int4         * restrict hptr1;       /* 32-bit               */
00186         _f_int4         * restrict hptr2;       /* 32-bit               */
00187         _f_int4         * restrict hptr3;       /* 32-bit               */
00188         void            * restrict sptr;    /* ptr to src data area     */
00189         void            * restrict rptr;    /* ptr to res data area     */
00190         _f_int          * restrict shptr;   /* ptr to shift data area   */
00191         _f_int          * restrict bptr;    /* ptr to boundary area     */
00192         _f_int8         * restrict save_uptr1;  /* save copy of uptr1   */
00193         _f_int8         * restrict save_uptr2a; /* save copy of uptr2   */
00194         _f_int8         * restrict save_uptr2b; /* save copy of fptr2   */
00195         _f_int          * restrict save_fptr1;  /* save copy of fptr1   */
00196         _f_int          * restrict save_fptr2a; /* save copy of fptr2   */
00197         _f_int          * restrict save_fptr2b; /* save copy of uptr2   */
00198         char            * restrict save_cptr1;  /* save copy of cptr1   */
00199         char            * restrict save_cptr2a; /* save copy of cptr2   */
00200         char            * restrict save_cptr2b; /* save copy of cptr2   */
00201         _f_comp8        * restrict save_xptr;   /* save copy of xptr2   */
00202 #ifdef _F_COMP16
00203         dblcmplx        * restrict save_dxptr;  /* save copy of dxptr2  */
00204 #endif
00205         _f_int4         * restrict save_hptr;   /* save copy of hptr2   */
00206         _f_int4         * restrict save_hptr1;  /* save copy of hptr2   */
00207         _f_int4         * restrict save_hptr2;  /* save copy of hptr2   */
00208         _f_int4         * restrict i4ptr;   /* ptr to shift data area   */
00209         _f_int8         * restrict i8ptr;   /* ptr to shift data area   */
00210         _f_int  bucketsize;             /* size of each data element    */
00211         _f_int  shft_size;              /* size of each strd element    */
00212         _f_int  nbytes;                 /* # of bytes in data area      */
00213         _f_int  bytealligned;           /* byte alligned flag           */
00214         long    sindx;                  /* source index                 */
00215         long    sindx2;                 /* source index                 */
00216         long    shindx;                 /* shift index                  */
00217         long    rindx;                  /* result array index           */
00218         long    rindx2;                 /* result array index           */
00219         long    bindx;                  /* boundary array index         */
00220         long    shft;                   /* shift value                  */
00221         _f_int  dim;                    /* dim value                    */
00222         _f_int  non_dim;                /* non-shift dim for 2x2        */
00223         long    curdim[MAXDIM-1 ];      /* current indices              */
00224         long    src_strd[MAXDIM-1];     /* index stride                 */
00225         long    src_ext[MAXDIM-1];      /* extents for source array     */
00226         long    src_off[MAXDIM-1];      /* source offset                */
00227         long    res_strd[MAXDIM-1];     /* index stride                 */
00228         long    res_off[MAXDIM-1];      /* result offset                */
00229         long    shft_strd[MAXDIM-1];    /* stride for shift array       */
00230         long    shft_off[MAXDIM-1];     /* shift offset                 */
00231         long    bnd_strd[MAXDIM-1];     /* stride for shift array       */
00232         long    bnd_off[MAXDIM-1];      /* shift offset                 */
00233         _f_int  rank;                   /* rank of source matrix        */
00234         _f_int  type;                   /* type                         */
00235         _f_int  subtype;                /* sub-type                     */
00236         _f_int  arithmetic;             /* arithmetic data type         */
00237         long    extent;                 /* extent temporary             */
00238         long    src_dim_strd;           /* stride for source index      */
00239         long    res_dim_strd;           /* stride for result index      */
00240         long    src_use_strd;           /* stride for source index      */
00241         long    res_use_strd;           /* stride for result index      */
00242         long    shft_dim;               /* shift dimension              */
00243         long    shft_dim_strd;          /* shift stride                 */
00244         _f_int  bnd_dim;                /* boundary dimension           */
00245         long    bnd_dim_strd;           /* boundary stride              */
00246         long    tot_ext;                /* total extent                 */
00247         long    shft_cnt;               /* shift count index            */
00248         long    src_tmp;                /* temporary value for src      */
00249         long    res_tmp;                /* temporary value for res      */
00250         _f_int1         bnd8;           /* boundary value holder        */
00251         _f_int2         bnd16;          /* boundary value holder        */
00252         _f_int4         bnd32;          /* boundary value holder        */
00253         _f_int8         bnd64;          /* boundary value holder        */
00254         _f_comp8        bnd128;         /* boundary value holder        */
00255 #ifdef _F_COMP16
00256         dblcmplx        bnd256;         /* boundary value holder        */
00257 #endif
00258         long    i, j, k, l;             /* index variables              */
00259 
00260 /*    Set type and dimension global variables   */
00261 
00262         rank = source->n_dim;
00263         type = source->type_lens.type;
00264 
00265 /*      Size calculation is based on variable type      */
00266 
00267         switch (type) {
00268             case DVTYPE_ASCII :
00269                 bytealligned = 1;
00270                 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */
00271                 subtype = DVSUBTYPE_CHAR;
00272                 arithmetic = 0;
00273                 break;
00274             case DVTYPE_DERIVEDBYTE :
00275                 bytealligned = 1;
00276                 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00277                 subtype = DVSUBTYPE_CHAR;
00278                 arithmetic = 0;
00279                 break;
00280             case DVTYPE_DERIVEDWORD :
00281                 bytealligned = 0;
00282                 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00283                 subtype = DVSUBTYPE_DERIVED;
00284                 arithmetic = 0;
00285                 break;
00286             default :
00287                 bytealligned = 0;
00288                 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00289                 if (source->type_lens.int_len == 64) {
00290                     subtype = DVSUBTYPE_BIT64;
00291                 } else if (source->type_lens.int_len == 32) {
00292                     subtype = DVSUBTYPE_BIT32;
00293                     bucketsize = 1;
00294                 } else if (source->type_lens.int_len == 256) {
00295                     subtype = DVSUBTYPE_BIT256;
00296                 } else {
00297                     subtype = DVSUBTYPE_BIT128;
00298                 }
00299                 arithmetic = 1;
00300         }
00301         shft_size = shift->type_lens.int_len / BITS_PER_WORD;
00302 #ifdef  _CRAYMPP
00303         if (shft_size == 0)
00304             shft_size = 1;
00305 #endif
00306 
00307 /*      If necessary, fill result dope vector   */
00308 
00309         if (!result->assoc) {
00310             result->base_addr.a.ptr = (void *) NULL;
00311             result->orig_base      = 0;
00312             result->orig_size      = 0;
00313 
00314 #ifdef _UNICOS
00315 #pragma _CRI shortloop
00316 #endif
00317         for (i = 0, tot_ext = bucketsize; i < rank; i++) {
00318             result->dimension[i].extent = source->dimension[i].extent;
00319             result->dimension[i].low_bound = 1;
00320             result->dimension[i].stride_mult = tot_ext;
00321             tot_ext *= result->dimension[i].extent;
00322         }
00323 
00324 /*      Determine size of space to allocate    */
00325 
00326             if (!bytealligned) {
00327                 nbytes = bucketsize * BYTES_PER_WORD;
00328 #ifdef _CRAYMPP
00329                 if (subtype == DVSUBTYPE_BIT32)
00330                     nbytes /= 2;
00331 #endif
00332             } else
00333                 nbytes = bucketsize;
00334 #ifdef _UNICOS
00335 #pragma _CRI shortloop
00336 #endif
00337             for (i = 0; i < rank; i++)
00338                 nbytes *= result->dimension[i].extent;
00339             if (nbytes > 0) {
00340                 result->base_addr.a.ptr = (void *) malloc(nbytes);
00341                 if (result->base_addr.a.ptr == NULL)
00342                     _lerror(_LELVL_ABORT, FENOMEMY);
00343             }
00344             result->orig_base = (void *) result->base_addr.a.ptr;
00345             result->orig_size = nbytes * BITS_PER_BYTE;
00346 
00347             result->assoc = 1;
00348             result->base_addr.a.el_len = source->base_addr.a.el_len;
00349             if (type == DVTYPE_ASCII) {
00350                 cr = (char *) result->base_addr.a.ptr;
00351                 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00352             }
00353         }
00354 
00355 /*
00356  *      Check to see if any of the matrices have size 0.  If any do,
00357  *      return without doing anything.
00358  */
00359 
00360 #ifdef _UNICOS
00361 #pragma _CRI     shortloop
00362 #endif
00363         for (i = 0; i < rank; i++) {
00364             if (!source->dimension[i].extent)
00365                 return;
00366         }
00367         if (result->assoc) {
00368 #ifdef _UNICOS
00369 #pragma _CRI     shortloop
00370 #endif
00371             for (i = 0; i < rank; i++) {
00372                 if (!result->dimension[i].extent)
00373                     return;
00374             }
00375         }
00376         if (shift->n_dim > 1) {
00377 #ifdef _UNICOS
00378 #pragma _CRI     shortloop
00379 #endif
00380             for (i = 0; i < rank-1; i++) {
00381                 if (!shift->dimension[i].extent)
00382                     return;
00383             }
00384         }
00385         if (boundary) {
00386             if (boundary->n_dim > 1) {
00387 #ifdef _UNICOS
00388 #pragma _CRI    shortloop
00389 #endif
00390                 for (i = 0; i < rank-1; i++) {
00391                     if (!boundary->dimension[i].extent)
00392                         return;
00393                 }
00394             }
00395         }
00396 
00397 /*      Set up scalar pointers to all of the argument data areas    */
00398 
00399         if (!bytealligned) {
00400             sptr = (void *) source->base_addr.a.ptr;
00401             rptr = (void *) result->base_addr.a.ptr;
00402             if (boundary)
00403                 bptr = (void *) boundary->base_addr.a.ptr;
00404         } else {
00405             if (type == DVTYPE_ASCII) {
00406                 cs = _fcdtocp (source->base_addr.charptr);
00407                 cr = _fcdtocp (result->base_addr.charptr);
00408                 if (boundary)
00409                     cb = _fcdtocp (boundary->base_addr.charptr);
00410             } else {
00411                 cs = (char *) source->base_addr.a.ptr;
00412                 cr = (char *) result->base_addr.a.ptr;
00413                 if (boundary)
00414                     cb = (char *) boundary->base_addr.a.ptr;
00415             }
00416         }
00417         shptr = (void *) shift->base_addr.a.ptr;
00418 
00419 /*      If dim argument is not present, set it to 1 (0 for C)   */
00420 
00421         if (dimp == NULL)
00422             dim = 0;
00423         else {
00424             if (*dimp < 1 || *dimp > rank)
00425                 _lerror (_LELVL_ABORT, FESCIDIM);
00426             dim = *dimp - 1;
00427         }
00428 
00429 /*      If source is a 1-dimensional array    */
00430 
00431         if (rank == 1) {
00432 
00433 /*
00434  *      In order to assist vectorization, the following value has been
00435  *      taken out of the array variable, and put into a scalar.
00436  */
00437 
00438             extent = source->dimension[0].extent;
00439 
00440 /*
00441  *      Get shift value which will be used for each loop iteration.
00442  *      Once the value is obtained, make sure that it is positive.  This
00443  *      will ensure that the final calculated value can only be positive,
00444  *      thus eliminating a test from the inner loop.
00445  */
00446 
00447             if (shift->type_lens.int_len == 64) {
00448                 i8ptr = (_f_int8 *) shptr;
00449                 shft = *i8ptr;
00450             } else {
00451                 i4ptr = (_f_int4 *) shptr;
00452                 shft = *i4ptr;
00453             }
00454             if (shft > extent)
00455                 shft = extent;
00456             else if (shft < -extent)
00457                 shft = -extent;
00458 
00459 /*
00460  *      The index calculation for each element of the source and result
00461  *      matrices make use of some 'shortcuts'.  These involved the use
00462  *      of some variables which contain information about indices.  One
00463  *      of these variables exists for each dimension of the source and
00464  *      result matrices.
00465  *
00466  *    strd -    stride value based in terms of elements rather than
00467  *              words.
00468  */
00469 
00470             if (bucketsize > 1 && arithmetic) {
00471                 src_dim_strd = source->dimension[0].stride_mult / bucketsize;
00472                 res_dim_strd = result->dimension[0].stride_mult / bucketsize;
00473             } else {
00474                 src_dim_strd = source->dimension[0].stride_mult;
00475                 res_dim_strd = result->dimension[0].stride_mult;
00476             }
00477 
00478 /*
00479  *      Each data type will be handled with its own inner loop.
00480  */
00481 
00482             switch (subtype) {
00483                 case DVSUBTYPE_CHAR :
00484                     if (shft >= 0) {
00485                         cptr1 = (char *) cs;
00486                         cptr2 = (char *) cr + (extent - shft) * bucketsize;
00487                         save_cptr2a = (char *) cr;
00488                         src_use_strd = src_dim_strd;
00489                         res_use_strd = res_dim_strd;
00490                     } else {
00491                         shft = -shft;
00492                         cptr1 = (char *) cs + ((extent - 1) * src_dim_strd);
00493                         cptr2 = (char *) cr + ((shft - 1) * res_dim_strd);
00494                         save_cptr2a = (char *) cr +
00495                                         ((extent - 1) * res_dim_strd);
00496                         src_use_strd = -src_dim_strd;
00497                         res_use_strd = -res_dim_strd;
00498                     }
00499                     if (boundary)
00500                         cptr3 = (char *) cb;
00501                     if (shft < extent)
00502                         shft_cnt = shft;
00503                     else
00504                         shft_cnt = extent;
00505                     for (i = 0; i < shft_cnt; i++) {
00506                         if (boundary)
00507                             (void) memcpy (cptr2, cptr3, bucketsize);
00508                         else
00509                             (void) memset (cptr2, ' ', bucketsize);
00510                         cptr1 += src_use_strd;
00511                         cptr2 += res_use_strd;
00512                     }
00513                     cptr2 = save_cptr2a;
00514                     for ( ; i < extent; i++) {
00515                         (void) memcpy (cptr2, cptr1, bucketsize);
00516                         cptr1 += src_use_strd;
00517                         cptr2 += res_use_strd;
00518                     }
00519 
00520                     break;
00521 
00522                 case DVSUBTYPE_DERIVED :
00523                     if (shft >= 0) {
00524                         save_fptr1 = (_f_int *) sptr;
00525                         save_fptr2a = (_f_int *) rptr +
00526                                         (extent-shft) * bucketsize;
00527                         save_fptr2b = (_f_int *) rptr;
00528                         src_use_strd = src_dim_strd;
00529                         res_use_strd = res_dim_strd;
00530                     } else {
00531                         shft = -shft;
00532                         save_fptr1 = (_f_int *) sptr +
00533                                         ((extent - 1) * src_dim_strd);
00534                         save_fptr2a = (_f_int *) rptr +
00535                                         ((shft - 1) * res_dim_strd);
00536                         save_fptr2b = (_f_int *) rptr +
00537                                         ((extent - 1) * res_dim_strd);
00538                         src_use_strd = -src_dim_strd;
00539                         res_use_strd = -res_dim_strd;
00540                     }
00541                     if (boundary)
00542                         fptr3 = (_f_int *) bptr;
00543                     if (shft < extent)
00544                         shft_cnt = shft;
00545                     else
00546                         shft_cnt = extent;
00547                     for (i = 0; i < bucketsize; i++) {
00548                         fptr1 = save_fptr1;
00549                         fptr2 = save_fptr2a;
00550                         for (j = 0; j < shft_cnt; j++) {
00551                             rindx = (j * res_use_strd) + i;
00552                             fptr2[rindx] = fptr3[i];
00553                         }
00554                         fptr2 = save_fptr2b;
00555                         for (k = 0; j < extent; j++, k++) {
00556                             sindx = (j * src_use_strd) + i;
00557                             rindx = (k * res_use_strd) + i;
00558                             fptr2[rindx] = fptr1[sindx];
00559                         }
00560                     }
00561                     break;
00562 
00563                 case DVSUBTYPE_BIT64 :
00564                     if (shft >= 0) {
00565                         uptr1 = (_f_int8 *) sptr;
00566                         save_uptr1 = uptr1;
00567                         uptr2 = (_f_int8 *) rptr +
00568                                 ((extent - shft) * res_dim_strd);
00569                         save_uptr2b = (_f_int8 *) rptr;
00570                         src_use_strd = src_dim_strd;
00571                         res_use_strd = res_dim_strd;
00572                     } else {
00573                         shft = -shft;
00574                         uptr1 = (_f_int8 *) sptr +
00575                                 ((extent - 1) * src_dim_strd);
00576                         save_uptr1 = uptr1;
00577                         uptr2 = (_f_int8 *) rptr +
00578                                 ((shft - 1) * res_dim_strd);
00579                         save_uptr2b = (_f_int8 *) rptr +
00580                                         ((extent - 1) * res_dim_strd);
00581                         src_use_strd = -src_dim_strd;
00582                         res_use_strd = -res_dim_strd;
00583                     }
00584                     if (shft < extent)
00585                         shft_cnt = shft;
00586                     else
00587                         shft_cnt = extent;
00588 
00589                     if (boundary) {
00590                         uptr3 = (_f_int8 *) bptr;
00591                         bnd64 = uptr3[0];
00592                     } else {
00593                         BIT64_DEFAULT();
00594                     }
00595 
00596                     for (i = 0; i < shft_cnt; i++) {
00597                         rindx = i * res_use_strd;
00598                         uptr2[rindx] = bnd64;
00599                     }
00600                     uptr2 = save_uptr2b;
00601 #ifndef CRAY2
00602                     for (j = 0; i < extent; i++, j++) {
00603                         sindx = i * src_use_strd;
00604                         rindx = j * res_use_strd;
00605                         uptr2[rindx] = uptr1[sindx];
00606                     }
00607 #else
00608                     uptr1 = save_uptr1 + (shft_cnt * res_use_strd);
00609                     shft_cnt = extent - shft_cnt;
00610                     memstride ( uptr2, res_use_strd,
00611                                 uptr1, src_use_strd, shft_cnt);
00612 #endif
00613                     break;
00614 
00615                 case DVSUBTYPE_BIT32 :
00616                     if (shft >= 0) {
00617                         hptr1 = (_f_int4 *) sptr;
00618                         hptr2 = (_f_int4 *) rptr +
00619                                 ((extent - shft) * res_dim_strd);
00620                         save_hptr = (_f_int4 *) rptr;
00621                         src_use_strd = src_dim_strd;
00622                         res_use_strd = res_dim_strd;
00623                     } else {
00624                         shft = -shft;
00625                         hptr1 = (_f_int4 *) sptr +
00626                                 ((extent - 1) * src_dim_strd);
00627                         hptr2 = (_f_int4 *) rptr +
00628                                 ((shft - 1) * res_dim_strd);
00629                         save_hptr = (_f_int4 *) rptr +
00630                                         ((extent - 1) * res_dim_strd);
00631                         src_use_strd = -src_dim_strd;
00632                         res_use_strd = -res_dim_strd;
00633                     }
00634                     if (shft < extent)
00635                         shft_cnt = shft;
00636                     else
00637                         shft_cnt = extent;
00638 
00639                     if (boundary) {
00640                         hptr3 = (_f_int4 *) bptr;
00641                         bnd32 = hptr3[0];
00642                     } else {
00643                         BIT32_DEFAULT();
00644                     }
00645 
00646                     for (i = 0; i < shft_cnt; i++) {
00647                         rindx = i * res_use_strd;
00648                         hptr2[rindx] = bnd32;
00649                     }
00650                     hptr2 = save_hptr;
00651                     for (j = 0; i < extent ; i++, j++) {
00652                         sindx = i * src_use_strd;
00653                         rindx = j * res_use_strd;
00654                         hptr2[rindx] = hptr1[sindx];
00655                     }
00656                     break;
00657 
00658                 case DVSUBTYPE_BIT128 :
00659                     if (shft >= 0) {
00660                         xptr1 = (_f_comp8 *) sptr;
00661                         xptr2 = (_f_comp8 *) rptr +
00662                                 ((extent - shft) * res_dim_strd);
00663                         save_xptr = (_f_comp8 *) rptr;
00664                         src_use_strd = src_dim_strd;
00665                         res_use_strd = res_dim_strd;
00666                     } else {
00667                         shft = -shft;
00668                         xptr1 = (_f_comp8 *) sptr +
00669                                 ((extent - 1) * src_dim_strd);
00670                         xptr2 = (_f_comp8 *) rptr +
00671                                 ((shft - 1) * res_dim_strd);
00672                         save_xptr = (_f_comp8 *) rptr +
00673                                         ((extent - 1) * res_dim_strd);
00674                         src_use_strd = -src_dim_strd;
00675                         res_use_strd = -res_dim_strd;
00676                     }
00677                     if (shft < extent)
00678                         shft_cnt = shft;
00679                     else
00680                         shft_cnt = extent;
00681 
00682                     if (boundary) {
00683                         xptr3 = (_f_comp8 *) bptr;
00684                         bnd128 = xptr3[0];
00685                     } else {
00686                         BIT128_DEFAULT();
00687                     }
00688 
00689                     for (i = 0; i < shft_cnt; i++) {
00690                         rindx = i * res_use_strd;
00691                         xptr2[rindx] = bnd128;
00692                     }
00693                     xptr2 = save_xptr;
00694                     for (j = 0; i < extent ; i++, j++) {
00695                         sindx = i * src_use_strd;
00696                         rindx = j * res_use_strd;
00697                         xptr2[rindx] = xptr1[sindx];
00698                     }
00699                     break;
00700 
00701 #ifdef _F_COMP16
00702                 case DVSUBTYPE_BIT256 :
00703                     if (shft >= 0) {
00704                         dxptr1 = (dblcmplx *) sptr;
00705                         dxptr2 = (dblcmplx *) rptr +
00706                                 ((extent - shft) * res_dim_strd);
00707                         save_dxptr = (dblcmplx *) rptr;
00708                         src_use_strd = src_dim_strd;
00709                         res_use_strd = res_dim_strd;
00710                     } else {
00711                         shft = -shft;
00712                         dxptr1 = (dblcmplx *) sptr +
00713                                 ((extent - 1) * src_dim_strd);
00714                         dxptr2 = (dblcmplx *) rptr +
00715                                 ((shft - 1) * res_dim_strd);
00716                         save_dxptr = (dblcmplx *) rptr +
00717                                         ((extent - 1) * res_dim_strd);
00718                         src_use_strd = -src_dim_strd;
00719                         res_use_strd = -res_dim_strd;
00720                     }
00721                     if (shft < extent)
00722                         shft_cnt = shft;
00723                     else
00724                         shft_cnt = extent;
00725 
00726                     if (boundary) {
00727                         dxptr3 = (dblcmplx *) bptr;
00728                         bnd256.re = dxptr3[0].re;
00729                         bnd256.im = dxptr3[0].im;
00730                     } else {
00731                         BIT256_DEFAULT();
00732                     }
00733                     for (i = 0; i < shft_cnt; i++) {
00734                         rindx = i * res_use_strd;
00735                         dxptr2[rindx].re = bnd256.re;
00736                         dxptr2[rindx].im = bnd256.im;
00737                     }
00738                     dxptr2 = save_dxptr;
00739                     for (j = 0; i < extent ; i++, j++) {
00740                         sindx = i * src_use_strd;
00741                         rindx = j * res_use_strd;
00742                         dxptr2[rindx].re = dxptr1[sindx].re;
00743                         dxptr2[rindx].im = dxptr1[sindx].im;
00744                     }
00745                     break;
00746 #endif
00747 
00748                 default:
00749                     _lerror(_LELVL_ABORT, FEINTDTY);
00750             }
00751 
00752 /*      Arrays with rank of 2   */
00753 
00754         } else if (rank == 2) {
00755 
00756 /*      Set dimension and non_dimension indices */
00757 
00758             if (dim == 0)
00759                 non_dim = 1;
00760             else
00761                 non_dim = 0;
00762 
00763 /*      Set up shortcut variables for both array dimensions    */
00764 
00765             if (bucketsize > 1 && arithmetic) {
00766                 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00767                 src_strd[1] = source->dimension[1].stride_mult / bucketsize;
00768                 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00769                 res_strd[1] = result->dimension[1].stride_mult / bucketsize;
00770                 if (boundary)
00771                     bnd_strd[0] = boundary->dimension[0].stride_mult/bucketsize;
00772             } else {
00773                 src_strd[0] = source->dimension[0].stride_mult;
00774                 src_strd[1] = source->dimension[1].stride_mult;
00775                 res_strd[0] = result->dimension[0].stride_mult;
00776                 res_strd[1] = result->dimension[1].stride_mult;
00777                 if (boundary)
00778                     bnd_strd[0] = boundary->dimension[0].stride_mult;
00779             }
00780             shft_strd[0] = shift->dimension[0].stride_mult / shft_size;
00781 
00782 /*      Put information about DIM index in scalars for vectorization */
00783 
00784             src_dim_strd = src_strd[dim];
00785             res_dim_strd = res_strd[dim];
00786             if (boundary)
00787                 bnd_dim_strd = bnd_strd[0];
00788 
00789 /*      Set up extent temporary variables    */
00790 
00791             extent = source->dimension[dim].extent;
00792 
00793 /*
00794  *      The following expressions are used repeatedly in this section, and
00795  *      can be put into scalar variables so that they do not have to be
00796  *      recalculated every time they are used.
00797  */
00798 
00799             src_tmp = (extent - 1) * src_strd[dim];
00800             res_tmp = (extent - 1) * res_strd[dim];
00801 
00802 /*      Set up shift variables          */
00803 
00804             if (shift->n_dim == 1) {
00805                 shft_dim = 1;
00806                 shft_dim_strd = shift->dimension[0].stride_mult / shft_size;
00807                 shindx = 0;
00808             } else {                    /* scalar value, only get it once */
00809                 shft_dim = 0;
00810             }
00811             if (shift->type_lens.int_len == 64) {
00812                 i8ptr = (_f_int8 *) shptr;
00813                 shft = i8ptr[0];
00814             } else {
00815                 i4ptr = (_f_int4 *) shptr;
00816                 shft = i4ptr[0];
00817             }
00818 
00819 /*      Set up boundary index   */
00820 
00821             if (boundary) {
00822                 bindx = 0;
00823                 if (boundary->n_dim > 0)
00824                     bnd_dim = 1;
00825                 else
00826                     bnd_dim = 0;
00827             }
00828 
00829 /*      Outer loop is for dimension not being shifted    */
00830 
00831             for (i = 0; i < source->dimension[non_dim].extent; i++) {
00832 
00833 /*      Get shift value if it is not a scalar   */
00834 
00835                 if (shift->type_lens.int_len == 64) {
00836                     if (shft_dim == 1) {
00837                         shft = i8ptr[shindx];
00838                         shindx += shft_dim_strd;
00839                     } else
00840                         shft = i8ptr[0];
00841                 } else {
00842                     if (shft_dim == 1) {
00843                         shft = i4ptr[shindx];
00844                         shindx += shft_dim_strd;
00845                     } else
00846                         shft = i4ptr[0];
00847                 }
00848                 if (shft > extent)
00849                     shft = extent;
00850                 else if (shft < -extent)
00851                     shft = -extent;
00852 
00853                 switch (subtype) {
00854                     case DVSUBTYPE_CHAR :
00855                         if (shft >= 0) {
00856                             cptr1 = (char *) cs + (i * src_strd[non_dim]);
00857                             cptr2 = (char *) cr + (i * res_strd[non_dim]) +
00858                                     ((extent - shft) * res_strd[dim]);
00859                             save_cptr2a = (char *) cr + (i * res_strd[non_dim]);
00860                             src_use_strd = src_dim_strd;
00861                             res_use_strd = res_dim_strd;
00862                         } else {
00863                             shft = -shft;
00864                             cptr1 = (char *) cs + (i * src_strd[non_dim]) +
00865                                         src_tmp;
00866                             cptr2 = ( char *) cr +
00867                                         (i * res_strd[non_dim]) +
00868                                         ((shft - 1) * res_strd[dim]);
00869                             save_cptr2a = (char *) cr + (i * res_strd[non_dim])
00870                                           + res_tmp;
00871                             src_use_strd = -src_dim_strd;
00872                             res_use_strd = -res_dim_strd;
00873                         }
00874                         if (boundary)
00875                             cptr3 = (char *) cb + bindx;
00876 
00877                         if (shft < extent)
00878                             shft_cnt = shft;
00879                         else
00880                             shft_cnt = extent;
00881                         for (j = 0; j < shft_cnt; j++) {
00882                             if (boundary)
00883                                 (void) memcpy (cptr2, cptr3, bucketsize);
00884                             else
00885                                 (void) memset (cptr2, ' ', bucketsize);
00886                             cptr1 += src_use_strd;
00887                             cptr2 += res_use_strd;
00888                         }
00889                         cptr2 = save_cptr2a;
00890                         for ( ; j < extent; j++) {
00891                             (void) memcpy (cptr2, cptr1, bucketsize);
00892                             cptr1 += src_use_strd;
00893                             cptr2 += res_use_strd;
00894                         }
00895                         break;
00896 
00897                     case DVSUBTYPE_DERIVED :
00898                         if (shft >= 0) {
00899                             save_fptr1 = (_f_int *) sptr +
00900                                           (i * src_strd[non_dim]);
00901                             save_fptr2a = (_f_int *) rptr +
00902                                           (i * res_strd[non_dim]) +
00903                                           ((extent - shft) * res_dim_strd);
00904                             save_fptr2b = (_f_int *) rptr +
00905                                           (i * res_strd[non_dim]);
00906                             src_use_strd = src_dim_strd;
00907                             res_use_strd = res_dim_strd;
00908                         } else {
00909                             shft = -shft;
00910                             save_fptr1 = (_f_int *) sptr +
00911                                           (i * src_strd[non_dim]) + src_tmp;
00912                             save_fptr2a = (_f_int *) rptr +
00913                                           (i * res_strd[non_dim]) +
00914                                           ((shft - 1) * res_strd[dim]);
00915                             save_fptr2b = (_f_int *) rptr +
00916                                           (i * src_strd[non_dim]) + res_tmp;
00917                             src_use_strd = -src_dim_strd;
00918                             res_use_strd = -res_dim_strd;
00919                         }
00920                         fptr3 = (_f_int *) bptr + bindx;
00921 
00922                         if (shft < extent)
00923                             shft_cnt = shft;
00924                         else
00925                             shft_cnt = extent;
00926 
00927                         for (j = 0; j < bucketsize; j++) {
00928                             fptr1 = save_fptr1;
00929                             fptr2 = save_fptr2a;
00930                             for (k = 0; k < shft_cnt; k++) {
00931                                 rindx = (k * res_use_strd) + j;
00932                                 fptr2[rindx] = fptr3[j];
00933                             }
00934                             fptr2 = save_fptr2b;
00935                             for (l = 0; k < extent; k++, l++) {
00936                                 sindx = (k * src_use_strd) + j;
00937                                 rindx = (l * res_use_strd) + j;
00938                                 fptr2[rindx] = fptr1[sindx];
00939                             }
00940                         }
00941                         break;
00942 
00943                     case DVSUBTYPE_BIT64 :
00944                         if (shft >= 0) {
00945                             uptr1 = (_f_int8 *) sptr +
00946                                         (i * src_strd[non_dim]);
00947                             save_uptr1 = uptr1;
00948                             uptr2 = (_f_int8 *) rptr +
00949                                         (i * res_strd[non_dim]) +
00950                                         ((extent - shft) * res_dim_strd);
00951                             save_uptr2b = (_f_int8 *) rptr +
00952                                         (i * res_strd[non_dim]);
00953                             src_use_strd = src_dim_strd;
00954                             res_use_strd = res_dim_strd;
00955                         } else {
00956                             shft = -shft;
00957                             uptr1 = (_f_int8 *) sptr +
00958                                         (i * src_strd[non_dim]) + src_tmp;
00959                             save_uptr1 = uptr1;
00960                             uptr2 = (_f_int8 *) rptr +
00961                                         (i * res_strd[non_dim]) +
00962                                         ((shft - 1) * res_strd[dim]);
00963                             save_uptr2b = (_f_int8 *) rptr +
00964                                           (i * res_strd[non_dim]) + res_tmp;
00965                             src_use_strd = -src_dim_strd;
00966                             res_use_strd = -res_dim_strd;
00967                         }
00968 
00969                         if (shft < extent)
00970                             shft_cnt = shft;
00971                         else
00972                             shft_cnt = extent;
00973 
00974                         if (boundary) {
00975                             uptr3 = (_f_int8 *) bptr + bindx;
00976                             bnd64 = uptr3[0];
00977                         } else {
00978                             BIT64_DEFAULT();
00979                         }
00980 
00981                         for (j = 0; j < shft_cnt; j++) {
00982                             rindx = (j * res_use_strd);
00983                             uptr2[rindx] = bnd64;
00984                         }
00985                         uptr2 = save_uptr2b;
00986 #ifndef CRAY2
00987                         for (k = 0; j < extent; j++, k++) {
00988                             sindx = (j * src_use_strd);
00989                             rindx = (k * res_use_strd);
00990                             uptr2[rindx] = uptr1[sindx];
00991                         }
00992 #else
00993                     uptr1 = save_uptr1 + (shft_cnt * src_use_strd);
00994                     shft_cnt = extent - shft_cnt;
00995                     memstride ( uptr2, res_use_strd,
00996                                 uptr1, src_use_strd, shft_cnt);
00997 #endif
00998                         break;
00999 
01000                     case DVSUBTYPE_BIT32 :
01001                         if (shft >= 0) {
01002                             hptr1 = (_f_int4 *) sptr +
01003                                         (i * src_strd[non_dim]);
01004                             hptr2 = (_f_int4 *) rptr +
01005                                         (i * res_strd[non_dim]) +
01006                                         ((extent - shft) * res_dim_strd);
01007                             save_hptr = (_f_int4 *) rptr +
01008                                         (i * res_strd[non_dim]);
01009                             src_use_strd = src_dim_strd;
01010                             res_use_strd = res_dim_strd;
01011                         } else {
01012                             shft = -shft;
01013                             hptr1 = (_f_int4 *) sptr +
01014                                         (i * src_strd[non_dim]) + src_tmp;
01015                             hptr2 = (_f_int4 *) rptr +
01016                                         (i * res_strd[non_dim]) +
01017                                         ((shft - 1) * res_strd[dim]);
01018                             save_hptr = (_f_int4 *) rptr +
01019                                         (i * res_strd[non_dim]) + res_tmp;
01020                             src_use_strd = -src_dim_strd;
01021                             res_use_strd = -res_dim_strd;
01022                         }
01023 
01024                         if (shft < extent)
01025                             shft_cnt = shft;
01026                         else
01027                             shft_cnt = extent;
01028 
01029                         if (boundary) {
01030                             hptr3 = (_f_int4 *) bptr + bindx;
01031                             bnd32 = hptr3[0];
01032                         } else {
01033                             BIT32_DEFAULT();
01034                         }
01035 
01036                         for (j = 0; j < shft_cnt; j++) {
01037                             rindx = j * res_use_strd;
01038                             hptr2[rindx] = bnd32;
01039                         }
01040                         hptr2 = save_hptr;
01041                         for (k = 0; j < extent; j++, k++) {
01042                             sindx = j * src_use_strd;
01043                             rindx = k * res_use_strd;
01044                             hptr2[rindx] = hptr1[sindx];
01045                         }
01046                         break;
01047 
01048                     case DVSUBTYPE_BIT128 :
01049                         if (shft >= 0) {
01050                             xptr1 = (_f_comp8 *) sptr +
01051                                         (i * src_strd[non_dim]);
01052                             xptr2 = (_f_comp8 *) rptr +
01053                                         (i * res_strd[non_dim]) +
01054                                         ((extent - shft) * res_dim_strd);
01055                             save_xptr = (_f_comp8 *) rptr +
01056                                         (i * res_strd[non_dim]);
01057                             src_use_strd = src_dim_strd;
01058                             res_use_strd = res_dim_strd;
01059                         } else {
01060                             shft = -shft;
01061                             xptr1 = (_f_comp8 *) sptr +
01062                                         (i * src_strd[non_dim]) + src_tmp;
01063                             xptr2 = (_f_comp8 *) rptr +
01064                                         (i * res_strd[non_dim]) +
01065                                         ((shft - 1) * res_strd[dim]);
01066                             save_xptr = (_f_comp8 *) rptr +
01067                                         (i * res_strd[non_dim]) + res_tmp;
01068                             src_use_strd = -src_dim_strd;
01069                             res_use_strd = -res_dim_strd;
01070                         }
01071 
01072                         if (shft < extent)
01073                             shft_cnt = shft;
01074                         else
01075                             shft_cnt = extent;
01076 
01077                         if (boundary) {
01078                             xptr3 = (_f_comp8 *) bptr + bindx;
01079                             bnd128 = xptr3[0];
01080                         } else {
01081                             BIT128_DEFAULT();
01082                         }
01083 
01084                         for (j = 0; j < shft_cnt; j++) {
01085                             rindx = j * res_use_strd;
01086                             xptr2[rindx] = bnd128;
01087                         }
01088                         xptr2 = save_xptr;
01089                         for (k = 0; j < extent; j++, k++) {
01090                             sindx = j * src_use_strd;
01091                             rindx = k * res_use_strd;
01092                             xptr2[rindx] = xptr1[sindx];
01093                         }
01094                         break;
01095 
01096 #ifdef _F_COMP16
01097                     case DVSUBTYPE_BIT256 :
01098                         if (shft >= 0) {
01099                             dxptr1 = (dblcmplx *) sptr +
01100                                         (i * src_strd[non_dim]);
01101                             dxptr2 = (dblcmplx *) rptr +
01102                                         (i * res_strd[non_dim]) +
01103                                         ((extent - shft) * res_dim_strd);
01104                             save_dxptr = (dblcmplx *) rptr +
01105                                         (i * res_strd[non_dim]);
01106                             src_use_strd = src_dim_strd;
01107                             res_use_strd = res_dim_strd;
01108                         } else {
01109                             shft = -shft;
01110                             dxptr1 = (dblcmplx *) sptr +
01111                                         (i * src_strd[non_dim]) + src_tmp;
01112                             dxptr2 = (dblcmplx *) rptr +
01113                                         (i * res_strd[non_dim]) +
01114                                         ((shft - 1) * res_strd[dim]);
01115                             save_dxptr = (dblcmplx *) rptr +
01116                                         (i * res_strd[non_dim]) + res_tmp;
01117                             src_use_strd = -src_dim_strd;
01118                             res_use_strd = -res_dim_strd;
01119                         }
01120 
01121                         if (shft < extent)
01122                             shft_cnt = shft;
01123                         else
01124                             shft_cnt = extent;
01125 
01126                         if (boundary) {
01127                             dxptr3 = (dblcmplx *) bptr + bindx;
01128                             bnd256.re = dxptr3[0].re;
01129                             bnd256.im = dxptr3[0].im;
01130                         } else {
01131                             BIT256_DEFAULT();
01132                         }
01133 
01134                         for (j = 0; j < shft_cnt; j++) {
01135                             rindx = j * res_use_strd;
01136                             dxptr2[rindx].re = bnd256.re;
01137                             dxptr2[rindx].im = bnd256.im;
01138                         }
01139                         dxptr2 = save_dxptr;
01140                         for (k = 0; j < extent; j++, k++) {
01141                             sindx = j * src_use_strd;
01142                             rindx = k * res_use_strd;
01143                             dxptr2[rindx].re = dxptr1[sindx].re;
01144                             dxptr2[rindx].im = dxptr1[sindx].im;
01145                         }
01146                         break;
01147 #endif
01148 
01149                     default :
01150                         _lerror(_LELVL_ABORT, FEINTDTY);
01151                 }
01152 
01153 /*      Increment boundary stride index         */
01154 
01155                 if (bnd_dim)
01156                     bindx += bnd_dim_strd;
01157             }
01158 
01159 /*      Arrays with rank of 3-7 */
01160 
01161         } else {
01162 
01163             if (dim == 0) {
01164                 i = 0;
01165                 tot_ext = 1;
01166             } else
01167 #ifdef _UNICOS
01168 #pragma _CRI    shortloop
01169 #endif
01170             for (i = 0, tot_ext = 1; i < dim; i++) {
01171                 tot_ext *= source->dimension[i].extent;
01172                 src_ext[i] = source->dimension[i].extent;
01173                 if (bucketsize > 1 && arithmetic) {
01174                     src_strd[i] = source->dimension[i].stride_mult / bucketsize;
01175                     res_strd[i] = result->dimension[i].stride_mult / bucketsize;
01176                     if (boundary)
01177                         bnd_strd[i] =
01178                             boundary->dimension[i].stride_mult / bucketsize;
01179                     else
01180                         bnd_strd[i] = 0;
01181                 } else {
01182                     src_strd[i] = source->dimension[i].stride_mult;
01183                     res_strd[i] = result->dimension[i].stride_mult;
01184                     if (boundary)
01185                         bnd_strd[i] = boundary->dimension[i].stride_mult;
01186                     else
01187                         bnd_strd[i] = 0;
01188                 }
01189             }
01190             if (i < (rank - 1))
01191 #ifdef _UNICOS
01192 #pragma _CRI    shortloop
01193 #endif
01194               for ( ; i < rank-1; i++) {
01195                 tot_ext *= source->dimension[i+1].extent;
01196                 src_ext[i] = source->dimension[i+1].extent;
01197                 if (bucketsize > 1 && arithmetic) {
01198                     src_strd[i] = source->dimension[i+1].stride_mult/bucketsize;
01199                     res_strd[i] = result->dimension[i+1].stride_mult/bucketsize;
01200                     if (boundary)
01201                         bnd_strd[i] =
01202                             boundary->dimension[i].stride_mult / bucketsize;
01203                     else
01204                         bnd_strd[i] = 0;
01205                 } else {
01206                     src_strd[i] = source->dimension[i+1].stride_mult;
01207                     res_strd[i] = result->dimension[i+1].stride_mult;
01208                     if (boundary)
01209                         bnd_strd[i] = boundary->dimension[i].stride_mult;
01210                     else
01211                         bnd_strd[i] = 0;
01212                 }
01213               }
01214 
01215 /*      Initialize all counters to 0    */
01216 
01217 #ifdef _UNICOS
01218 #pragma _CRI    shortloop
01219 #endif
01220             for (i = 0; i < MAXDIM-1; i++) {
01221                 src_off[i] = 0;
01222                 res_off[i] = 0;
01223                 bnd_off[i] = 0;
01224                 shft_off[i] = 0;
01225                 shft_strd[i] = 0;
01226                 curdim[i] = 0;
01227             }
01228 
01229 /*      Set up some scalars which contain information about DIM    */
01230 
01231             extent = source->dimension[dim].extent;
01232 
01233             if (bucketsize > 1 && arithmetic) {
01234                 src_dim_strd = source->dimension[dim].stride_mult / bucketsize;
01235                 res_dim_strd = result->dimension[dim].stride_mult / bucketsize;
01236             } else {
01237                 src_dim_strd = source->dimension[dim].stride_mult;
01238                 res_dim_strd = result->dimension[dim].stride_mult;
01239             }
01240 
01241 /*
01242  *      The following expressions are used throughout this loop.  They can
01243  *      be put into scalars to avoid recalculating them every time they are
01244  *      used.
01245  */
01246 
01247             src_tmp = (extent - 1) * src_dim_strd;
01248             res_tmp = (extent - 1) * res_dim_strd;
01249 
01250 /*      Set up the shift variables              */
01251 
01252             if (shift->n_dim == 0) {
01253                 shft_dim = 0;
01254             } else {
01255                 shft_dim = 1;
01256                 shindx = 0;
01257 #ifdef _UNICOS
01258 #pragma _CRI    shortloop
01259 #endif
01260                 for (i = 0; i < rank-1; i++) {
01261                     shft_strd[i] = shift->dimension[i].stride_mult / shft_size;
01262                 }
01263             }
01264 
01265 /*      Initialize correct pointer type to shift data   */
01266 
01267             if (shift->type_lens.int_len == 64) {
01268                 i8ptr = (_f_int8 *) shptr;
01269                 shft = i8ptr[0];
01270             } else {
01271                 i4ptr = (_f_int4 *) shptr;
01272                 shft = i4ptr[0];
01273             }
01274 
01275 /*      Set up boundary values  */
01276 
01277             if (boundary) {
01278                 bindx = 0;
01279                 if (boundary->n_dim > 0)
01280                     bnd_dim = 1;
01281                 else
01282                     bnd_dim = 0;
01283             }
01284 
01285 /*
01286  *      The outer loop will be executed once for each combination of
01287  *      indices not including the DIM dimension.
01288  */
01289 
01290             for (i = 0; i < tot_ext; i++) {
01291 
01292 /*      Calculate the shift value used throughout the inner loop    */
01293 
01294                 if (shft_dim) {
01295                     switch (rank) {
01296                         case 3 :
01297                             shindx = shft_off[0] + shft_off[1];
01298                             break;
01299                         case 4 :
01300                             shindx = shft_off[0] + shft_off[1] + shft_off[2];
01301                             break;
01302                         case 5 :
01303                             shindx = shft_off[0] + shft_off[1] +
01304                                      shft_off[2] + shft_off[3];
01305                             break;
01306                         case 6 :
01307                             shindx = shft_off[0] + shft_off[1] + shft_off[2] +
01308                                      shft_off[3] + shft_off[4];
01309                             break;
01310                         default :
01311                             shindx = shft_off[0] + shft_off[1] + shft_off[2] +
01312                                      shft_off[3] + shft_off[4] + shft_off[5];
01313                     }
01314                     if (shift->type_lens.int_len == 64) 
01315                         shft = i8ptr[shindx];
01316                     else
01317                         shft = i4ptr[shindx];
01318                 } else
01319                     if (shift->type_lens.int_len == 64) 
01320                         shft = i8ptr[0];
01321                     else
01322                         shft = i4ptr[0];
01323                 if (shft > extent)
01324                     shft = extent;
01325                 else if (shft < -extent)
01326                     shft = -extent;
01327 
01328 /*      If necessary, get boundary index        */
01329 
01330                 if (bnd_dim) {
01331                     switch (rank) {
01332                         case 3 :
01333                             bindx = bnd_off[0] + bnd_off[1];
01334                             break;
01335                         case 4 :
01336                             bindx = bnd_off[0] + bnd_off[1] + bnd_off[2];
01337                             break;
01338                         case 5 :
01339                             bindx = bnd_off[0] + bnd_off[1] +
01340                                      bnd_off[2] + bnd_off[3];
01341                             break;
01342                         case 6 :
01343                             bindx = bnd_off[0] + bnd_off[1] + bnd_off[2] +
01344                                      bnd_off[3] + bnd_off[4];
01345                             break;
01346                         default :
01347                             bindx = bnd_off[0] + bnd_off[1] + bnd_off[2] +
01348                                      bnd_off[3] + bnd_off[4] + bnd_off[5];
01349                     }
01350                 }
01351 
01352 /*      Calculate source and result index values        */
01353 
01354                 switch (rank) {
01355                     case 3 :
01356                         sindx = src_off[0] + src_off[1];
01357                         rindx = res_off[0] + res_off[1];
01358                         break;
01359                     case 4 :
01360                         sindx = src_off[0] + src_off[1] + src_off[2];
01361                         rindx = res_off[0] + res_off[1] + res_off[2];
01362                         break;
01363                     case 5 :
01364                         sindx = src_off[0] + src_off[1] +
01365                                 src_off[2] + src_off[3];
01366                         rindx = res_off[0] + res_off[1] +
01367                                 res_off[2] + res_off[3];
01368                         break;
01369                     case 6 :
01370                         sindx = src_off[0] + src_off[1] + src_off[2] +
01371                                 src_off[3] + src_off[4];
01372                         rindx = res_off[0] + res_off[1] + res_off[2] +
01373                                 res_off[3] + res_off[4];
01374                         break;
01375                     default :
01376                         sindx = src_off[0] + src_off[1] + src_off[2] +
01377                                 src_off[3] + src_off[4] + src_off[5];
01378                         rindx = res_off[0] + res_off[1] + res_off[2] +
01379                                 res_off[3] + res_off[4] + res_off[5];
01380                 }
01381 
01382                 switch (subtype) {
01383                     case DVSUBTYPE_CHAR :
01384                         if (shft >= 0) {
01385                             cptr1 = (char *) cs + sindx;
01386                             save_cptr1 = cptr1;
01387                             cptr2 = (char *) cr + rindx +
01388                                     ((extent - shft) * res_dim_strd);
01389                             save_cptr2a = cptr2;
01390                             save_cptr2b = (char *) cr + rindx;
01391                             src_use_strd = src_dim_strd;
01392                             res_use_strd = res_dim_strd;
01393                         } else {
01394                             shft = -shft;
01395                             cptr1 = (char *) cs + sindx + src_tmp;
01396                             save_cptr1 = cptr1;
01397                             cptr2 = (char *) cr + rindx +
01398                                         ((shft - 1) * res_dim_strd);
01399                             save_cptr2a = cptr2;
01400                             save_cptr2b = (char *) cr + rindx + res_tmp;
01401                             src_use_strd = -src_dim_strd;
01402                             res_use_strd = -res_dim_strd;
01403                         }
01404 
01405                         if (boundary)
01406                             cptr3 = (char *) cb + bindx;
01407 
01408                         if (shft < extent)
01409                             shft_cnt = shft;
01410                         else
01411                             shft_cnt = extent;
01412                         for (j = 0; j < shft_cnt; j++) {
01413                             cptr2 = save_cptr2a + (j * res_use_strd);
01414                             if (boundary)
01415                                 (void) memcpy (cptr2, cptr3, bucketsize);
01416                             else
01417                                 (void) memset (cptr2, ' ', bucketsize);
01418                         }
01419                         cptr2 = save_cptr2b;
01420                         for (k = 0; j < extent; j++, k++) {
01421                             cptr1 = save_cptr1 + (j * src_use_strd);
01422                             cptr2 = save_cptr2b + (k * res_use_strd);
01423                             (void) memcpy (cptr2, cptr1, bucketsize);
01424                         }
01425                         break;
01426 
01427                     case DVSUBTYPE_DERIVED :
01428                         if (shft >= 0) {
01429                             save_fptr1 = (_f_int *) sptr + sindx;
01430                             save_fptr2a = (_f_int *) rptr + rindx +
01431                                 ((extent - shft) * res_dim_strd);
01432                             save_fptr2b = (_f_int *) rptr + rindx;
01433                             src_use_strd = src_dim_strd;
01434                             res_use_strd = res_dim_strd;
01435                         } else {
01436                             shft = -shft;
01437                             save_fptr1 = (_f_int *) sptr + sindx +
01438                                                 src_tmp;
01439                             save_fptr2a = (_f_int *) rptr + rindx +
01440                                           ((shft - 1) * res_dim_strd);
01441                             save_fptr2b = (_f_int *) rptr + rindx +
01442                                                 res_tmp;
01443                             src_use_strd = -src_dim_strd;
01444                             res_use_strd = -res_dim_strd;
01445                         }
01446                         fptr3 = (_f_int *) bptr + bindx;
01447 
01448                         if (shft < extent)
01449                             shft_cnt = shft;
01450                         else
01451                             shft_cnt = extent;
01452 
01453                         for (j = 0; j < bucketsize; j++) {
01454                             fptr1 = save_fptr1;
01455                             fptr2 = save_fptr2a;
01456                             for (k = 0; k < shft_cnt; k++) {
01457                                 rindx2 = (k * res_use_strd) + j;
01458                                 fptr2[rindx2] = fptr3[j];
01459                             }
01460                             fptr2 = save_fptr2b;
01461                             for (l = 0; k < extent; k++, l++) {
01462                                 sindx2 = (k * src_use_strd) + j;
01463                                 rindx2 = (l * res_use_strd) + j;
01464                                 fptr2[rindx2] = fptr1[sindx2];
01465                             }
01466                         }
01467                         break;
01468 
01469                     case DVSUBTYPE_BIT64 :
01470                         if (shft >= 0) {
01471                             uptr1 = (_f_int8 *) sptr + sindx;
01472                             save_uptr1 = uptr1;
01473                             uptr2 = (_f_int8 *) rptr + rindx +
01474                                         ((extent - shft) * res_dim_strd);
01475                             save_uptr2b = (_f_int8 *) rptr + rindx;
01476                             src_use_strd = src_dim_strd;
01477                             res_use_strd = res_dim_strd;
01478                         } else {
01479                             shft = -shft;
01480                             uptr1 = (_f_int8 *) sptr + sindx + src_tmp;
01481                             save_uptr1 = uptr1;
01482                             uptr2 = (_f_int8 *) rptr + rindx +
01483                                         ((shft - 1) * res_dim_strd);
01484                             save_uptr2b = (_f_int8 *) rptr + rindx +
01485                                                 res_tmp;
01486                             src_use_strd = -src_dim_strd;
01487                             res_use_strd = -res_dim_strd;
01488                         }
01489 
01490                         if (shft < extent)
01491                             shft_cnt = shft;
01492                         else
01493                             shft_cnt = extent;
01494 
01495                         if (boundary) {
01496                             uptr3 = (_f_int8 *) bptr + bindx;
01497                             bnd64 = uptr3[0];
01498                         } else {
01499                             BIT64_DEFAULT();
01500                         }
01501 
01502                         for (j = 0; j < shft_cnt; j++) {
01503                             rindx2 = j * res_use_strd;
01504                             uptr2[rindx2] = bnd64;
01505                         }
01506                         uptr2 = save_uptr2b;
01507 #ifndef CRAY2
01508                         for (k = 0; j < extent; j++, k++) {
01509                             sindx2 = j * src_use_strd;
01510                             rindx2 = k * res_use_strd;
01511                             uptr2[rindx2] = uptr1[sindx2];
01512                         }
01513 #else
01514                         uptr1 = save_uptr1 + (shft_cnt * src_use_strd);
01515                         shft_cnt = extent - shft_cnt;
01516                         memstride ( uptr2, res_use_strd,
01517                                 uptr1, src_use_strd, shft_cnt);
01518 #endif
01519                         break;
01520 
01521                     case DVSUBTYPE_BIT32 :
01522                         if (shft >= 0) {
01523                             hptr1 = (_f_int4 *) sptr + sindx;
01524                             hptr2 = (_f_int4 *) rptr + rindx +
01525                                         ((extent - shft) * res_dim_strd);
01526                             save_hptr = (_f_int4 *) rptr + rindx;
01527                             src_use_strd = src_dim_strd;
01528                             res_use_strd = res_dim_strd;
01529                         } else {
01530                             shft = -shft;
01531                             hptr1 = (_f_int4 *) sptr + sindx + src_tmp;
01532                             hptr2 = (_f_int4 *) rptr + rindx +
01533                                         ((shft - 1) * res_dim_strd);
01534                             save_hptr = (_f_int4 *) rptr + rindx + res_tmp;
01535                             src_use_strd = -src_dim_strd;
01536                             res_use_strd = -res_dim_strd;
01537                         }
01538 
01539                         if (shft < extent)
01540                             shft_cnt = shft;
01541                         else
01542                             shft_cnt = extent;
01543 
01544                         if (boundary) {
01545                             hptr3 = (_f_int4 *) bptr + bindx;
01546                             bnd32 = hptr3[0];
01547                         } else {
01548                             BIT32_DEFAULT();
01549                         }
01550 
01551                         for (j = 0; j < shft_cnt; j++) {
01552                             rindx2 = j * res_use_strd;
01553                             hptr2[rindx2] = bnd32;
01554                         }
01555                         hptr2 = save_hptr;
01556                         for (k = 0; j < extent; j++, k++) {
01557                             sindx2 = j * src_use_strd;
01558                             rindx2 = k * res_use_strd;
01559                             hptr2[rindx2] = hptr1[sindx2];
01560                         }
01561                         break;
01562 
01563                     case DVSUBTYPE_BIT128 :
01564                         if (shft >= 0) {
01565                             xptr1 = (_f_comp8 *) sptr + sindx;
01566                             xptr2 = (_f_comp8 *) rptr + rindx +
01567                                         ((extent - shft) * res_dim_strd);
01568                             save_xptr = (_f_comp8 *) rptr + rindx;
01569                             src_use_strd = src_dim_strd;
01570                             res_use_strd = res_dim_strd;
01571                         } else {
01572                             shft = -shft;
01573                             xptr1 = (_f_comp8 *) sptr + sindx + src_tmp;
01574                             xptr2 = (_f_comp8 *) rptr + rindx +
01575                                         ((shft - 1) * res_dim_strd);
01576                             save_xptr = (_f_comp8 *) rptr + rindx + res_tmp;
01577                             src_use_strd = -src_dim_strd;
01578                             res_use_strd = -res_dim_strd;
01579                         }
01580 
01581                         if (shft < extent)
01582                             shft_cnt = shft;
01583                         else
01584                             shft_cnt = extent;
01585 
01586                         if (boundary) {
01587                             xptr3 = (_f_comp8 *) bptr + bindx;
01588                             bnd128 = xptr3[0];
01589                         } else {
01590                             BIT128_DEFAULT();
01591                         }
01592 
01593                         for (j = 0; j < shft_cnt; j++) {
01594                             rindx2 = j * res_use_strd;
01595                             xptr2[rindx2] = bnd128;
01596                         }
01597                         xptr2 = save_xptr;
01598                         for (k = 0; j < extent; j++, k++) {
01599                             sindx2 = j * src_use_strd;
01600                             rindx2 = k * res_use_strd;
01601                             xptr2[rindx2] = xptr1[sindx2];
01602                         }
01603                         break;
01604 
01605 #ifdef _F_COMP16
01606                     case DVSUBTYPE_BIT256 :
01607                         if (shft >= 0) {
01608                             dxptr1 = (dblcmplx *) sptr + sindx;
01609                             dxptr2 = (dblcmplx *) rptr + rindx +
01610                                         ((extent - shft) * res_dim_strd);
01611                             save_dxptr = (dblcmplx *) rptr + rindx;
01612                             src_use_strd = src_dim_strd;
01613                             res_use_strd = res_dim_strd;
01614                         } else {
01615                             shft = -shft;
01616                             dxptr1 = (dblcmplx *) sptr + sindx + src_tmp;
01617                             dxptr2 = (dblcmplx *) rptr + rindx +
01618                                         ((shft - 1) * res_dim_strd);
01619                             save_dxptr = (dblcmplx *) rptr + rindx + res_tmp;
01620                             src_use_strd = -src_dim_strd;
01621                             res_use_strd = -res_dim_strd;
01622                         }
01623 
01624                         if (shft < extent)
01625                             shft_cnt = shft;
01626                         else
01627                             shft_cnt = extent;
01628 
01629                         if (boundary) {
01630                             dxptr3 = (dblcmplx *) bptr + bindx;
01631                             bnd256.re = dxptr3[0].re;
01632                             bnd256.im = dxptr3[0].im;
01633                         } else {
01634                             BIT256_DEFAULT();
01635                         }
01636 
01637                         for (j = 0; j < shft_cnt; j++) {
01638                             rindx2 = j * res_use_strd;
01639                             dxptr2[rindx2].re = bnd256.re;
01640                             dxptr2[rindx2].im = bnd256.im;
01641                         }
01642                         dxptr2 = save_dxptr;
01643                         for (k = 0; j < extent; j++, k++) {
01644                             sindx2 = j * src_use_strd;
01645                             rindx2 = k * res_use_strd;
01646                             dxptr2[rindx2].re = dxptr1[sindx2].re;
01647                             dxptr2[rindx2].im = dxptr1[sindx2].im;
01648                         }
01649                         break;
01650 #endif
01651 
01652                     default :
01653                         _lerror(_LELVL_ABORT, FEINTDTY);
01654                 }
01655 
01656 /*      Increment the current dimension counter.    */
01657 
01658                 curdim[0]++;
01659                 if (curdim[0] < src_ext[0]) {
01660                     src_off[0] += src_strd[0];
01661                     res_off[0] += res_strd[0];
01662                     shft_off[0] += shft_strd[0];
01663                     bnd_off[0] += bnd_strd[0];
01664                 } else {
01665                     curdim[0] = 0;
01666                     src_off[0] = 0;
01667                     res_off[0] = 0;
01668                     shft_off[0] = 0;
01669                     bnd_off[0] = 0;
01670                     curdim[1]++;
01671                     if (curdim[1] < src_ext[1]) {
01672                         src_off[1] += src_strd[1];
01673                         res_off[1] += res_strd[1];
01674                         shft_off[1] += shft_strd[1];
01675                         bnd_off[1] += bnd_strd[1];
01676                     } else {
01677                         curdim[1] = 0;
01678                         src_off[1] = 0;
01679                         res_off[1] = 0;
01680                         shft_off[1] = 0;
01681                         bnd_off[1] = 0;
01682                         curdim[2]++;
01683                         if (curdim[2] < src_ext[2]) {
01684                             src_off[2] += src_strd[2];
01685                             res_off[2] += res_strd[2];
01686                             shft_off[2] += shft_strd[2];
01687                             bnd_off[2] += bnd_strd[2];
01688                         } else {
01689                             curdim[2] = 0;
01690                             src_off[2] = 0;
01691                             res_off[2] = 0;
01692                             shft_off[2] = 0;
01693                             bnd_off[2] = 0;
01694                             curdim[3]++;
01695                             if (curdim[3] < src_ext[3]) {
01696                                 src_off[3] += src_strd[3];
01697                                 res_off[3] += res_strd[3];
01698                                 shft_off[3] += shft_strd[3];
01699                                 bnd_off[3] += bnd_strd[3];
01700                             } else {
01701                                 curdim[3] = 0;
01702                                 src_off[3] = 0;
01703                                 res_off[3] = 0;
01704                                 shft_off[3] = 0;
01705                                 bnd_off[3] = 0;
01706                                 curdim[4]++;
01707                                 if (curdim[4] < src_ext[4]) {
01708                                     src_off[4] += src_strd[4];
01709                                     res_off[4] += res_strd[4];
01710                                     shft_off[4] += shft_strd[4];
01711                                     bnd_off[4] += bnd_strd[4];
01712                                 } else {
01713                                     curdim[4] = 0;
01714                                     src_off[4] = 0;
01715                                     res_off[4] = 0;
01716                                     shft_off[4] = 0;
01717                                     bnd_off[4] = 0;
01718                                     curdim[5]++;
01719                                     if (curdim[5] < src_ext[5]) {
01720                                         src_off[5] += src_strd[5];
01721                                         res_off[5] += res_strd[5];
01722                                         shft_off[5] += shft_strd[5];
01723                                         bnd_off[5] += bnd_strd[5];
01724                                     }
01725                                 }
01726                             }
01727                         }
01728                     }
01729                 }
01730             }
01731         }
01732 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines