OpenAD/Open64Dox/cshift_8c_source.html

00001 /*
00002
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License
00007   as published by the Free Software Foundation.
00008
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00012
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement
00015   or the like.  Any license provided herein, whether implied or
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with
00018   other software, or any other product whatsoever.
00019
00020   You should have received a copy of the GNU Lesser General Public
00021   License along with this program; if not, write the Free Software
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307,
00023   USA.
00024
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027
00028   http://www.sgi.com
00029
00030   For further information regarding this notice, see:
00031
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033
00034 */
00035
00036
00037 #pragma ident "@(#) libfi/array/cshift.c        92.1    07/07/99 15:52:02"
00038
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044
00045 /*
00046  *    Cshift function.  Perform a circular shift of an array
00047  *    expression of rank one or perform circular shifts on all
00048  *    complete rank one sections along a given dimension of an array
00049  *    expression of rank two or greater.  Elements shifted out at one
00050  *    end of a section are shifted in at the other end.  Different
00051  *    sections may be shifted by different amounts in different
00052  *    directions.
00053  */
00054
00055 #ifdef _UNICOS
00056 #pragma _CRI duplicate _CSHIFT as CSHIFT@
00057 #endif
00058 void
00059 _CSHIFT (DopeVectorType * result,
00060         DopeVectorType * source,
00061         DopeVectorType * shift,
00062         _f_int  *dimp)
00063 {
00064         char    *cs;            /* char ptr to source array     */
00065         char    *cr;            /* char ptr to result array     */
00066         char            * restrict cptr1;       /* char                 */
00067         char            * restrict cptr2;       /* char                 */
00068         _f_int8         * restrict uptr1;       /* 64-bit               */
00069         _f_int8         * restrict uptr2;       /* 64-bit               */
00070         _f_int          * restrict wptr1;       /* generic integer      */
00071         _f_int          * restrict wptr2;       /* generic integer      */
00072         _f_real16       * restrict dptr1;       /* 128-bit              */
00073         _f_real16       * restrict dptr2;       /* 128-bit              */
00074 #ifdef _F_COMP16
00075         dblcmplx        * restrict xptr1;       /* 256-bit              */
00076         dblcmplx        * restrict xptr2;       /* 256-bit              */
00077 #endif
00078         _f_int4         * restrict hptr1;       /* 32-bit               */
00079         _f_int4         * restrict hptr2;       /* 32-bit               */
00080         void            * restrict sptr;        /* ptr to src data area */
00081         void            * restrict rptr;        /* ptr to res data area */
00082         long            * restrict shptr;       /* ptr to shift area    */
00083         _f_int8         * restrict save_uptr1;  /* save copy of uptr1   */
00084         _f_int8         * restrict save_uptr2a; /* save copy of uptr1   */
00085         _f_int8         * restrict save_uptr2b; /* save copy of uptr1   */
00086         _f_int          * restrict save_wptr1;  /* save copy of wptr1   */
00087         _f_int          * restrict save_wptr2a; /* save copy of wptr1   */
00088         _f_int          * restrict save_wptr2b; /* save copy of wptr1   */
00089         _f_int4         * restrict iptr4;    /* ptr to shift data area  */
00090         _f_int8         * restrict iptr8;    /* ptr to shift data area  */
00091         _f_int          bucketsize;     /* size of each data element    */
00092         _f_int          shft_size;      /* size of stride elements      */
00093         long    nbytes;         /* # of bytes in data area      */
00094         _f_int  num;            /* index value                  */
00095         _f_int  bytealligned;   /* byte alligned flag           */
00096         long    sindx;          /* source index                 */
00097         long    sindx2;         /* source index                 */
00098         long    shindx;         /* shift index                  */
00099         long    rindx;          /* result array index           */
00100         long    rindx2;         /* result array index           */
00101         _f_int  shft;           /* shift value                  */
00102         _f_int  dim;            /* dim value                    */
00103         _f_int  non_dim;        /* non-shift dim for 2x2        */
00104         long    curdim[MAXDIM-1];   /* current indices          */
00105         long    src_strd[MAXDIM-1]; /* index stride             */
00106         long    src_ext[MAXDIM-1];  /* extents for source array */
00107         long    src_off[MAXDIM-1];  /* source offset            */
00108         long    res_strd[MAXDIM-1]; /* index stride             */
00109         long    res_off[MAXDIM-1];  /* result offset            */
00110         long    shft_strd[MAXDIM-1];/* stride for shift array   */
00111         long    shft_off[MAXDIM-1]; /* shift offset             */
00112         _f_int  rank;           /* rank of source matrix        */
00113         _f_int  type;           /* type                         */
00114         _f_int  subtype;        /* sub-type                     */
00115         _f_int  arithmetic;     /* arithmetic data type         */
00116         long    extent;         /* extent temporary             */
00117         long    extlow;         /* lower bound of extent        */
00118         long    exthi;          /* upper bound of extent        */
00119         long    src_dim_strd;   /* stride for source index      */
00120         long    res_dim_strd;   /* stride for result index      */
00121         long    shft_dim;       /* shift dimension              */
00122         long    shft_dim_strd;  /* shift stride                 */
00123         long    tot_ext;        /* total extent                 */
00124         long    i, j, k, l;     /* index variables              */
00125
00126 /*    Set type and dimension global variables   */
00127
00128         rank = source->n_dim;
00129         type = source->type_lens.type;
00130
00131 /*    Size calculation is based on variable type.       */
00132
00133         switch (type) {
00134             case DVTYPE_ASCII :
00135                 bytealligned = 1;
00136                 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */
00137                 subtype = DVSUBTYPE_CHAR;
00138                 arithmetic = 0;
00139                 break;
00140             case DVTYPE_DERIVEDBYTE :
00141                 bytealligned = 1;
00142                 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00143                 subtype = DVSUBTYPE_CHAR;
00144                 arithmetic = 0;
00145                 break;
00146             case DVTYPE_DERIVEDWORD :
00147                 bytealligned = 0;
00148                 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00149                 subtype = DVSUBTYPE_DERIVED;
00150                 arithmetic = 0;
00151                 break;
00152             default :
00153                 bytealligned = 0;
00154                 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00155                 if (source->type_lens.int_len == 64) {
00156                     subtype = DVSUBTYPE_BIT64;
00157                 } else if (source->type_lens.int_len == 32) {
00158                     subtype = DVSUBTYPE_BIT32;
00159                     bucketsize = 1;
00160                 } else if (source->type_lens.int_len == 256) {
00161                     subtype = DVSUBTYPE_BIT256;
00162                 } else {
00163                     subtype = DVSUBTYPE_BIT128;
00164                 }
00165                 arithmetic = 1;
00166         }
00167         shft_size = shift->base_addr.a.el_len / BITS_PER_WORD;
00168 #ifdef  _CRAYMPP
00169         if (shft_size == 0)
00170             shft_size = 1;
00171 #endif
00172
00173 /*    If necessary, fill result dope vector     */
00174
00175         if (!result->assoc) {
00176             result->base_addr.a.ptr  = (void *) NULL;
00177             result->orig_base      = 0;
00178             result->orig_size      = 0;
00179 #ifdef _UNICOS
00180 #pragma _CRI shortloop
00181 #endif
00182             for (i = 0, tot_ext = bucketsize; i < rank; i++) {
00183                 result->dimension[i].extent = source->dimension[i].extent;
00184                 result->dimension[i].low_bound = 1;
00185                 result->dimension[i].stride_mult = tot_ext;
00186                 tot_ext *= result->dimension[i].extent;
00187             }
00188
00189 /*    Determine size of space to allocate    */
00190
00191             if (!bytealligned) {
00192                 nbytes = bucketsize * BYTES_PER_WORD;
00193 #ifdef _CRAYMPP
00194                 if (subtype == DVSUBTYPE_BIT32)
00195                     nbytes /= 2;
00196 #endif
00197             } else {
00198                 nbytes = bucketsize;
00199             }
00200 #ifdef _UNICOS
00201 #pragma _CRI shortloop
00202 #endif
00203             for (i = 0; i < rank; i++)
00204                 nbytes *= result->dimension[i].extent;
00205             if (nbytes > 0) {
00206                 result->base_addr.a.ptr = (void *) malloc(nbytes);
00207                 if (result->base_addr.a.ptr == NULL)
00208                     _lerror(_LELVL_ABORT, FENOMEMY);
00209             }
00210
00211             result->assoc = 1;
00212             result->base_addr.a.el_len = source->base_addr.a.el_len;
00213             if (type == DVTYPE_ASCII) {
00214                 cr = (char *) result->base_addr.a.ptr;
00215                 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00216             }
00217             result->orig_base = result->base_addr.a.ptr;
00218             result->orig_size = nbytes * BITS_PER_BYTE;
00219         }
00220
00221 /*
00222  *      Check to see if any of the matrices have size 0.  If any do,
00223  *      return without doing anything.
00224  */
00225
00226 #ifdef _UNICOS
00227 #pragma _CRI    shortloop
00228 #endif
00229         for (i = 0; i < rank; i++) {
00230             if (!source->dimension[i].extent)
00231                 return;
00232         }
00233         if (result->assoc) {
00234 #ifdef _UNICOS
00235 #pragma _CRI    shortloop
00236 #endif
00237             for (i = 0; i < rank; i++)
00238                 if (!result->dimension[i].extent)
00239                     return;
00240         }
00241         if (shift->n_dim > 1) {
00242 #ifdef _UNICOS
00243 #pragma _CRI    shortloop
00244 #endif
00245             for (i = 0; i < rank-1; i++)
00246                 if (!shift->dimension[i].extent)
00247                     return;
00248         }
00249
00250 /*    Set up scalar pointers to all of the argument data areas    */
00251
00252         if (!bytealligned) {
00253             sptr = (void *) source->base_addr.a.ptr;
00254             rptr = (void *) result->base_addr.a.ptr;
00255         } else {
00256             if (type == DVTYPE_ASCII) {
00257                 cs = _fcdtocp (source->base_addr.charptr);
00258                 cr = _fcdtocp (result->base_addr.charptr);
00259             } else {
00260                 cs = (char *) source->base_addr.a.ptr;
00261                 cr = (char *) result->base_addr.a.ptr;
00262             }
00263         }
00264         shptr = (void *) shift->base_addr.a.ptr;
00265
00266 /*    If dim argument is not present, set it to 1 (0 for C)     */
00267
00268         if (dimp == NULL)
00269             dim = 0;
00270         else {
00271             if (*dimp < 1 || *dimp > rank)
00272                 _lerror (_LELVL_ABORT, FESCIDIM);
00273             dim = *dimp - 1;
00274         }
00275
00276 /*    If source is a 1-dimensional array    */
00277
00278         if (rank == 1) {
00279
00280 /*
00281  *    Get shift value which will be used for each loop iteration.
00282  *    Once the value is obtained, make sure that it is positive.  This
00283  *    will ensure that the final calculated value can only be positive,
00284  *    thus eliminating a test from the inner loop.
00285  */
00286
00287             if (shift->base_addr.a.el_len == 64) {
00288                 iptr8 = (_f_int8 *) shptr;
00289                 shft = *iptr8 % source->dimension[0].extent;
00290             } else {
00291                 iptr4 = (_f_int4 *) shptr;
00292                 shft = *iptr4 % source->dimension[0].extent;
00293             }
00294             if (shft < 0)
00295                 shft += source->dimension[0].extent;
00296
00297 /*
00298  *    The index calculation for each element of the source and result
00299  *    matrices make use of some 'shortcuts'.  These involved the use
00300  *    of some variables which contain information about indices.  One
00301  *    of these variables exists for each dimension of the source and
00302  *    result matrices.
00303  *
00304  *    strd -    stride value based in terms of elements rather than
00305  *              words.
00306  */
00307
00308             if (bucketsize > 1 && arithmetic) {
00309                 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00310                 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00311             } else {
00312                 src_strd[0] = source->dimension[0].stride_mult;
00313                 res_strd[0] = result->dimension[0].stride_mult;
00314             }
00315
00316 /*
00317  *    In order to assist vectorization, the following value has been
00318  *    taken out of the array variable, and put into a scalar.
00319  */
00320
00321             extent = source->dimension[0].extent;
00322
00323 /*
00324  *    Each data type will be handled with its own inner loop.
00325  */
00326
00327             switch (subtype) {
00328                 case DVSUBTYPE_CHAR :
00329                     cptr1 = (char *) cs;
00330                     cptr2 = (char *) cr + (extent - shft) * bucketsize;
00331                     src_dim_strd = source->dimension[0].stride_mult;
00332                     res_dim_strd = result->dimension[0].stride_mult;
00333                     for (i = 0; i < shft; i++) {
00334                         (void) memcpy (cptr2, cptr1, bucketsize);
00335                         cptr1 += src_dim_strd;
00336                         cptr2 += res_dim_strd;
00337                     }
00338                     cptr2 = (char *) cr;
00339                     for ( ; i < extent; i++) {
00340                         (void) memcpy (cptr2, cptr1, bucketsize);
00341                         cptr1 += src_dim_strd;
00342                         cptr2 += res_dim_strd;
00343                     }
00344                     break;
00345
00346                 case DVSUBTYPE_DERIVED :
00347                     src_dim_strd = source->dimension[0].stride_mult;
00348                     res_dim_strd = result->dimension[0].stride_mult;
00349                     for (i = 0; i < bucketsize; i++) {
00350                         wptr1 = (_f_int *) sptr;
00351                         wptr2 = (_f_int *) rptr + (extent-shft) *
00352                                         bucketsize;
00353                         for (j = 0; j < shft; j++) {
00354                             sindx = i + (j * src_dim_strd);
00355                             rindx = i + (j * res_dim_strd);
00356                             wptr2[rindx] = wptr1[sindx];
00357                         }
00358                         wptr2 = (_f_int *) rptr;
00359                         for (k = 0 ; j < extent; j++, k++) {
00360                             sindx = i + (j * src_dim_strd);
00361                             rindx = i + (k * res_dim_strd);
00362                             wptr2[rindx] = wptr1[sindx];
00363                         }
00364                     }
00365                     break;
00366
00367                 case DVSUBTYPE_BIT64 :
00368                     uptr1 = (_f_int8 *) sptr;
00369                     uptr2 = (_f_int8 *) rptr + ((extent - shft) *
00370                                 res_strd[0]);
00371                     src_dim_strd = src_strd[0];
00372                     res_dim_strd = res_strd[0];
00373 #ifndef CRAY2
00374                     for (i = 0; i < shft; i++) {
00375                         sindx = i * src_dim_strd;
00376                         rindx = i * res_dim_strd;
00377                         uptr2[rindx] = uptr1[sindx];
00378                     }
00379                     uptr2 = (_f_int8 *) rptr;
00380                     for (j = 0 ; i < extent; i++, j++) {
00381                         sindx = i * src_dim_strd;
00382                         rindx = j * res_dim_strd;
00383                         uptr2[rindx] = uptr1[sindx];
00384                     }
00385 #else
00386                     memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft);
00387                     uptr2 = (_f_int8 *) rptr;
00388                     uptr1 = (_f_int8 *) sptr + (shft * src_dim_strd);
00389                     shft = extent - shft;
00390                     memstride (uptr2, res_dim_strd, uptr1, src_dim_strd, shft);
00391 #endif
00392                     break;
00393
00394                 case DVSUBTYPE_BIT32 :
00395                     hptr1 = (_f_int4 *) sptr;
00396                     hptr2 = (_f_int4 *) rptr + ((extent - shft) * res_strd[0]);
00397                     src_dim_strd = src_strd[0];
00398                     res_dim_strd = res_strd[0];
00399                     for (i = 0; i < shft; i++) {
00400                         sindx = i * src_dim_strd;
00401                         rindx = i * res_dim_strd;
00402                         hptr2[rindx] = hptr1[sindx];
00403                     }
00404                     hptr2 = (_f_int4 *) rptr;
00405                     for (j = 0 ; i < extent; i++, j++) {
00406                         sindx = i * src_dim_strd;
00407                         rindx = j * res_dim_strd;
00408                         hptr2[rindx] = hptr1[sindx];
00409                     }
00410                     break;
00411
00412                 case DVSUBTYPE_BIT128 :
00413                     dptr1 = (_f_real16 *) sptr;
00414                     dptr2 = (_f_real16 *) rptr +
00415                                 ((extent - shft) * res_strd[0]);
00416                     src_dim_strd = src_strd[0];
00417                     res_dim_strd = res_strd[0];
00418                     for (i = 0; i < shft; i++) {
00419                         sindx = i * src_dim_strd;
00420                         rindx = i * res_dim_strd;
00421                         dptr2[rindx] = dptr1[sindx];
00422                     }
00423                     dptr2 = (_f_real16 *) rptr;
00424                     for (j = 0 ; i < extent; i++, j++) {
00425                         sindx = i * src_dim_strd;
00426                         rindx = j * res_dim_strd;
00427                         dptr2[rindx] = dptr1[sindx];
00428                     }
00429                     break;
00430
00431 #ifdef _F_COMP16
00432                 case DVSUBTYPE_BIT256 :
00433                     xptr1 = (dblcmplx *) sptr;
00434                     xptr2 = (dblcmplx *) rptr + ((extent - shft) * res_strd[0]);
00435                     src_dim_strd = src_strd[0];
00436                     res_dim_strd = res_strd[0];
00437                     for (i = 0; i < shft; i++) {
00438                         sindx = i * src_dim_strd;
00439                         rindx = i * res_dim_strd;
00440                         xptr2[rindx].re = xptr1[sindx].re;
00441                         xptr2[rindx].im = xptr1[sindx].im;
00442                     }
00443                     xptr2 = (dblcmplx *) rptr;
00444                     for (j = 0 ; i < extent; i++, j++) {
00445                         sindx = i * src_dim_strd;
00446                         rindx = j * res_dim_strd;
00447                         xptr2[rindx].re = xptr1[sindx].re;
00448                         xptr2[rindx].im = xptr1[sindx].im;
00449                     }
00450                     break;
00451 #endif
00452
00453                 default :
00454                     _lerror (_LELVL_ABORT, FEINTDTY);
00455             }
00456
00457 /*    Arrays with rank of 2     */
00458
00459         } else if (rank == 2) {
00460
00461 /*    Set dimension and non_dimension indices   */
00462
00463             if (dim == 0)
00464                 non_dim = 1;
00465             else
00466                 non_dim = 0;
00467
00468 /*    Set up shortcut variables for both array dimensions    */
00469
00470             if (bucketsize > 1 && arithmetic) {
00471                 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00472                 src_strd[1] = source->dimension[1].stride_mult / bucketsize;
00473                 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00474                 res_strd[1] = result->dimension[1].stride_mult / bucketsize;
00475             } else {
00476                 src_strd[0] = source->dimension[0].stride_mult;
00477                 src_strd[1] = source->dimension[1].stride_mult;
00478                 res_strd[0] = result->dimension[0].stride_mult;
00479                 res_strd[1] = result->dimension[1].stride_mult;
00480             }
00481             shft_strd[0] = shift->dimension[0].stride_mult / shft_size;
00482
00483 /*    Put information about DIM index in scalars for vectorization */
00484
00485             src_dim_strd = src_strd[dim];
00486             res_dim_strd = res_strd[dim];
00487
00488 /*    Set up extent temporary variables    */
00489
00490             extent = source->dimension[dim].extent;
00491             extlow = -extent;
00492             exthi  = 2 * extent;
00493
00494 /*      Set up shift variables          */
00495
00496             if (shift->base_addr.a.el_len == 64)
00497                 iptr8 = (_f_int8 *) shptr;
00498             else
00499                 iptr4 = (_f_int4 *) shptr;
00500             if (shift->n_dim == 1) {
00501                 shft_dim = 1;
00502                 shft_dim_strd = shift->dimension[0].stride_mult / shft_size;
00503                 shindx = 0;
00504             } else {                    /* scalar value, only get it once */
00505                 shft_dim = 0;
00506                 if (shift->base_addr.a.el_len == 64)
00507                     shft = iptr8[0];
00508                 else
00509                     shft = iptr4[0];
00510                 if (shft >= extent) {
00511                     if (shft < exthi)
00512                         shft -= extent;
00513                     else
00514                         shft %= extent;
00515                 } else if (shft < 0) {
00516                     if (shft >= extlow)
00517                         shft += extent;
00518                     else {
00519                         shft %= extent;
00520                         if (shft < 0)
00521                             shft += extent;
00522                     }
00523                 }
00524             }
00525
00526 /*    Outer loop is for dimension not being shifted    */
00527
00528             for (i = 0; i < source->dimension[non_dim].extent; i++) {
00529
00530 /*      Get shift value if it is not a scalar   */
00531
00532                 if (shft_dim == 1) {
00533                     if (shift->base_addr.a.el_len == 64)
00534                         shft = iptr8[shindx];
00535                     else
00536                         shft = iptr4[shindx];
00537                     if (shft >= extent) {
00538                         if (shft < exthi)
00539                             shft -= extent;
00540                         else
00541                             shft %= extent;
00542                     } else if (shft < 0) {
00543                         if (shft >= extlow)
00544                             shft += extent;
00545                         else {
00546                             shft %= extent;
00547                             if (shft < 0)
00548                                 shft += extent;
00549                         }
00550                     }
00551                     shindx += shft_dim_strd;
00552                 }
00553
00554                 switch (subtype) {
00555                     case DVSUBTYPE_CHAR :
00556                         cptr1 = (char *) cs + (i * src_strd[non_dim]);
00557                         cptr2 = (char *) cr + (i * res_strd[non_dim]) +
00558                                 ((extent - shft) * res_dim_strd);
00559                         for (j = 0; j < shft; j++) {
00560                             (void) memcpy (cptr2, cptr1, bucketsize);
00561                             cptr1 += src_dim_strd;
00562                             cptr2 += src_dim_strd;
00563                         }
00564                         cptr2 = (char *) cr + (i * res_strd[non_dim]);
00565                         for ( ; j < extent; j++) {
00566                             (void) memcpy (cptr2, cptr1, bucketsize);
00567                             cptr1 += src_dim_strd;
00568                             cptr2 += src_dim_strd;
00569                         }
00570                         break;
00571
00572                     case DVSUBTYPE_DERIVED :
00573                         wptr1 = (_f_int *) sptr +
00574                                 (i * src_strd[non_dim]);
00575                         save_wptr1 = wptr1;
00576                         wptr2 = (_f_int *) rptr +
00577                                 (i * res_strd[non_dim]) +
00578                                 ((extent - shft) * res_dim_strd);
00579                         save_wptr2a = wptr2;
00580                         wptr2 = (_f_int *) rptr +
00581                                 (i * res_strd[non_dim]);
00582                         save_wptr2b = wptr2;
00583                         for (j = 0; j < bucketsize; j++) {
00584                             wptr1 = save_wptr1;
00585                             wptr2 = save_wptr2a;
00586                             for (k = 0; k < shft; k++) {
00587                                 sindx = j + (k * src_dim_strd);
00588                                 rindx = j + (k * res_dim_strd);
00589                                 wptr2[rindx] = wptr1[sindx];
00590                             }
00591                             wptr2 = save_wptr2b;
00592                             for (l = 0; k < extent; k++, l++) {
00593                                 sindx = j + (k * src_dim_strd);
00594                                 rindx = j + (l * res_dim_strd);
00595                                 wptr2[rindx] = wptr1[sindx];
00596                             }
00597                         }
00598                         break;
00599
00600                     case DVSUBTYPE_BIT64 :
00601                         uptr1 = (_f_int8 *) sptr +
00602                                 (i * src_strd[non_dim]);
00603                         save_uptr1 = uptr1;
00604                         uptr2 = (_f_int8 *) rptr +
00605                                 (i * res_strd[non_dim]) +
00606                                  ((extent - shft) * res_dim_strd);
00607 #ifndef CRAY2
00608                         for (j = 0; j < shft; j++) {
00609                             sindx = j * src_dim_strd;
00610                             rindx = j * res_dim_strd;
00611                             uptr2[rindx] = uptr1[sindx];
00612                         }
00613                         uptr2 = (_f_int8 *) rptr +
00614                                 (i * res_strd[non_dim]);
00615                         for (k = 0 ; j < extent; j++, k++) {
00616                             sindx = j * src_dim_strd;
00617                             rindx = k * res_dim_strd;
00618                             uptr2[rindx] = uptr1[sindx];
00619                         }
00620 #else
00621                         memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00622                                 shft);
00623                         uptr2 = (_f_int8 *) rptr +
00624                                 (i * res_strd[non_dim]);
00625                         uptr1 = save_uptr1 + (shft * src_dim_strd);
00626                         memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00627                                 extent-shft);
00628 #endif
00629                         break;
00630
00631                     case DVSUBTYPE_BIT32 :
00632                         hptr1 = (_f_int4 *) sptr + (i * src_strd[non_dim]);
00633                         hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]) +
00634                                 ((extent - shft) * res_dim_strd);
00635                         for (j = 0; j < shft; j++) {
00636                             sindx = j * src_dim_strd;
00637                             rindx = j * res_dim_strd;
00638                             hptr2[rindx] = hptr1[sindx];
00639                         }
00640                         hptr2 = (_f_int4 *) rptr + (i * res_strd[non_dim]);
00641                         for (k = 0 ; j < extent; j++, k++) {
00642                             sindx = j * src_dim_strd;
00643                             rindx = k * res_dim_strd;
00644                             hptr2[rindx] = hptr1[sindx];
00645                         }
00646                         break;
00647
00648                     case DVSUBTYPE_BIT128 :
00649                         dptr1 = (_f_real16 *) sptr + (i * src_strd[non_dim]);
00650                         dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]) +
00651                                 ((extent - shft) * res_dim_strd);
00652                         for (j = 0; j < shft; j++) {
00653                             sindx = j * src_dim_strd;
00654                             rindx = j * res_dim_strd;
00655                             dptr2[rindx] = dptr1[sindx];
00656                         }
00657                         dptr2 = (_f_real16 *) rptr + (i * res_strd[non_dim]);
00658                         for (k = 0 ; j < extent; j++, k++) {
00659                             sindx = j * src_dim_strd;
00660                             rindx = k * res_dim_strd;
00661                             dptr2[rindx] = dptr1[sindx];
00662                         }
00663                         break;
00664
00665 #ifdef _F_COMP16
00666                     case DVSUBTYPE_BIT256 :
00667                         xptr1 = (dblcmplx *) sptr + (i * src_strd[non_dim]);
00668                         xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]) +
00669                                 ((extent - shft) * res_dim_strd);
00670                         for (j = 0; j < shft; j++) {
00671                             sindx = j * src_dim_strd;
00672                             rindx = j * res_dim_strd;
00673                             xptr2[rindx].re = xptr1[sindx].re;
00674                             xptr2[rindx].im = xptr1[sindx].im;
00675                         }
00676                         xptr2 = (dblcmplx *) rptr + (i * res_strd[non_dim]);
00677                         for (k = 0 ; j < extent; j++, k++) {
00678                             sindx = j * src_dim_strd;
00679                             rindx = k * res_dim_strd;
00680                             xptr2[rindx].re = xptr1[sindx].re;
00681                             xptr2[rindx].im = xptr1[sindx].im;
00682                         }
00683                         break;
00684 #endif
00685
00686                     default :
00687                         _lerror (_LELVL_ABORT, FEINTDTY);
00688                     }
00689                 }
00690
00691 /*    Arrays with rank of 3-7   */
00692
00693         } else {
00694
00695             if (dim == 0) {
00696                 i = 0;
00697                 tot_ext = 1;
00698             } else
00699 #ifdef _UNICOS
00700 #pragma _CRI    shortloop
00701 #endif
00702             for (i = 0, tot_ext = 1; i < dim; i++) {
00703                 tot_ext *= source->dimension[i].extent;
00704                 src_ext[i] = source->dimension[i].extent;
00705                 if (bucketsize > 1 && arithmetic) {
00706                     src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00707                     res_strd[i] = result->dimension[i].stride_mult / bucketsize;
00708                 } else {
00709                     src_strd[i] = source->dimension[i].stride_mult;
00710                     res_strd[i] = result->dimension[i].stride_mult;
00711                 }
00712             }
00713             if (i < (rank - 1))
00714 #ifdef _UNICOS
00715 #pragma _CRI    shortloop
00716 #endif
00717               for ( ; i < rank-1; i++) {
00718                 tot_ext *= source->dimension[i+1].extent;
00719                 src_ext[i] = source->dimension[i+1].extent;
00720                 if (bucketsize > 1 && arithmetic) {
00721                     src_strd[i] = source->dimension[i+1].stride_mult/bucketsize;
00722                     res_strd[i] = result->dimension[i+1].stride_mult/bucketsize;
00723                 } else {
00724                     src_strd[i] = source->dimension[i+1].stride_mult;
00725                     res_strd[i] = result->dimension[i+1].stride_mult;
00726                 }
00727               }
00728
00729 /*    Set up some scalars which contain information about DIM    */
00730
00731             extent = source->dimension[dim].extent;
00732             extlow = -extent;
00733             exthi = 2 * extent;
00734
00735             if (bucketsize > 1 && arithmetic) {
00736                 src_dim_strd = source->dimension[dim].stride_mult / bucketsize;
00737                 res_dim_strd = result->dimension[dim].stride_mult / bucketsize;
00738             } else {
00739                 src_dim_strd = source->dimension[dim].stride_mult;
00740                 res_dim_strd = result->dimension[dim].stride_mult;
00741             }
00742
00743 /*      Set up the shift variables              */
00744
00745             if (shift->base_addr.a.el_len == 64)
00746                 iptr8 = (_f_int8 *) shptr;
00747             else
00748                 iptr4 = (_f_int4 *) shptr;
00749             if (shift->n_dim == 0) {
00750                 shft_dim = 0;
00751                 if (shift->base_addr.a.el_len == 64)
00752                     shft = iptr8[0];
00753                 else
00754                     shft = iptr4[0];
00755                 if (shft >= extent) {
00756                     if (shft < exthi)
00757                         shft -= extent;
00758                     else
00759                         shft %= extent;
00760                 } else if (shft < 0) {
00761                     if (shft >= extlow)
00762                         shft += extent;
00763                     else {
00764                         shft %= extent;
00765                         if (shft < 0)
00766                             shft += extent;
00767                     }
00768                 }
00769             } else {
00770                 shft_dim = 1;
00771                 shindx = 0;
00772 #ifdef _UNICOS
00773 #pragma _CRI    shortloop
00774 #endif
00775                 for (i = 0; i < rank-1; i++) {
00776                     shft_strd[i] = shift->dimension[i].stride_mult;
00777                 }
00778             }
00779
00780 /*      Initialize the curdim and offset arrays to 0            */
00781
00782             for (i = 0; i < rank-1; i++) {
00783                 curdim[i] = 0;
00784                 src_off[i] = 0;
00785                 res_off[i] = 0;
00786                 shft_off[i] = 0;
00787             }
00788
00789 /*
00790  *    The outer loop will be executed once for each combination of
00791  *    indices not including the DIM dimension.
00792  */
00793
00794             for (i = 0; i < tot_ext; i++) {
00795
00796 /*    Calculate the shift value used throughout the inner loop    */
00797
00798                 if (shft_dim) {
00799                     switch (rank) {
00800                         case 3 :
00801                             shindx = shft_off[0] + shft_off[1];
00802                             break;
00803                         case 4 :
00804                             shindx = shft_off[0] + shft_off[1] + shft_off[2];
00805                             break;
00806                         case 5 :
00807                             shindx = shft_off[0] + shft_off[1] +
00808                                      shft_off[2] + shft_off[3];
00809                             break;
00810                         case 6 :
00811                             shindx = shft_off[0] + shft_off[1] + shft_off[2] +
00812                                      shft_off[3] + shft_off[4];
00813                             break;
00814                         default :
00815                             shindx = shft_off[0] + shft_off[1] + shft_off[2] +
00816                                      shft_off[3] + shft_off[4] + shft_off[5];
00817                     }
00818                     shft = shptr[shindx];
00819                     if (shft >= extent) {
00820                         if (shft < exthi)
00821                             shft -= extent;
00822                         else
00823                             shft %= extent;
00824                     } else if (shft < 0) {
00825                         if (shft >= extlow)
00826                             shft += extent;
00827                         else {
00828                             shft %= extent;
00829                             if (shft < 0)
00830                                 shft += extent;
00831                         }
00832                     }
00833                 }
00834
00835                 switch (rank) {
00836                     case 3 :
00837                         sindx = src_off[0] + src_off[1];
00838                         rindx = res_off[0] + res_off[1];
00839                         break;
00840                     case 4 :
00841                         sindx = src_off[0] + src_off[1] + src_off[2];
00842                         rindx = res_off[0] + res_off[1] + res_off[2];
00843                         break;
00844                     case 5 :
00845                         sindx = src_off[0] + src_off[1] +
00846                                 src_off[2] + src_off[3];
00847                         rindx = res_off[0] + res_off[1] +
00848                                 res_off[2] + res_off[3];
00849                         break;
00850                     case 6 :
00851                         sindx = src_off[0] + src_off[1] + src_off[2] +
00852                                 src_off[3] + src_off[4];
00853                         rindx = res_off[0] + res_off[1] + res_off[2] +
00854                                 res_off[3] + res_off[4];
00855                         break;
00856                     default :
00857                         sindx = src_off[0] + src_off[1] + src_off[2] +
00858                                 src_off[3] + src_off[4] + src_off[5];
00859                         rindx = res_off[0] + res_off[1] + res_off[2] +
00860                                 res_off[3] + res_off[4] + res_off[5];
00861                 }
00862
00863                 switch (subtype) {
00864                     case DVSUBTYPE_CHAR :
00865                         cptr1 = (char *) cs + sindx;
00866                         cptr2 = (char *) cr + rindx +
00867                                 ((extent - shft) * res_dim_strd);
00868                         for (j = 0; j < shft; j++) {
00869                             (void) memcpy (cptr2, cptr1, bucketsize);
00870                             cptr1 += src_dim_strd;
00871                             cptr2 += res_dim_strd;
00872                         }
00873                         cptr2 = (char *) cr + rindx;
00874                         for ( ; j < extent; j++) {
00875                             (void) memcpy (cptr2, cptr1, bucketsize);
00876                             cptr1 += src_dim_strd;
00877                             cptr2 += res_dim_strd;
00878                         }
00879                         break;
00880
00881                     case DVSUBTYPE_DERIVED :
00882                         wptr1 = (_f_int *) sptr + sindx;
00883                         save_wptr1 = wptr1;
00884                         wptr2 = (_f_int *) rptr + rindx +
00885                                 ((extent - shft) * res_dim_strd);
00886                         save_wptr2a = wptr2;
00887                         wptr2 = (_f_int *) rptr + rindx;
00888                         save_wptr2b = wptr2;
00889                         for (j = 0; j < bucketsize; j++) {
00890                             wptr1 = save_wptr1;
00891                             wptr2 = save_wptr2a;
00892                             for (k = 0; k < shft; k++) {
00893                                 sindx2 = j + (k * src_dim_strd);
00894                                 rindx2 = j + (k * res_dim_strd);
00895                                 wptr2[rindx2] = wptr1[sindx2];
00896                             }
00897                             wptr2 = save_wptr2b;
00898                             for (l = 0; k < extent; k++, l++) {
00899                                 sindx2 = j + (k * src_dim_strd);
00900                                 rindx2 = j + (l * res_dim_strd);
00901                                 wptr2[rindx2] = wptr1[sindx2];
00902                             }
00903                         }
00904                         break;
00905
00906                     case DVSUBTYPE_BIT64 :
00907                         uptr1 = (_f_int8 *) sptr + sindx;
00908                         save_uptr1 = uptr1;
00909                         uptr2 = (_f_int8 *) rptr + rindx +
00910                                 ((extent - shft) * res_dim_strd);
00911 #ifndef CRAY2
00912                         for (j = 0; j < shft; j++) {
00913                             sindx2 = j * src_dim_strd;
00914                             rindx2 = j * res_dim_strd;
00915                             uptr2[rindx2] = uptr1[sindx2];
00916                         }
00917                         uptr2 = (_f_int8 *) rptr + rindx;
00918                         for (k = 0 ; j < extent; j++, k++) {
00919                             sindx2 = j * src_dim_strd;
00920                             rindx2 = k * res_dim_strd;
00921                             uptr2[rindx2] = uptr1[sindx2];
00922                         }
00923 #else
00924                         memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00925                                 shft);
00926                         uptr2 = (_f_int8 *) rptr + rindx;
00927                         uptr1 = save_uptr1 + (shft * src_dim_strd);
00928                         memstride (uptr2, res_dim_strd, uptr1, src_dim_strd,
00929                                 extent-shft);
00930 #endif
00931                         break;
00932
00933                     case DVSUBTYPE_BIT32 :
00934                         hptr1 = (_f_int4 *) sptr + sindx;
00935                         hptr2 = (_f_int4 *) rptr + rindx +
00936                                 ((extent - shft) * res_dim_strd);
00937                         for (j = 0; j < shft; j++) {
00938                             sindx2 = j * src_dim_strd;
00939                             rindx2 = j * res_dim_strd;
00940                             hptr2[rindx2] = hptr1[sindx2];
00941                         }
00942                         hptr2 = (_f_int4 *) rptr + rindx;
00943                         for (k = 0; j < extent; j++, k++) {
00944                             sindx2 = j * src_dim_strd;
00945                             rindx2 = k * res_dim_strd;
00946                             hptr2[rindx2] = hptr1[sindx2];
00947                         }
00948                         break;
00949
00950                     case DVSUBTYPE_BIT128 :
00951                         dptr1 = (_f_real16 *) sptr + sindx;
00952                         dptr2 = (_f_real16 *) rptr + rindx +
00953                                 ((extent - shft) * res_dim_strd);
00954                         for (j = 0; j < shft; j++) {
00955                             sindx2 = j * src_dim_strd;
00956                             rindx2 = j * res_dim_strd;
00957                             dptr2[rindx2] = dptr1[sindx2];
00958                         }
00959                         dptr2 = (_f_real16 *) rptr + rindx;
00960                         for (k = 0; j < extent; j++, k++) {
00961                             sindx2 = j * src_dim_strd;
00962                             rindx2 = k * res_dim_strd;
00963                             dptr2[rindx2] = dptr1[sindx2];
00964                         }
00965                         break;
00966
00967 #ifdef _F_COMP16
00968                     case DVSUBTYPE_BIT256 :
00969                         xptr1 = (dblcmplx *) sptr + sindx;
00970                         xptr2 = (dblcmplx *) rptr + rindx +
00971                                 ((extent - shft) * res_dim_strd);
00972                         for (j = 0; j < shft; j++) {
00973                             sindx2 = j * src_dim_strd;
00974                             rindx2 = j * res_dim_strd;
00975                             xptr2[rindx2].re = xptr1[sindx2].re;
00976                             xptr2[rindx2].im = xptr1[sindx2].im;
00977                         }
00978                         xptr2 = (dblcmplx *) rptr + rindx;
00979                         for (k = 0; j < extent; j++, k++) {
00980                             sindx2 = j * src_dim_strd;
00981                             rindx2 = k * res_dim_strd;
00982                             xptr2[rindx2].re = xptr1[sindx2].re;
00983                             xptr2[rindx2].im = xptr1[sindx2].im;
00984                         }
00985                         break;
00986 #endif
00987
00988                     default :
00989                         _lerror (_LELVL_ABORT, FEINTDTY);
00990                 }
00991
00992 /*    Increment the current dimension counter.    */
00993
00994                 curdim[0]++;
00995                 if (curdim[0] < src_ext[0]) {
00996                     src_off[0] += src_strd[0];
00997                     res_off[0] += res_strd[0];
00998                     shft_off[0] += shft_strd[0];
00999                 } else {
01000                     curdim[0] = 0;
01001                     src_off[0] = 0;
01002                     res_off[0] = 0;
01003                     shft_off[0] = 0;
01004                     curdim[1]++;
01005                     if (curdim[1] < src_ext[1]) {
01006                         src_off[1] += src_strd[1];
01007                         res_off[1] += res_strd[1];
01008                         shft_off[1] += shft_strd[1];
01009                     } else {
01010                         curdim[1] = 0;
01011                         src_off[1] = 0;
01012                         res_off[1] = 0;
01013                         shft_off[1] = 0;
01014                         curdim[2]++;
01015                         if (curdim[2] < src_ext[2]) {
01016                             src_off[2] += src_strd[2];
01017                             res_off[2] += res_strd[2];
01018                             shft_off[2] += shft_strd[2];
01019                         } else {
01020                             curdim[2] = 0;
01021                             src_off[2] = 0;
01022                             res_off[2] = 0;
01023                             shft_off[2] = 0;
01024                             curdim[3]++;
01025                             if (curdim[3] < src_ext[3]) {
01026                                 src_off[3] += src_strd[3];
01027                                 res_off[3] += res_strd[3];
01028                                 shft_off[3] += shft_strd[3];
01029                             } else {
01030                                 curdim[3] = 0;
01031                                 src_off[3] = 0;
01032                                 res_off[3] = 0;
01033                                 shft_off[3] = 0;
01034                                 curdim[4]++;
01035                                 if (curdim[4] < src_ext[4]) {
01036                                     src_off[4] += src_strd[4];
01037                                     res_off[4] += res_strd[4];
01038                                     shft_off[4] += shft_strd[4];
01039                                 } else {
01040                                     curdim[4] = 0;
01041                                     src_off[4] = 0;
01042                                     res_off[4] = 0;
01043                                     shft_off[4] = 0;
01044                                     curdim[5]++;
01045                                     if (curdim[5] < src_ext[5]) {
01046                                         src_off[5] += src_strd[5];
01047                                         res_off[5] += res_strd[5];
01048                                         shft_off[5] += shft_strd[5];
01049                                     }
01050                                 }
01051                             }
01052                         }
01053                     }
01054                 }
01055             }
01056         }
01057 }