Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
reshape.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License 
00007   as published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU Lesser General Public 
00021   License along with this program; if not, write the Free Software 
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 
00023   USA.
00024 
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027 
00028   http://www.sgi.com
00029 
00030   For further information regarding this notice, see:
00031 
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033 
00034 */
00035 
00036 
00037 static const char USMID[] = "@(#) libfi/array/reshape.c 92.0    10/08/98 14:37:14";
00038 
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <stddef.h>
00042 #include <cray/dopevec.h>
00043 #include <cray/portdefs.h>
00044 #include "arraydefs.h"
00045 
00046 /*
00047  *      INCR_PAD increments the index for the pad array, and calculates
00048  *      the offset into the pad array.
00049  */
00050 
00051 #define INCR_PAD()                                                      \
00052         pad_indx[0]++;                                                  \
00053         if (pad_indx[0] < pad_ext[0])                                   \
00054             pad_off[0] = pad_indx[0] * pad_strd[0];                     \
00055         else {                                                          \
00056             pad_indx[0] = 0;                                            \
00057             pad_off[0] = 0;                                             \
00058             pad_indx[1]++;                                              \
00059             if (pad_indx[1] < pad_ext[1])                               \
00060                 pad_off[1] = pad_indx[1] * pad_strd[1];                 \
00061             else {                                                      \
00062                 pad_indx[1] = 0;                                        \
00063                 pad_off[1] = 0;                                         \
00064                 pad_indx[2]++;                                          \
00065                 if (pad_indx[2] < pad_ext[2])                           \
00066                     pad_off[2] = pad_indx[2] * pad_strd[2];             \
00067                 else {                                                  \
00068                     pad_indx[2] = 0;                                    \
00069                     pad_off[2] = 0;                                     \
00070                     pad_indx[3]++;                                      \
00071                     if (pad_indx[3] < pad_ext[3])                       \
00072                         pad_off[3] = pad_indx[3] * pad_strd[3];         \
00073                     else {                                              \
00074                         pad_indx[3] = 0;                                \
00075                         pad_off[3] = 0;                                 \
00076                         pad_indx[4]++;                                  \
00077                         if (pad_indx[4] < pad_ext[4])                   \
00078                             pad_off[4] = pad_indx[4] * pad_strd[4];     \
00079                         else {                                          \
00080                             pad_indx[4] = 0;                            \
00081                             pad_off[4] = 0;                             \
00082                             pad_indx[5]++;                              \
00083                             if (pad_indx[5] < pad_ext[5])               \
00084                                 pad_off[5] = pad_indx[5] * pad_strd[5]; \
00085                             else {                                      \
00086                                 pad_indx[5] = 0;                        \
00087                                 pad_off[5] = 0;                         \
00088                                 pad_indx[6]++;                          \
00089                                 if (pad_indx[6] < pad_ext[6])           \
00090                                     pad_off[6] = pad_indx[6] * pad_strd[6]; \
00091                                 else                                    \
00092                                     pad_indx[6] = 0;                    \
00093                                     pad_off[6] = 0;                     \
00094                             }                                           \
00095                         }                                               \
00096                     }                                                   \
00097                 }                                                       \
00098             }                                                           \
00099         }
00100 
00101 /*
00102  *      ADD_INDEX calculates the index from the begging of an array.  The
00103  *      array is passed in as the first argument.
00104  */
00105 
00106 #define ADD_INDEX(indx,off,rank)                                        \
00107         if (rank == 1) {                                                \
00108             indx = off[0];                                              \
00109         } else if (rank == 2) {                                         \
00110             indx = off[0] + off[1];                                     \
00111         } else if (rank == 3) {                                         \
00112             indx = off[0] + off[1] + off[2];                            \
00113         } else if (rank == 4) {                                         \
00114             indx = off[0] + off[1] + off[2] + off[3];                   \
00115         } else if (rank == 5) {                                         \
00116             indx = off[0] + off[1] + off[2] + off[3] + off[4];          \
00117         } else if (rank == 6) {                                         \
00118             indx = off[0] + off[1] + off[2] +                           \
00119                    off[3] + off[4] + off[5];                            \
00120         } else {                                                        \
00121             indx = off[0] + off[1] + off[2] + off[3] +                  \
00122                    off[4] + off[5] + off[6];                            \
00123         }
00124 
00125 /*
00126  *      INCR_SRC increments the index for the source array, and calculates
00127  *      the offset into the source array.
00128  */
00129 
00130 #define INCR_SRC()                                                      \
00131     src_indx[0]++;                                                      \
00132     if (src_indx[0] < src_ext[0])                                       \
00133         src_off[0] = src_indx[0] * src_strd[0];                         \
00134     else {                                                              \
00135         src_indx[0] = 0;                                                \
00136         src_off[0] = 0;                                                 \
00137         src_indx[1]++;                                                  \
00138         if (src_indx[1] < src_ext[1])                                   \
00139             src_off[1] = src_indx[1] * src_strd[1];                     \
00140         else {                                                          \
00141             src_indx[1] = 0;                                            \
00142             src_off[1] = 0;                                             \
00143             src_indx[2]++;                                              \
00144             if (src_indx[2] < src_ext[2])                               \
00145                 src_off[2] = src_indx[2] * src_strd[2];                 \
00146             else {                                                      \
00147                 src_indx[2] = 0;                                        \
00148                 src_off[2] = 0;                                         \
00149                 src_indx[3]++;                                          \
00150                 if (src_indx[3] < src_ext[3])                           \
00151                     src_off[3] = src_indx[3] * src_strd[3];             \
00152                 else {                                                  \
00153                     src_indx[3] = 0;                                    \
00154                     src_off[3] = 0;                                     \
00155                     src_indx[4]++;                                      \
00156                     if (src_indx[4] < src_ext[4])                       \
00157                         src_off[4] = src_indx[4] * src_strd[4];         \
00158                     else {                                              \
00159                         src_indx[4] = 0;                                \
00160                         src_off[4] = 0;                                 \
00161                         src_indx[5]++;                                  \
00162                         if (src_indx[5] < src_ext[5])                   \
00163                             src_off[5] = src_indx[5] * src_strd[5];     \
00164                         else {                                          \
00165                             src_indx[5] = 0;                            \
00166                             src_off[5] = 0;                             \
00167                             src_indx[6]++;                              \
00168                             if (src_indx[6] < src_ext[6])               \
00169                                 src_off[6] = src_indx[6] * src_strd[6]; \
00170                         }                                               \
00171                     }                                                   \
00172                 }                                                       \
00173             }                                                           \
00174         }                                                               \
00175     }
00176 
00177 /*
00178  *      INCR_RES increments the index for the result array, and calculates
00179  *      the offset into the result array.
00180  */
00181 
00182 #define INCR_RES()                                                      \
00183     res_indx[0]++;                                                      \
00184     if (res_indx[0] < res_ext[0])                                       \
00185         res_off[0] = res_indx[0] * res_strd[0];                         \
00186     else {                                                              \
00187         res_indx[0] = 0;                                                \
00188         res_off[0] = 0;                                                 \
00189         res_indx[1]++;                                                  \
00190         if (res_indx[1] < res_ext[1])                                   \
00191             res_off[1] = res_indx[1] * res_strd[1];                     \
00192         else {                                                          \
00193             res_indx[1] = 0;                                            \
00194             res_off[1] = 0;                                             \
00195             res_indx[2]++;                                              \
00196             if (res_indx[2] < res_ext[2])                               \
00197                 res_off[2] = res_indx[2] * res_strd[2];                 \
00198             else {                                                      \
00199                 res_indx[2] = 0;                                        \
00200                 res_off[2] = 0;                                         \
00201                 res_indx[3]++;                                          \
00202                 if (res_indx[3] < res_ext[3])                           \
00203                     res_off[3] = res_indx[3] * res_strd[3];             \
00204                 else {                                                  \
00205                     res_indx[3] = 0;                                    \
00206                     res_off[3] = 0;                                     \
00207                     res_indx[4]++;                                      \
00208                     if (res_indx[4] < res_ext[4])                       \
00209                         res_off[4] = res_indx[4] * res_strd[4];         \
00210                     else {                                              \
00211                         res_indx[4] = 0;                                \
00212                         res_off[4] = 0;                                 \
00213                         res_indx[5]++;                                  \
00214                         if (res_indx[5] < res_ext[5])                   \
00215                             res_off[5] = res_indx[5] * res_strd[5];     \
00216                         else {                                          \
00217                             res_indx[5] = 0;                            \
00218                             res_off[5] = 0;                             \
00219                             res_indx[6]++;                              \
00220                             if (res_indx[6] < res_ext[6])               \
00221                                 res_off[6] = res_indx[6] * res_strd[6]; \
00222                         }                                               \
00223                     }                                                   \
00224                 }                                                       \
00225             }                                                           \
00226         }                                                               \
00227     }
00228 
00229 #if defined _F_INT4 && defined _F_INT8
00230 #define VALUE(size, ptr) \
00231 ((int) (size == 64 ? (*((_f_int8 *) (ptr))) : (*((_f_int4 *) (ptr)))))
00232 #else
00233 #define VALUE(size, ptr) ((int) (*((_f_int *) (ptr))))
00234 #endif
00235 
00236 #ifdef _UNICOS
00237 #pragma _CRI duplicate _RESHAPE as RESHAPE@
00238 #endif
00239 void
00240 _RESHAPE (      DopeVectorType  *result,
00241                 DopeVectorType  *source,
00242                 DopeVectorType  *shape,
00243                 DopeVectorType  *pad,
00244                 DopeVectorType  *order)
00245 
00246 {
00247         int     *sh;            /* ptr to shape array           */
00248         char    *cs;            /* char ptr to source array     */
00249         char    *cr;            /* char ptr to result array     */
00250         char    *cp;            /* char ptr to pad array        */
00251         char            * restrict cptr1;       /* char         */
00252         char            * restrict cptr2;       /* char         */
00253         char            * restrict cptr3;       /* char         */
00254         _f_int8         * restrict uptr1;       /* 64-bit       */
00255         _f_int8         * restrict uptr2;       /* 64-bit       */
00256         _f_int8         * restrict uptr3;       /* 64-bit       */
00257         _f_int          * restrict fptr1;       /* def kind int */
00258         _f_int          * restrict fptr2;       /* def kind int */
00259         _f_int          * restrict fptr3;       /* def kind int */
00260         _f_real16       * restrict dptr1;       /* 128-bit      */
00261         _f_real16       * restrict dptr2;       /* 128-bit      */
00262         _f_real16       * restrict dptr3;       /* 128-bit      */
00263 #ifdef _F_COMP16
00264         dblcmplx        * restrict xptr1;       /* 256-bit      */
00265         dblcmplx        * restrict xptr2;       /* 256-bit      */
00266         dblcmplx        * restrict xptr3;       /* 256-bit      */
00267 #endif
00268         _f_int4         * restrict hptr1;       /* 32-bit       */
00269         _f_int4         * restrict hptr2;       /* 32-bit       */
00270         _f_int4         * restrict hptr3;       /* 32-bit       */
00271         void            * restrict sptr;        /* ptr to src   */
00272         void            * restrict rptr;        /* ptr to res   */
00273         void            * restrict pptr;        /* ptr to pad   */
00274         _f_int4         * restrict optr4;       /* ptr to odr   */
00275         _f_int8         * restrict optr8;       /* ptr to odr   */
00276         _f_int4         * restrict shptr4;      /* ptr to odr   */
00277         _f_int8         * restrict shptr8;      /* ptr to odr   */
00278         long    sindx;          /* source index                 */
00279         long    rindx;          /* result index                 */
00280         long    shindx;         /* shape index                  */
00281         long    pindx;          /* pad index                    */
00282         long    oindx;          /* order index                  */
00283         _f_int  bucketsize;     /* size of each data element    */
00284         long    nbytes;         /* # of bytes in result array   */
00285         _f_int  bytealligned;   /* byte aligned flag            */
00286         _f_int  type;           /* data type                    */
00287         _f_int  subtype;        /* sub-type                     */
00288         _f_int  arithmetic;     /* arithmetic data type         */
00289         _f_int  rank;           /* rank of result matrix        */
00290         long    src_ext[MAXDIM];   /* extents for source        */
00291         long    src_strd[MAXDIM];  /* stride for source         */
00292         long    src_off[MAXDIM];   /* source offset             */
00293         long    src_indx[MAXDIM];  /* source index              */
00294         long    res_ext[MAXDIM];   /* extents for result        */
00295         long    res_strd[MAXDIM];  /* stride for result         */
00296         long    res_off[MAXDIM];   /* result offset             */
00297         long    res_indx[MAXDIM];  /* result index              */
00298         long    pad_ext[MAXDIM];   /* extents for pad           */
00299         long    pad_strd[MAXDIM];  /* stride for pad            */
00300         long    pad_off[MAXDIM];   /* pad offset                */
00301         long    pad_indx[MAXDIM];  /* pad index                 */
00302         _f_int  src_rank;       /* rank of source matrix        */
00303         _f_int  res_rank;       /* rank of result matrix        */
00304         _f_int  pad_rank;       /* rank of pad matrix           */
00305         long    shp_strd;       /* stride for shape             */
00306         long    ord_strd;       /* stride for order             */
00307         long    tot_ext;        /* total extent counter         */
00308         long    tot_src;
00309         long    tot_shp;
00310         long    total;
00311         _f_int  early_src;
00312         _f_int  early_pad;
00313         _f_int  early_shp;
00314         _f_int  early_ord;
00315         long    i, j, k;
00316         long    shape_len;
00317         long    order_len;
00318         _f_int  shbucket;
00319         _f_int  obucket;
00320         _f_int  order_chk[7];
00321         long    shp_vals[MAXDIM];
00322 
00323 /*      Set type and rank variables     */
00324 
00325         type = source->type_lens.type;
00326         rank = shape->dimension[0].extent;
00327         shape_len = shape->base_addr.a.el_len;
00328 
00329 /*
00330  *      If the extent for any index of any array is 0, we can exit now without
00331  *      having to spend time doing any work.
00332  *
00333  *      If any of the indices passed in the shape array are less than 0,
00334  *      call the error routine.
00335  */
00336 
00337         if (shape->dimension[0].extent == 0) {
00338             _lerror (_LELVL_ABORT, FESHPSZZ);
00339         }
00340 /*
00341  *      Set up an array of the values of shape.  This will be done so that
00342  *      they can be done once, and referenced wherever they are needed.
00343  */
00344 
00345         if (shape->type_lens.int_len == 64) {
00346             shptr8 = (_f_int8 *) shape->base_addr.a.ptr;
00347 #ifdef _UNICOS
00348 #pragma _CRI    shortloop
00349 #endif
00350             for (i = 0; i < rank; i++) {
00351                 shindx = i * (shape->dimension[0].stride_mult /
00352                         (shape->type_lens.int_len / BITS_PER_WORD));
00353                 shp_vals[i] = VALUE(shape_len,(shptr8 + shindx));
00354             }
00355         } else {
00356             shptr4 = (_f_int4 *) shape->base_addr.a.ptr;
00357             shbucket = shape->type_lens.int_len / BITS_PER_WORD;
00358             if (shbucket == 0)
00359                 shbucket = 1;
00360 #ifdef _UNICOS
00361 #pragma _CRI    shortloop
00362 #endif
00363             for (i = 0; i < rank; i++) {
00364                 shindx = i * (shape->dimension[0].stride_mult / shbucket);
00365                 shp_vals[i] = VALUE(shape_len,(shptr4 + shindx));
00366             }
00367         }
00368                     
00369 #ifdef _UNICOS
00370 #pragma _CRI    shortloop
00371 #endif
00372         for (i = 0; i < rank; i++) {
00373             if (shp_vals[i] < 0)
00374                 _lerror (_LELVL_ABORT, FERSHNEG);
00375         }
00376         early_shp = 0;
00377 #ifdef _UNICOS
00378 #pragma _CRI    shortloop
00379 #endif
00380         for (i = 0; i < rank; i++) {
00381             if (shp_vals[i] == 0)
00382                 early_shp = 1;
00383         }
00384 
00385         early_src = 0;
00386 #ifdef _UNICOS
00387 #pragma _CRI    shortloop
00388 #endif
00389         for (i = 0; i < source->n_dim; i++) {
00390             if (source->dimension[i].extent == 0) {
00391                 early_src = 1;
00392             }
00393         }
00394 
00395         early_pad = 0;
00396         if (pad) {
00397 #ifdef _UNICOS
00398 #pragma _CRI    shortloop
00399 #endif
00400             for (i = 0; i < pad->n_dim; i++) {
00401                 if (pad->dimension[i].extent == 0)
00402                     early_pad = 1;
00403             }
00404         }
00405 
00406         early_ord = 0;
00407         if (order) {
00408             if (order->dimension[0].extent == 0)
00409                 early_ord = 1;
00410         }
00411 
00412 /*
00413  *      Initialize every array element to 0.
00414  */
00415 
00416 #ifdef _UNICOS
00417 #pragma _CRI    shortloop
00418 #endif
00419         for (i = 0; i < MAXDIM; i++) {
00420             src_ext[i] = 0;
00421             src_strd[i] = 0;
00422             src_off[i] = 0;
00423             src_indx[i] = 0;
00424             res_ext[i] = 0;
00425             res_strd[i] = 0;
00426             res_off[i] = 0;
00427             res_indx[i] = 0;
00428             pad_ext[i] = 0;
00429             pad_strd[i] = 0;
00430             pad_off[i] = 0;
00431             pad_indx[i] = 0;
00432         }
00433 
00434 /*      Size calculation is based on variable type      */
00435 
00436         switch (type) {
00437             case DVTYPE_ASCII :
00438                 bytealligned = 1;
00439                 bucketsize = _fcdlen(source->base_addr.charptr);  /* bytes */
00440                 subtype = DVSUBTYPE_CHAR;
00441                 arithmetic = 0;
00442                 break;
00443             case DVTYPE_DERIVEDBYTE :
00444                 bytealligned = 1;
00445                 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00446                 subtype = DVSUBTYPE_CHAR;
00447                 arithmetic = 0;
00448                 break;
00449             case DVTYPE_DERIVEDWORD :
00450                 bytealligned = 0;
00451                 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00452                 subtype = DVSUBTYPE_DERIVED;
00453                 arithmetic = 0;
00454                 break;
00455             default :
00456                 bytealligned = 0;
00457                 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00458                 if (source->type_lens.int_len == 64) {
00459                     subtype = DVSUBTYPE_BIT64;
00460                 } else if (source->type_lens.int_len == 32) {
00461                     subtype = DVSUBTYPE_BIT32;
00462                     bucketsize = 1;
00463 #ifdef _F_COMP16
00464                 } else if (source->type_lens.int_len == 256) {
00465                     subtype = DVSUBTYPE_BIT256;
00466 #endif
00467                 } else {
00468                     subtype = DVSUBTYPE_BIT128;
00469                 }
00470                 arithmetic = 1;
00471         }
00472 
00473 /*      If not allocated, set up dope vector for result         */
00474 
00475         if (!result->assoc) {
00476             result->base_addr.a.ptr = (void *) NULL;
00477             result->n_dim = rank;
00478             result->orig_base = 0;
00479             result->orig_size = 0;
00480 
00481 /*      Determine size of space to allocate     */
00482 
00483             if (!bytealligned)
00484                 nbytes = bucketsize * BYTES_PER_WORD;
00485             else
00486                 nbytes = bucketsize;
00487             shindx = 0;
00488             if (shape->type_lens.int_len == 64)
00489                 shptr8 = (_f_int8 *) shape->base_addr.a.ptr;
00490             else
00491                 shptr4 = (_f_int4 *) shape->base_addr.a.ptr;
00492             shbucket = shape->type_lens.int_len / BITS_PER_WORD;
00493             if (shbucket == 0)
00494                 shbucket = 1;
00495             shp_strd = shape->dimension[0].stride_mult / shbucket;
00496             tot_ext = 1;
00497             for (i = 0; i < rank; i++) {
00498                 result->dimension[i].extent = shp_vals[i];
00499                 result->dimension[i].low_bound = 1;
00500                 result->dimension[i].stride_mult = tot_ext * bucketsize;
00501                 tot_ext *= shp_vals[i];
00502                 nbytes *= shp_vals[i];
00503             }
00504             if (nbytes > 0 && early_ord != 1) {
00505                 result->base_addr.a.ptr = (void *) malloc (nbytes);
00506                 if (result->base_addr.a.ptr == NULL)
00507                     _lerror (_LELVL_ABORT, FENOMEMY);
00508                 result->assoc = 1;
00509             } else
00510                 result->base_addr.a.ptr = (void *) NULL;
00511 
00512             result->base_addr.a.el_len = source->base_addr.a.el_len;
00513             if (type == DVTYPE_ASCII) {
00514                 cr = (char *) result->base_addr.a.ptr;
00515                 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00516             }
00517             result->orig_base = (void *) result->base_addr.a.ptr;
00518             result->orig_size = nbytes * BITS_PER_BYTE;
00519         }
00520 
00521 /*      Check to see if we can exit early       */
00522 
00523         if (early_shp || early_ord)
00524             return;
00525         if (early_src && (!pad || early_pad))
00526                 _lerror (_LELVL_ABORT, FERSHNPD);
00527 
00528 /*      Set up non-dopevector variables to hold index information       */
00529 
00530         for (i = 0, tot_src = 1; i < source->n_dim; i++) {
00531             src_ext[i] = source->dimension[i].extent;
00532             if (bucketsize > 1 && arithmetic) {
00533                 src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00534             } else {
00535                 src_strd[i] = source->dimension[i].stride_mult;
00536             }
00537             tot_src *= src_ext[i];
00538         }
00539         if (order == NULL) {
00540             for (i = 0; i < result->n_dim; i++) {
00541                 res_ext[i] = result->dimension[i].extent;
00542                 if (bucketsize > 1 && arithmetic) {
00543                     res_strd[i] = result->dimension[i].stride_mult / bucketsize;
00544                 } else {
00545                     res_strd[i] = result->dimension[i].stride_mult;
00546                 }
00547             }
00548         } else {
00549             oindx = 0;
00550             obucket = order->type_lens.int_len / BITS_PER_WORD;
00551             if (obucket == 0)
00552                 obucket = 1;
00553             ord_strd = order->dimension[0].stride_mult / obucket;
00554             if (order->type_lens.int_len == 64) 
00555                 optr8 = (_f_int8 *) order->base_addr.a.ptr;
00556             else
00557                 optr4 = (_f_int4 *) order->base_addr.a.ptr;
00558             order_len = order->base_addr.a.el_len;
00559             for (i = 0; i < result->n_dim; i++)
00560                 order_chk[i] = -1;
00561             for (i = 0; i < result->n_dim; i++) {
00562                 if (order->type_lens.int_len == 64)
00563                     j = VALUE(order_len, (optr8 + oindx)) - 1;
00564                 else
00565                     j = VALUE(order_len, (optr4 + oindx)) - 1;
00566                 if (j < 0 || j >= result->n_dim) 
00567                     _lerror (_LELVL_ABORT, FEBDORDR);
00568                 order_chk[j] = 1;
00569                 oindx += ord_strd;
00570                 res_ext[i] = result->dimension[j].extent;
00571                 if (bucketsize > 1 && arithmetic) {
00572                     res_strd[i] = result->dimension[j].stride_mult / bucketsize;
00573                 } else {
00574                     res_strd[i] = result->dimension[j].stride_mult;
00575                 }
00576             }
00577             for (i = 0; i < result->n_dim; i++) {
00578                 if (order_chk[i] != 1) {
00579                     _lerror (_LELVL_ABORT, FEBDORDR);
00580                 }
00581             }
00582         }
00583         if (pad && !early_pad) {
00584             for (i = 0; i < pad->n_dim; i++) {
00585                 pad_ext[i] = pad->dimension[i].extent;
00586                 if (bucketsize > 1 && arithmetic) {
00587                     pad_strd[i] = pad->dimension[i].stride_mult / bucketsize;
00588                 } else {
00589                     pad_strd[i] = pad->dimension[i].stride_mult;
00590                 }
00591             }
00592         } else {
00593             tot_shp = 1;
00594             for (i = 0, tot_shp = 1; i < shape->dimension[0].extent; i++) {
00595                 tot_shp *= shp_vals[i];
00596             }
00597             if (tot_shp > tot_src) {
00598                 _lerror (_LELVL_ABORT, FERSHNPD);
00599             }
00600         }
00601 
00602 
00603 /*      Initialize pointers to data areas               */
00604 
00605         if (!bytealligned) {
00606             sptr = (void *) source->base_addr.a.ptr;
00607             rptr = (void *) result->base_addr.a.ptr;
00608             if (pad)
00609                 pptr = (void *) pad->base_addr.a.ptr;
00610         } else {
00611             if (type == DVTYPE_ASCII) {
00612                 cs = _fcdtocp (source->base_addr.charptr);
00613                 cr = _fcdtocp (result->base_addr.charptr);
00614                 if (pad)
00615                     cp = _fcdtocp (pad->base_addr.charptr);
00616             } else {
00617                 cs = (char *) source->base_addr.a.ptr;
00618                 cr = (char *) result->base_addr.a.ptr;
00619                 if (pad)
00620                     cp = (char *) pad->base_addr.a.ptr;
00621             }
00622         }
00623 
00624 /*      Initialize index counter variables              */
00625 
00626         for (i = 0; i < MAXDIM; i++) {
00627             res_off[i] = 0;
00628             src_off[i] = 0;
00629             pad_off[i] = 0;
00630             res_indx[i] = 0;
00631             src_indx[i] = 0;
00632             pad_indx[i] = 0;
00633         }
00634 
00635 /*      Initialize rank scalars         */
00636 
00637         src_rank = source->n_dim;
00638         res_rank = result->n_dim;
00639         if (pad)
00640             pad_rank = pad->n_dim;
00641         else
00642             pad_rank = 0;
00643 
00644 /*      Determine how many elements from source need to be moved        */
00645 
00646         if (tot_src < tot_ext)
00647             total = tot_src;
00648         else
00649             total = tot_ext;
00650 
00651 /*
00652  *      RESHAPE cannot be broken down by rank, because any rank can be put
00653  *      into any other rank.  Therefore, the only breakdown has been made by
00654  *      data type.  Since the values of the source array are not used, the
00655  *      breakdown is made by element size.  32-bit data is handled as one
00656  *      type, 64-bit data is another type, etc.  Character and derived byte
00657  *      are handled as one type, and derived word is a separate type.
00658  *
00659  *      The work is done by first determining how many source elements need
00660  *      to be moved.  This value is used as the loop counter for the first
00661  *      loop.  Inside the loop, the index values for the source and result
00662  *      elements are calculated, and that value is moved.  Then, the new
00663  *      values for source and result are calculated.  When all of the source
00664  *      elements have been moved, determine whether any pad values must be
00665  *      used.  If so, use a similar loop as the first one, but with the pad
00666  *      array rather than the source array.
00667  */
00668 
00669         switch (subtype) {
00670 
00671             case DVSUBTYPE_BIT64 :
00672                 uptr1 = (_f_int8 *) sptr;
00673                 uptr2 = (_f_int8 *) rptr;
00674                 for (i = 0; i < total; i++) {           /* move source  */
00675                     ADD_INDEX(sindx,src_off,src_rank);
00676                     ADD_INDEX(rindx,res_off,res_rank);
00677                     uptr2[rindx] = uptr1[sindx];
00678                     INCR_SRC();
00679                     INCR_RES();
00680                 }
00681                 if (tot_src < tot_ext) {
00682                     uptr3 = (_f_int8 *) pptr;
00683                     for ( ; i < tot_ext; i++) {         /* move pad     */
00684                         ADD_INDEX(pindx,pad_off,pad_rank);
00685                         ADD_INDEX(rindx,res_off,res_rank);
00686                         uptr2[rindx] = uptr3[pindx];
00687                         INCR_PAD();
00688                         INCR_RES();
00689                     }
00690                 }
00691                 break;
00692 
00693             case DVSUBTYPE_BIT32 :
00694                 hptr1 = (_f_int4 *) sptr;
00695                 hptr2 = (_f_int4 *) rptr;
00696                 for (i = 0; i < total; i++) {           /* move source  */
00697                     ADD_INDEX(sindx,src_off,src_rank);
00698                     ADD_INDEX(rindx,res_off,res_rank);
00699                     hptr2[rindx] = hptr1[sindx];
00700                     INCR_SRC();
00701                     INCR_RES();
00702                 }
00703                 if (tot_src < tot_ext) {
00704                     hptr3 = (_f_int4 *) pptr;
00705                     for ( ; i < tot_ext; i++) {         /* move pad     */
00706                         ADD_INDEX(pindx,pad_off,pad_rank);
00707                         ADD_INDEX(rindx,res_off,res_rank);
00708                         hptr2[rindx] = hptr3[pindx];
00709                         INCR_PAD();
00710                         INCR_RES();
00711                     }
00712                 }
00713                 break;
00714 
00715             case DVSUBTYPE_BIT128 :
00716                 dptr1 = (_f_real16 *) sptr;
00717                 dptr2 = (_f_real16 *) rptr;
00718                 for (i = 0; i < total; i++) {           /* move source  */
00719                     ADD_INDEX(sindx,src_off,src_rank);
00720                     ADD_INDEX(rindx,res_off,res_rank);
00721                     dptr2[rindx] = dptr1[sindx];
00722                     INCR_SRC();
00723                     INCR_RES();
00724                 }
00725                 if (tot_src < tot_ext) {
00726                     dptr3 = (_f_real16 *) pptr;
00727                     for ( ; i < tot_ext; i++) {         /* move pad     */
00728                         ADD_INDEX(pindx,pad_off,pad_rank);
00729                         ADD_INDEX(rindx,res_off,res_rank);
00730                         dptr2[rindx] = dptr3[pindx];
00731                         INCR_PAD();
00732                         INCR_RES();
00733                     }
00734                 }
00735                 break;
00736 
00737             case DVSUBTYPE_CHAR :
00738                 for (i = 0; i < total; i++) {           /* move source  */
00739                     ADD_INDEX(sindx,src_off,src_rank);
00740                     ADD_INDEX(rindx,res_off,res_rank);
00741                     cptr1 = (char *) cs + sindx;
00742                     cptr2 = (char *) cr + rindx;
00743                     (void) memcpy (cptr2, cptr1, bucketsize);
00744                     INCR_SRC();
00745                     INCR_RES();
00746                 }
00747                 if (tot_src < tot_ext) {
00748                     for ( ; i < tot_ext; i++) {
00749                         ADD_INDEX(pindx,pad_off,pad_rank);
00750                         ADD_INDEX(rindx,res_off,res_rank);
00751                         cptr2 = (char *) cr + rindx;
00752                         cptr3 = (char *) cp + pindx;
00753                         (void) memcpy (cptr2, cptr3, bucketsize);
00754                         INCR_PAD();
00755                         INCR_RES();
00756                     }
00757                 }
00758                 break;
00759 
00760             case DVSUBTYPE_DERIVED :
00761                 for (i = 0; i < bucketsize; i++) {
00762                     fptr1 = (_f_int *) sptr + i;
00763                     fptr2 = (_f_int *) rptr + i;
00764 #ifdef _UNICOS
00765 #pragma _CRI    shortloop
00766 #endif
00767                     for (j = 0; j < MAXDIM; j++) {
00768                         src_indx[j] = 0;
00769                         src_off[j] = 0;
00770                     }
00771 #ifdef _UNICOS
00772 #pragma _CRI    shortloop
00773 #endif
00774                     for (j = 0; j < MAXDIM; j++) {
00775                         res_indx[j] = 0;
00776                         res_off[j] = 0;
00777                     }
00778                     for (j = 0; j < total; j++) {       /* move source  */
00779                         ADD_INDEX(sindx,src_off,src_rank)
00780                         ADD_INDEX(rindx,res_off,res_rank)
00781                         fptr2[rindx] = fptr1[sindx];
00782                         INCR_SRC();
00783                         INCR_RES();
00784                     }
00785                     if (tot_src < tot_ext) {
00786                         fptr3 = (_f_int *) pptr + i;
00787 #ifdef _UNICOS
00788 #pragma _CRI    shortloop
00789 #endif
00790                         for (k = 0; k < pad_rank; k++) {
00791                             pad_indx[k] = 0;
00792                             pad_off[k] = 0;
00793                         }
00794                         for ( ; j < tot_ext; j++) {     /* move pad     */
00795                             ADD_INDEX(pindx,pad_off,pad_rank);
00796                             ADD_INDEX(rindx,res_off,res_rank);
00797                             fptr2[rindx] = fptr3[pindx];
00798                             INCR_PAD();
00799                             INCR_RES();
00800                         }
00801                     }
00802                 }
00803                 break;
00804 
00805 #ifdef _F_COMP16
00806             case DVSUBTYPE_BIT256 :
00807                 xptr1 = (dblcmplx *) sptr;
00808                 xptr2 = (dblcmplx *) rptr;
00809                 for (i = 0; i < total; i++) {           /* move source  */
00810                     ADD_INDEX(sindx,src_off,src_rank);
00811                     ADD_INDEX(rindx,res_off,res_rank);
00812                     xptr2[rindx].re = xptr1[sindx].re;
00813                     xptr2[rindx].im = xptr1[sindx].im;
00814                     INCR_SRC();
00815                     INCR_RES();
00816                 }
00817                 if (tot_src < tot_ext) {
00818                     xptr3 = (dblcmplx *) pptr;
00819                     for ( ; i < tot_ext; i++) {         /* move pad     */
00820                         ADD_INDEX(pindx,pad_off,pad_rank);
00821                         ADD_INDEX(rindx,res_off,res_rank);
00822                         xptr2[rindx].re = xptr3[pindx].re;
00823                         xptr2[rindx].im = xptr3[pindx].im;
00824                         INCR_PAD();
00825                         INCR_RES();
00826                     }
00827                 }
00828                 break;
00829 #endif
00830 
00831             default :
00832                 _lerror (_LELVL_ABORT, FEINTDTY);
00833         }
00834 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines