Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
pack.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License 
00007   as published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU Lesser General Public 
00021   License along with this program; if not, write the Free Software 
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 
00023   USA.
00024 
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027 
00028   http://www.sgi.com
00029 
00030   For further information regarding this notice, see:
00031 
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033 
00034 */
00035 
00036 
00037 #pragma ident "@(#) libfi/array/pack.c  92.1    07/07/99 15:52:02"
00038 
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044 
00045 /*
00046  *      Pack an array into an array of rank one under the control of a
00047  *      mask.
00048  *
00049  *      Calculate the indices which will be used in the outer loop
00050  *      of the rank 3-7 block.  Indices are calculated for the mask
00051  *      and source matrices.
00052  */
00053 
00054 #define FIND_INDX()                                                     \
00055         switch (rank) {                                                 \
00056             case 3 :                                                    \
00057                 indx1_msk = msk_off[0] + msk_off[1];                    \
00058                 indx1_src = src_off[0] + src_off[1];                    \
00059                 break;                                                  \
00060             case 4 :                                                    \
00061                 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2];       \
00062                 indx1_src = src_off[0] + src_off[1] + src_off[2];       \
00063                 break;                                                  \
00064             case 5 :                                                    \
00065                 indx1_msk = msk_off[0] + msk_off[1] +                   \
00066                             msk_off[2] + msk_off[3];                    \
00067                 indx1_src = src_off[0] + src_off[1] +                   \
00068                             src_off[2] + src_off[3];                    \
00069                 break;                                                  \
00070             case 6 :                                                    \
00071                 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] +      \
00072                             msk_off[3] + msk_off[4];                    \
00073                 indx1_src = src_off[0] + src_off[1] + src_off[2] +      \
00074                             src_off[3] + src_off[4];                    \
00075                 break;                                                  \
00076             default :                                                   \
00077                 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] +      \
00078                             msk_off[3] + msk_off[4] + msk_off[5];       \
00079                 indx1_src = src_off[0] + src_off[1] + src_off[2] +      \
00080                             src_off[3] + src_off[4] + src_off[5];       \
00081         }
00082 
00083 /*
00084  *      Increment the dimension counters used in the rank 3-7 block.
00085  *      In addition, calculate the offsets for each of the 2-rank
00086  *      dimensions.  These offsets are used in the FIND_INDX macro
00087  *      to calculate the index to be used for the mask and the source
00088  *      matrices.
00089  */
00090 
00091 #define INCREMENT()                                                     \
00092         curdim[0]++;                                                    \
00093         if (curdim[0] < src_ext[1]) {                                   \
00094             msk_off[0] = curdim[0] * msk_strd[1];                       \
00095             src_off[0] = curdim[0] * src_strd[1];                       \
00096         } else {                                                        \
00097             curdim[0] = 0;                                              \
00098             msk_off[0] = 0;                                             \
00099             src_off[0] = 0;                                             \
00100             curdim[1]++;                                                \
00101             if (curdim[1] < src_ext[2]) {                               \
00102                 msk_off[1] = curdim[1] * msk_strd[2];                   \
00103                 src_off[1] = curdim[1] * src_strd[2];                   \
00104             } else {                                                    \
00105                 curdim[1] = 0;                                          \
00106                 msk_off[1] = 0;                                         \
00107                 src_off[1] = 0;                                         \
00108                 curdim[2]++;                                            \
00109                 if (curdim[2] < src_ext[3]) {                           \
00110                     msk_off[2] = curdim[2] * msk_strd[3];               \
00111                     src_off[2] = curdim[2] * src_strd[3];               \
00112                 } else {                                                \
00113                     curdim[2] = 0;                                      \
00114                     msk_off[2] = 0;                                     \
00115                     src_off[2] = 0;                                     \
00116                     curdim[3]++;                                        \
00117                     if (curdim[3] < src_ext[4]) {                       \
00118                         msk_off[3] = curdim[3] * msk_strd[4];           \
00119                         src_off[3] = curdim[3] * src_strd[4];           \
00120                     } else {                                            \
00121                         curdim[3] = 0;                                  \
00122                         msk_off[3] = 0;                                 \
00123                         src_off[3] = 0;                                 \
00124                         curdim[4]++;                                    \
00125                         if (curdim[4] < src_ext[5]) {                   \
00126                             msk_off[4] = curdim[4] * msk_strd[5];       \
00127                             src_off[4] = curdim[4] * src_strd[5];       \
00128                         } else {                                        \
00129                             curdim[4] = 0;                              \
00130                             msk_off[4] = 0;                             \
00131                             src_off[4] = 0;                             \
00132                             curdim[5]++;                                \
00133                             if (curdim[5] < src_ext[6]) {               \
00134                                 msk_off[5] = curdim[5] * msk_strd[6];   \
00135                                 src_off[5] = curdim[5] * src_strd[6];   \
00136                             }                                           \
00137                         }                                               \
00138                     }                                                   \
00139                 }                                                       \
00140             }                                                           \
00141         }
00142 
00143 #ifdef _UNICOS
00144 #pragma _CRI duplicate _PACK as PACK@
00145 #endif
00146 void
00147 _PACK ( DopeVectorType * result,
00148         DopeVectorType * source,
00149         DopeVectorType * mask,
00150         DopeVectorType * vector)
00151 
00152 {
00153         char    *cs;            /* char ptr to source array     */
00154         char    *cr;            /* char ptr to result array     */
00155         char    *cv;            /* char ptr to vector array     */
00156         char            * restrict cptr1;       /* char                 */
00157         char            * restrict cptr2;       /* char                 */
00158         char            * restrict cptr3;       /* char                 */
00159         _f_int8         * restrict uptr1;       /* 64-bit               */
00160         _f_int8         * restrict uptr2;       /* 64-bit               */
00161         _f_int8         * restrict uptr3;       /* 64-bit               */
00162         _f_int          * restrict fptr1;       /* default-size         */
00163         _f_int          * restrict fptr2;       /* default-size         */
00164         _f_int          * restrict fptr3;       /* default-size         */
00165         _f_real16       * restrict dptr1;       /* 128-bit              */
00166         _f_real16       * restrict dptr2;       /* 128-bit              */
00167         _f_real16       * restrict dptr3;       /* 128-bit              */
00168 #ifdef _F_COMP16
00169         dblcmplx        * restrict xptr1;       /* 256-bit              */
00170         dblcmplx        * restrict xptr2;       /* 256-bit              */
00171         dblcmplx        * restrict xptr3;       /* 256-bit              */
00172 #endif
00173         _f_int4         * restrict hptr1;       /* 32-bit               */
00174         _f_int4         * restrict hptr2;       /* 32-bit               */
00175         _f_int4         * restrict hptr3;       /* 32-bit               */
00176         _f_mask         * restrict iptr4;       /* def kind mask        */
00177         void            * restrict sptr;        /* ptr to source        */
00178         void            * restrict rptr;        /* ptr to result        */
00179         void            * restrict mptr;        /* ptr to mask          */
00180         void            * restrict vptr;        /* ptr to vector        */
00181         _f_int  bucketsize;     /* size of each data element    */
00182         long    nbytes;         /* # of bytes in data array     */
00183         long    nwords;         /* # of words in data array     */
00184         long    curdim[MAXDIM]; /* current indices              */
00185         _f_int  bytealligned;   /* byte alligned flag           */
00186         long    sindx;          /* source index                 */
00187         long    rindx;          /* result index                 */
00188         long    mindx;          /* mask index                   */
00189         long    vindx;          /* vector index                 */
00190         _f_int  type;           /* type scalar                  */
00191         _f_int  subtype;        /* sub-type                     */
00192         _f_int  arithmetic;     /* arithmetic                   */
00193         _f_int  rank;           /* dimension of source scalar   */
00194         long    i, j, k;        /* index variables              */
00195         long    res_strd;       /* element stride for result    */
00196         long    vec_strd;       /* element stride for result    */
00197         long    src_ext[MAXDIM];   /* extents for source        */
00198         long    src_strd[MAXDIM];  /* element stride for source */
00199         long    src_off[MAXDIM];   /* offset values for source  */
00200         long    msk_strd[MAXDIM];  /* element stride for mask   */
00201         long    msk_off[MAXDIM];   /* offset values for mask    */
00202         long    indx1_src;      /* index for dim 1 of source    */
00203         long    indx2_src;      /* index for dim 2 of source    */
00204         long    indx1_vec;      /* index for dim 1 of vector    */
00205         long    indx2_vec;      /* index for dim 2 of vector    */
00206         long    indx1_res;      /* index for dim 1 of result    */
00207         long    indx2_res;      /* index for dim 2 of result    */
00208         long    indx1_msk;      /* index for dim 1 of msk       */
00209         long    indx2_msk;      /* index for dim 2 of msk       */
00210         long    total_ext;      /* total extent counter         */
00211         long    src_ext1;       /* extent for dim 1 of source   */
00212         long    src_ext2;       /* extent for dim 1 of source   */
00213         long    found;          /* count of # entries in result */
00214         long    mask_el_len;
00215         _f_int  early_exit;     /* early exit flag              */
00216 
00217 /*      Set type and dimension global variables         */
00218 
00219         type = source->type_lens.type;
00220         rank = source->n_dim;
00221         mask_el_len = mask->base_addr.a.el_len;
00222 
00223 /*
00224  *      Check to see if any of the matrices have size 0.  If any do,
00225  *      return without doing anything.
00226  */
00227 
00228         early_exit = 0;
00229 #ifdef _UNICOS
00230 #pragma _CRI     shortloop
00231 #endif
00232         for (i = 0; i < rank; i++) {
00233             if (!source->dimension[i].extent)
00234                 early_exit = 1;
00235         }
00236         if (result->assoc) {
00237             if (!result->dimension[0].extent)
00238                 early_exit = 1;
00239         }
00240         if (vector) {
00241             if (!vector->dimension[0].extent)
00242                 early_exit = 1;
00243         }
00244         if (mask) {
00245             if (mask->n_dim > 1) {
00246 #ifdef _UNICOS
00247 #pragma _CRI     shortloop
00248 #endif
00249                 for (i = 0; i < rank; i++)
00250                     if (!mask->dimension[i].extent)
00251                         early_exit = 1;
00252             }
00253         }
00254 
00255 /*
00256  *      Initialize every array element to 0.
00257  */
00258 
00259 #ifdef _UNICOS
00260 #pragma _CRI    shortloop
00261 #endif
00262         for (i = 0; i < MAXDIM; i++) {
00263             curdim[i] = 0;
00264             src_ext[i] = 0;
00265             src_strd[i] = 0;
00266             src_off[i] = 0;
00267             msk_strd[i] = 0;
00268             msk_off[i] = 0;
00269         }
00270 
00271 /*      Size calculation is based on variable type      */
00272 
00273         switch (type) {
00274             case DVTYPE_ASCII :
00275                 bytealligned = 1;
00276                 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */
00277                 subtype = DVSUBTYPE_CHAR;
00278                 arithmetic = 0;
00279                 break;
00280             case DVTYPE_DERIVEDBYTE :
00281                 bytealligned = 1;
00282                 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00283                 subtype = DVSUBTYPE_CHAR;
00284                 arithmetic = 0;
00285                 break;
00286             case DVTYPE_DERIVEDWORD :
00287                 bytealligned = 0;
00288                 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00289                 subtype = DVSUBTYPE_DERIVED;
00290                 arithmetic = 0;
00291                 break;
00292             default :
00293                 bytealligned = 0;
00294                 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00295                 if (source->type_lens.int_len == 64) {
00296                     subtype = DVSUBTYPE_BIT64;
00297                 } else if (source->type_lens.int_len == 32) {
00298                     subtype = DVSUBTYPE_BIT32;
00299                     bucketsize = 1;
00300                 } else if (source->type_lens.int_len == 256) {
00301                     subtype = DVSUBTYPE_BIT256;
00302                 } else {
00303                     subtype = DVSUBTYPE_BIT128;
00304                 }
00305                 arithmetic = 1;
00306         }
00307 
00308 /*      If necessary, fill result dope vector           */
00309 
00310         if (!result->assoc) {
00311             result->base_addr.a.ptr = (void *) NULL;
00312             result->orig_base = 0;
00313             result->orig_size = 0;
00314 
00315 /*      Determine size of space to allocate     */
00316 
00317             if (!bytealligned) {
00318                 nbytes = bucketsize * BYTES_PER_WORD;
00319 #ifdef _CRAYMPP
00320                 if (subtype == DVSUBTYPE_BIT32)
00321                     nbytes /= 2;
00322 #endif
00323             } else {
00324                 nbytes = bucketsize;
00325             }
00326             if (vector) {
00327                 nbytes *= vector->dimension[0].extent;
00328                 nwords = vector->dimension[0].extent;
00329             } else {
00330 #ifdef _UNICOS
00331 #pragma _CRI    shortloop
00332 #endif
00333                 for (i = 0; i < rank; i++)
00334                     nbytes *= source->dimension[i].extent;
00335                 nwords = nbytes / BYTES_PER_WORD;
00336             }
00337             if (nbytes > 0) {
00338                 result->base_addr.a.ptr = (void *) malloc (nbytes);
00339                 if (result->base_addr.a.ptr == NULL)
00340                     _lerror (_LELVL_ABORT, FENOMEMY);
00341             }
00342 
00343             result->assoc = 1;
00344             result->base_addr.a.el_len = source->base_addr.a.el_len;
00345             if (type == DVTYPE_ASCII) {
00346                 cr = (char *) result->base_addr.a.ptr;
00347                 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00348             }
00349             result->orig_size = nbytes * BITS_PER_BYTE;
00350 
00351 /*
00352  *      These are initial values which may be changed when it is
00353  *      determined how big the result array actually is.
00354  */
00355             result->dimension[0].low_bound = 1;
00356             result->dimension[0].extent = nwords;
00357             result->dimension[0].stride_mult = bucketsize;
00358 
00359 /*      if result array is already allocated    */
00360 
00361         } else {
00362             if (!bytealligned)
00363                 nbytes = bucketsize * BYTES_PER_WORD;
00364             else
00365                 nbytes = bucketsize;
00366             if (vector) {
00367                 nbytes *= vector->dimension[0].extent;
00368                 nwords = vector->dimension[0].extent;
00369             } else {
00370                 nwords = 1;
00371                 for (i = 0; i < rank; i++) {
00372                     nbytes *= source->dimension[i].extent;
00373                     nwords *= source->dimension[i].extent;
00374                 }
00375             }
00376         }
00377 
00378 /*      If early exit is required, exit now     */
00379 
00380         if (early_exit)
00381             return;
00382         if (mask) {
00383             iptr4 = (_f_mask *) mask->base_addr.a.ptr;
00384             if (mask->n_dim == 0 && !(vector) &&
00385                 !LTOB(mask_el_len, &iptr4[0])) {
00386                 result->dimension[0].extent = 0;
00387                 return;
00388             }
00389         }
00390 
00391 /*      Set up scalar pointers to all of the argument data areas        */
00392 
00393         if (mask)
00394             mptr = (void *) mask->base_addr.a.ptr;
00395         if (!bytealligned) {
00396             sptr = (void *) source->base_addr.a.ptr;
00397             rptr = (void *) result->base_addr.a.ptr;
00398             if (vector)
00399                 vptr = (void *) vector->base_addr.a.ptr;
00400         } else {
00401             if (type == DVTYPE_ASCII) {
00402                 cs = _fcdtocp (source->base_addr.charptr);
00403                 cr = _fcdtocp (result->base_addr.charptr);
00404                 if (vector)
00405                     cv = _fcdtocp (vector->base_addr.charptr);
00406             } else {
00407                 cs = (char *) source->base_addr.a.ptr;
00408                 cr = (char *) result->base_addr.a.ptr;
00409                 if (vector)
00410                     cv = (char *) vector->base_addr.a.ptr;
00411             }
00412         }
00413 
00414 /*      Set up some 'shortcut' variables used for index calculation     */
00415 
00416         if (bucketsize > 1 && arithmetic) {
00417             res_strd = result->dimension[0].stride_mult / bucketsize;
00418             if (vector)
00419                 vec_strd = vector->dimension[0].stride_mult / bucketsize;
00420         } else {
00421             res_strd = result->dimension[0].stride_mult;
00422             if (vector)
00423                 vec_strd = vector->dimension[0].stride_mult;
00424         }
00425 
00426 #ifdef _UNICOS
00427 #pragma _CRI    shortloop
00428 #endif
00429         for (i = 0; i < rank; i++) {
00430             src_ext[i] = source->dimension[i].extent;
00431             if (bucketsize > 1 && arithmetic) {
00432                 src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00433             } else {
00434                 src_strd[i] = source->dimension[i].stride_mult;
00435             }
00436         }
00437         if (mask->n_dim > 0) {
00438 #ifdef _UNICOS
00439 #pragma _CRI    shortloop
00440 #endif
00441             for (i = 0; i < rank; i++) {
00442                 msk_strd[i] = mask->dimension[i].stride_mult;
00443                 iptr4 = (_f_mask *) mptr;
00444 #ifdef  _CRAYMPP
00445                 if (mask_el_len == 64 && sizeof (iptr4[0]) == 4)
00446                     msk_strd[i] <<= 1;
00447 #endif
00448             }
00449         }
00450 
00451 /*
00452  *      The program is divided up into three blocks.  The first block deals
00453  *      with arrays of rank 1.  Inside each block, the data types are broken
00454  *      up into groups based on container size.  Integer, real, and logical
00455  *      types are all one word, and the actual value is not used, so they
00456  *      are all grouped together and treated as long.  The same is
00457  *      true for double and complex, as well as ascii and derivedbyte.
00458  *
00459  *      For each group, the mask array is checked for true values.  When one
00460  *      is encountered, the corresponding value from the source array is put
00461  *      into the next available position in the result array.  If no vector
00462  *      is passed, the routine is finished at this point with the result
00463  *      array length set to the number of true elements in the mask.  If a
00464  *      vector is furnished, the size of the vector determines the size of
00465  *      the result array.  If this size has been reached, the routine is done.
00466  *      If not, elements from the vector array are put into the result array
00467  *      until it is full.
00468  */
00469 
00470         if (rank == 1) {
00471             found = 0;
00472             iptr4 = (_f_mask *) mptr;
00473             switch (subtype) {
00474                 case DVSUBTYPE_BIT64 :
00475                     uptr1 = (_f_int8 *) sptr;
00476                     uptr2 = (_f_int8 *) vptr;
00477                     uptr3 = (_f_int8 *) rptr;
00478                     rindx = 0;
00479                     mindx = 0;
00480                     vindx = 0;
00481                     sindx = 0;
00482                     src_ext1 = source->dimension[0].extent;
00483                     for (i = 0; i < src_ext1; i++) {
00484                         if (LTOB(mask_el_len, &iptr4[mindx])) {
00485                             sindx = i * src_strd[0];
00486                             uptr3[rindx] = uptr1[sindx];
00487                             rindx += res_strd;
00488                             found++;
00489                         }
00490                         mindx += msk_strd[0];
00491                     }
00492                     if (!vector || found == nwords) {
00493                         result->dimension[0].extent = found;
00494                     } else {
00495                         vindx = found * vec_strd;
00496                         for ( ; found < nwords; found++) {
00497                             uptr3[rindx] = uptr2[vindx];
00498                             rindx += res_strd;
00499                             vindx += vec_strd;
00500                         }
00501                     }
00502                     break;
00503 
00504                 case DVSUBTYPE_BIT32 :
00505                     hptr1 = (_f_int4 *) sptr;
00506                     hptr2 = (_f_int4 *) vptr;
00507                     hptr3 = (_f_int4 *) rptr;
00508                     rindx = 0;
00509                     mindx = 0;
00510                     vindx = 0;
00511                     sindx = 0;
00512                     src_ext1 = source->dimension[0].extent;
00513                     for (i = 0; i < src_ext1; i++) {
00514                         if (LTOB(mask_el_len, &iptr4[mindx])) {
00515                             sindx = i * src_strd[0];
00516                             hptr3[rindx] = hptr1[sindx];
00517                             rindx += res_strd;
00518                             found++;
00519                         }
00520                         mindx += msk_strd[0];
00521                     }
00522                     if (!vector || found == nwords) {
00523                         result->dimension[0].extent = found;
00524                     } else {
00525                         vindx = found * vec_strd;
00526                         for ( ; found < nwords; found++) {
00527                             hptr3[rindx] = hptr2[vindx];
00528                             rindx += res_strd;
00529                             vindx += vec_strd;
00530                         }
00531                     }
00532                     break;
00533 
00534                 case DVSUBTYPE_BIT128 :
00535                     dptr1 = (_f_real16 *) sptr;
00536                     dptr2 = (_f_real16 *) vptr;
00537                     dptr3 = (_f_real16 *) rptr;
00538                     rindx = 0;
00539                     mindx = 0;
00540                     vindx = 0;
00541                     sindx = 0;
00542                     src_ext1 = source->dimension[0].extent;
00543                     for (i = 0; i < src_ext1; i++) {
00544                         if (LTOB(mask_el_len, &iptr4[mindx])) {
00545                             sindx = i * src_strd[0];
00546                             dptr3[rindx] = dptr1[sindx];
00547                             rindx += res_strd;
00548                             found++;
00549                         }
00550                         mindx += msk_strd[0];
00551                     }
00552                     if (!vector || found == nwords) {
00553                         result->dimension[0].extent = found;
00554                     } else {
00555                         vindx = found * vec_strd;
00556                         for ( ; found < nwords; found++) {
00557                             dptr3[rindx] = dptr2[vindx];
00558                             rindx += res_strd;
00559                             vindx += vec_strd;
00560                         }
00561                     }
00562                     break;
00563 
00564                 case DVSUBTYPE_CHAR :
00565                     rindx = 0;
00566                     mindx = 0;
00567                     vindx = 0;
00568                     sindx = 0;
00569                     src_ext1 = source->dimension[0].extent;
00570                     for (i = 0; i < src_ext1; i++) {
00571                         if (LTOB(mask_el_len, &iptr4[mindx])) {
00572                             cptr3 = (char *) cr + rindx;
00573                             cptr1 = (char *) cs + (i * src_strd[0]);
00574                             (void) memcpy (cptr3, cptr1, bucketsize);
00575                             rindx += res_strd;
00576                             found++;
00577                         }
00578                         mindx += msk_strd[0];
00579                     }
00580                     if (!vector || found == nwords) {
00581                         result->dimension[0].extent = found;
00582                     } else {
00583                         vindx = found * vec_strd;
00584                         for ( ; found < nwords; found++) {
00585                             cptr3 = (char *) cr + rindx;
00586                             cptr2 = (char *) cv + vindx;
00587                             (void) memcpy (cptr3, cptr2, bucketsize);
00588                             rindx += res_strd;
00589                             vindx += vec_strd;
00590                         }
00591                     }
00592                     break;
00593 
00594                 case DVSUBTYPE_DERIVED :
00595                     fptr1 = (_f_int *) sptr;
00596                     fptr2 = (_f_int *) vptr;
00597                     fptr3 = (_f_int *) rptr;
00598                     src_ext1 = source->dimension[0].extent;
00599                     indx1_res = 0;
00600 /*
00601  *      The derived word type is handled the same as the other types except
00602  *      that another loop is added.  The assumption was made that extent of
00603  *      the array would be larger than the number of words in the derived
00604  *      type.  Therefore, to try and make this routine optimal, the first
00605  *      loop uses the extent as its inner loop, which should provide better
00606  *      optimization.  The second loop is also done this way.
00607  */
00608                     for (i = 0; i < bucketsize; i++) {
00609                         rindx = i;
00610                         mindx = 0;
00611                         for (j = 0; j < src_ext1; j++) {
00612                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00613                                 sindx = i + (j * src_strd[0]);
00614                                 fptr3[rindx] = fptr1[sindx];
00615                                 rindx += res_strd;
00616                                 if (i == 0) {
00617                                     indx1_res = rindx;
00618                                     found++;
00619                                 }
00620                             }
00621                             mindx += msk_strd[0];
00622                         }
00623                     }
00624                     if (!vector || found == nwords) {
00625                         result->dimension[0].extent = found;
00626                     } else {
00627                         indx1_vec = found * vec_strd;
00628                         found = nwords - found;
00629                         for (i = 0; i < bucketsize; i++) {
00630                             rindx = indx1_res + i;
00631                             vindx = indx1_vec + i;
00632                             for (j = 0; j < found; j++) {
00633                                 fptr3[rindx] = fptr2[vindx];
00634                                 rindx += res_strd;
00635                                 vindx += vec_strd;
00636                             }
00637                         }
00638                     }
00639                     break;
00640 
00641 #ifdef _F_COMP16
00642                 case DVSUBTYPE_BIT256 :
00643                     xptr1 = (dblcmplx *) sptr;
00644                     xptr2 = (dblcmplx *) vptr;
00645                     xptr3 = (dblcmplx *) rptr;
00646                     rindx = 0;
00647                     mindx = 0;
00648                     vindx = 0;
00649                     sindx = 0;
00650                     src_ext1 = source->dimension[0].extent;
00651                     for (i = 0; i < src_ext1; i++) {
00652                         if (LTOB(mask_el_len, &iptr4[mindx])) {
00653                             sindx = i * src_strd[0];
00654                             xptr3[rindx].re = xptr1[sindx].re;
00655                             xptr3[rindx].im = xptr1[sindx].im;
00656                             rindx += res_strd;
00657                             found++;
00658                         }
00659                         mindx += msk_strd[0];
00660                     }
00661                     if (!vector || found == nwords) {
00662                         result->dimension[0].extent = found;
00663                     } else {
00664                         vindx = found * vec_strd;
00665                         for ( ; found < nwords; found++) {
00666                             xptr3[rindx].re = xptr2[vindx].re;
00667                             xptr3[rindx].im = xptr2[vindx].im;
00668                             rindx += res_strd;
00669                             vindx += vec_strd;
00670                         }
00671                     }
00672                     break;
00673 #endif
00674 
00675                 default :
00676                     _lerror (_LELVL_ABORT, FEINTDTY);
00677             }
00678         } else if (rank == 2) {
00679 
00680 /*
00681  *      Rank 2 matrices are handled in a manner similar to rank 1 arrays,
00682  *      except that the first loop in each data type is a nested loop, with
00683  *      the outer loop being the second dimension, and the inner loop being
00684  *      the first.  This preserves the storage order which is necessary for
00685  *      pack to work.  The second part of each block is not affected by the
00686  *      number of dimensions in the source matrix.
00687  */
00688 
00689             found = 0;
00690             iptr4 = (_f_mask *) mptr;
00691             switch (subtype) {
00692                 case DVSUBTYPE_BIT64 :
00693                     uptr1 = (_f_int8 *) sptr;
00694                     uptr2 = (_f_int8 *) vptr;
00695                     uptr3 = (_f_int8 *) rptr;
00696                     indx2_msk = 0;
00697                     indx2_src = 0;
00698                     rindx = 0;
00699                     src_ext1 = src_ext[0];
00700                     src_ext2 = src_ext[1];
00701                     for (i = 0; i < src_ext2; i++) {
00702                         indx1_msk = 0;
00703                         for (j = 0; j < src_ext1; j++) {
00704                             mindx = indx1_msk + indx2_msk;
00705                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00706                                 sindx = indx2_src + (j * src_strd[0]);
00707                                 uptr3[rindx] = uptr1[sindx];
00708                                 rindx += res_strd;
00709                                 found++;
00710                             }
00711                             indx1_msk += msk_strd[0];
00712                         }
00713                         indx2_msk += msk_strd[1];
00714                         indx2_src += src_strd[1];
00715                     }
00716                     if (!vector || found == nwords) {
00717                         result->dimension[0].extent = found;
00718                     } else {
00719                         vindx = found * vec_strd;
00720                         for ( ; found < nwords; found++) {
00721                             uptr3[rindx] = uptr2[vindx];
00722                             rindx += res_strd;
00723                             vindx += vec_strd;
00724                         }
00725                     }
00726                     break;
00727 
00728                 case DVSUBTYPE_BIT32 :
00729                     hptr1 = (_f_int4 *) sptr;
00730                     hptr2 = (_f_int4 *) vptr;
00731                     hptr3 = (_f_int4 *) rptr;
00732                     indx2_msk = 0;
00733                     indx2_src = 0;
00734                     rindx = 0;
00735                     src_ext1 = src_ext[0];
00736                     src_ext2 = src_ext[1];
00737                     for (i = 0; i < src_ext2; i++) {
00738                         indx1_msk = 0;
00739                         for (j = 0; j < src_ext1; j++) {
00740                             mindx = indx1_msk + indx2_msk;
00741                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00742                                 sindx = indx2_src + (j * src_strd[0]);
00743                                 hptr3[rindx] = hptr1[sindx];
00744                                 rindx += res_strd;
00745                                 found++;
00746                             }
00747                             indx1_msk += msk_strd[0];
00748                         }
00749                         indx2_msk += msk_strd[1];
00750                         indx2_src += src_strd[1];
00751                     }
00752                     if (!vector || found == nwords) {
00753                         result->dimension[0].extent = found;
00754                     } else {
00755                         vindx = found * vec_strd;
00756                         for ( ; found < nwords; found++) {
00757                             hptr3[rindx] = hptr2[vindx];
00758                             rindx += res_strd;
00759                             vindx += vec_strd;
00760                         }
00761                     }
00762                     break;
00763 
00764                 case DVSUBTYPE_BIT128 :
00765                     dptr1 = (_f_real16 *) sptr;
00766                     dptr2 = (_f_real16 *) vptr;
00767                     dptr3 = (_f_real16 *) rptr;
00768                     indx2_msk = 0;
00769                     indx2_src = 0;
00770                     rindx = 0;
00771                     src_ext1 = src_ext[0];
00772                     src_ext2 = src_ext[1];
00773                     for (i = 0; i < src_ext2; i++) {
00774                         indx1_msk = 0;
00775                         for (j = 0; j < src_ext1; j++) {
00776                             mindx = indx1_msk + indx2_msk;
00777                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00778                                 sindx = indx2_src + (j * src_strd[0]);
00779                                 dptr3[rindx] = dptr1[sindx];
00780                                 rindx += res_strd;
00781                                 found++;
00782                             }
00783                             indx1_msk += msk_strd[0];
00784                         }
00785                         indx2_msk += msk_strd[1];
00786                         indx2_src += src_strd[1];
00787                     }
00788                     if (!vector || found == nwords) {
00789                         result->dimension[0].extent = found;
00790                     } else {
00791                         vindx = found * vec_strd;
00792                         for ( ; found < nwords; found++) {
00793                             dptr3[rindx] = dptr2[vindx];
00794                             rindx += res_strd;
00795                             vindx += vec_strd;
00796                         }
00797                     }
00798                     break;
00799 
00800                 case DVSUBTYPE_CHAR :
00801                     indx2_msk = 0;
00802                     indx2_src = 0;
00803                     rindx = 0;
00804                     src_ext1 = src_ext[0];
00805                     src_ext2 = src_ext[1];
00806                     for (i = 0; i < src_ext2; i++) {
00807                         indx1_msk = 0;
00808                         for (j = 0; j < src_ext1; j++) {
00809                             mindx = indx1_msk + indx2_msk;
00810                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00811                                 sindx = indx2_src + (j * src_strd[0]);
00812                                 cptr1 = (char *) cs + sindx;
00813                                 cptr3 = (char *) cr + rindx;
00814                                 (void) memcpy (cptr3, cptr1, bucketsize);
00815                                 rindx += res_strd;
00816                                 found++;
00817                             }
00818                             indx1_msk += msk_strd[0];
00819                         }
00820                         indx2_msk += msk_strd[1];
00821                         indx2_src += src_strd[1];
00822                     }
00823                     if (!vector || found == nwords) {
00824                         result->dimension[0].extent = found;
00825                     } else {
00826                         vindx = found * vec_strd;
00827                         for ( ; found < nwords; found++) {
00828                             cptr2 = (char *) cv + vindx;
00829                             cptr3 = (char *) cr + rindx;
00830                             (void) memcpy (cptr3, cptr2, bucketsize);
00831                             rindx += res_strd;
00832                             vindx += vec_strd;
00833                         }
00834                     }
00835                     break;
00836 
00837                 case DVSUBTYPE_DERIVED :
00838                     fptr1 = (_f_int *) sptr;
00839                     fptr2 = (_f_int *) vptr;
00840                     fptr3 = (_f_int *) rptr;
00841                     src_ext1 = src_ext[0];
00842                     src_ext2 = src_ext[1];
00843                     for (i = 0; i < bucketsize; i++) {
00844                         indx2_msk = 0;
00845                         indx2_src = 0;
00846                         rindx = i;
00847                         for (j = 0; j < src_ext2; j++) {
00848                             indx1_msk = 0;
00849                             for (k = 0; k < src_ext1; k++) {
00850                                 mindx = indx1_msk + indx2_msk;
00851                                 if (LTOB(mask_el_len, &iptr4[mindx])) {
00852                                     sindx = indx2_src + i + (k * src_strd[0]);
00853                                     fptr3[rindx] = fptr1[sindx];
00854                                     rindx += res_strd;
00855                                     if (i == 0)
00856                                         found++;
00857                                 }
00858                                 indx1_msk += msk_strd[0];
00859                             }
00860                             indx2_msk += msk_strd[1];
00861                             indx2_src += src_strd[1];
00862                         }
00863                     }
00864                     if (!vector || found == nwords) {
00865                         result->dimension[0].extent = found;
00866                     } else {
00867                         indx1_res = found * res_strd;
00868                         indx1_vec = found * vec_strd;
00869                         found = nwords - found;
00870                         for (i = 0; i < bucketsize; i++) {
00871                             rindx = indx1_res + i;
00872                             vindx = indx1_vec + i;
00873                             for (j = 0; j < found; j++) {
00874                                 fptr3[rindx] = fptr2[vindx];
00875                                 rindx += res_strd;
00876                                 vindx += vec_strd;
00877                             }
00878                         }
00879                     }
00880                     break;
00881 
00882 #ifdef _F_COMP16
00883                 case DVSUBTYPE_BIT256 :
00884                     xptr1 = (dblcmplx *) sptr;
00885                     xptr2 = (dblcmplx *) vptr;
00886                     xptr3 = (dblcmplx *) rptr;
00887                     indx2_msk = 0;
00888                     indx2_src = 0;
00889                     rindx = 0;
00890                     src_ext1 = src_ext[0];
00891                     src_ext2 = src_ext[1];
00892                     for (i = 0; i < src_ext2; i++) {
00893                         indx1_msk = 0;
00894                         for (j = 0; j < src_ext1; j++) {
00895                             mindx = indx1_msk + indx2_msk;
00896                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00897                                 sindx = indx2_src + (j * src_strd[0]);
00898                                 xptr3[rindx].re = xptr1[sindx].re;
00899                                 xptr3[rindx].im = xptr1[sindx].im;
00900                                 rindx += res_strd;
00901                                 found++;
00902                             }
00903                             indx1_msk += msk_strd[0];
00904                         }
00905                         indx2_msk += msk_strd[1];
00906                         indx2_src += src_strd[1];
00907                     }
00908                     if (!vector || found == nwords) {
00909                         result->dimension[0].extent = found;
00910                     } else {
00911                         vindx = found * vec_strd;
00912                         for ( ; found < nwords; found++) {
00913                             xptr3[rindx].re = xptr2[vindx].re;
00914                             xptr3[rindx].im = xptr2[vindx].im;
00915                             rindx += res_strd;
00916                             vindx += vec_strd;
00917                         }
00918                     }
00919                     break;
00920 #endif
00921 
00922                 default :
00923                     _lerror (_LELVL_ABORT, FEINTDTY);
00924             }
00925         } else {                        /* rank 3-7 */
00926 
00927 /*
00928  *      Ranks 3 through 7 are all handled in this last block.  It was assumed
00929  *      that ranks 1 and 2 would account for the majority of calls to pack,
00930  *      and that the remaining ranks could be done in one block.
00931  *
00932  *      The logic behind these blocks is the same as for the other ranks.
00933  *      The first part of the routine uses two nested loops, with the inner
00934  *      loop being the first dimension, and the outer loop being the product
00935  *      of all of the remaining dimensions.  A array of counters keeps track
00936  *      of the values for each of the dimensions.  Two macros are used in
00937  *      this block.  INCREMENT are used to calculate the values of each of
00938  *      the dimension counters, and to calculate the offsets into the array
00939  *      for each index.  FIND_INDX sums these offsets into one offset, which
00940  *      is used for each iteration of the inner loop.  As with the other two
00941  *      blocks, the second part of each section is not affected by the number
00942  *      of dimensions in the source matrix.
00943  *
00944  *      Calculate the product of each of the dimensions 2-n.  This is the
00945  *      number of times the outer loop will be executed.  Also, initialize
00946  *      the offset and dimension counter arrays.
00947  */
00948             total_ext = 1;
00949 #ifdef _UNICOS
00950 #pragma _CRI    shortloop
00951 #endif
00952             for (i = 0; i < MAXDIM; i++) {
00953                 curdim[i] = 0;
00954                 msk_off[i] = 0;
00955                 src_off[i] = 0;
00956             }
00957 
00958 #ifdef _UNICOS
00959 #pragma _CRI    shortloop
00960 #endif
00961             for (i = 1; i < rank; i++)
00962                 total_ext *= source->dimension[i].extent;
00963             iptr4 = (_f_mask *) mptr;
00964             found = 0;
00965 
00966             switch (subtype) {
00967                 case DVSUBTYPE_BIT64 :
00968                     uptr2 = (_f_int8 *) vptr;
00969                     uptr3 = (_f_int8 *) rptr;
00970                     rindx = 0;
00971                     for (i = 0; i < total_ext; i++) {
00972                         FIND_INDX();
00973                         uptr1 = (_f_int8 *) sptr + indx1_src;
00974                         iptr4 = (_f_mask *) mptr + indx1_msk;
00975                         for (j = 0; j < src_ext[0]; j++) {
00976                             mindx = j * msk_strd[0];
00977                             if (LTOB(mask_el_len, &iptr4[mindx])) {
00978                                 sindx = j * src_strd[0];
00979                                 uptr3[rindx] = uptr1[sindx];
00980                                 rindx += res_strd;
00981                                 found++;
00982                             }
00983                         }
00984                         INCREMENT();
00985                     }
00986                     if (!vector || found == nwords) {
00987                         result->dimension[0].extent = found;
00988                     } else {
00989                         vindx = found * vec_strd;
00990                         for ( ; found < nwords; found++) {
00991                             uptr3[rindx] = uptr2[vindx];
00992                             rindx += res_strd;
00993                             vindx += vec_strd;
00994                         }
00995                     }
00996                     break;
00997 
00998                 case DVSUBTYPE_BIT32 :
00999                     hptr2 = (_f_int4 *) vptr;
01000                     hptr3 = (_f_int4 *) rptr;
01001                     rindx = 0;
01002                     for (i = 0; i < total_ext; i++) {
01003                         FIND_INDX();
01004                         hptr1 = (_f_int4 *) sptr + indx1_src;
01005                         iptr4 = (_f_mask *) mptr + indx1_msk;
01006                         for (j = 0; j < src_ext[0]; j++) {
01007                             mindx = j * msk_strd[0];
01008                             if (LTOB(mask_el_len, &iptr4[mindx])) {
01009                                 sindx = j * src_strd[0];
01010                                 hptr3[rindx] = hptr1[sindx];
01011                                 rindx += res_strd;
01012                                 found++;
01013                             }
01014                         }
01015                         INCREMENT();
01016                     }
01017                     if (!vector || found == nwords) {
01018                         result->dimension[0].extent = found;
01019                     } else {
01020                         vindx = found * vec_strd;
01021                         for ( ; found < nwords; found++) {
01022                             hptr3[rindx] = hptr2[vindx];
01023                             rindx += res_strd;
01024                             vindx += vec_strd;
01025                         }
01026                     }
01027                     break;
01028 
01029                 case DVSUBTYPE_BIT128 :
01030                     dptr2 = (_f_real16 *) vptr;
01031                     dptr3 = (_f_real16 *) rptr;
01032                     rindx = 0;
01033                     for (i = 0; i < total_ext; i++) {
01034                         FIND_INDX();
01035                         dptr1 = (_f_real16 *) sptr + indx1_src;
01036                         iptr4 = (_f_mask *) mptr + indx1_msk;
01037                         for (j = 0; j < src_ext[0]; j++) {
01038                             mindx = j * msk_strd[0];
01039                             if (LTOB(mask_el_len, &iptr4[mindx])) {
01040                                 sindx = j * src_strd[0];
01041                                 dptr3[rindx] = dptr1[sindx];
01042                                 rindx += res_strd;
01043                                 found++;
01044                             }
01045                         }
01046                         INCREMENT();
01047                     }
01048                     if (!vector || found == nwords) {
01049                         result->dimension[0].extent = found;
01050                     } else {
01051                         vindx = found * vec_strd;
01052                         for ( ; found < nwords; found++) {
01053                             dptr3[rindx] = dptr2[vindx];
01054                             rindx += res_strd;
01055                             vindx += vec_strd;
01056                         }
01057                     }
01058                     break;
01059 
01060                 case DVSUBTYPE_CHAR :
01061                     cptr2 = (char *) vptr;
01062                     cptr3 = (char *) rptr;
01063                     rindx = 0;
01064                     for (i = 0; i < total_ext; i++) {
01065                         FIND_INDX();
01066                         iptr4 = (_f_mask *) mptr + indx1_msk;
01067                         for (j = 0; j < src_ext[0]; j++) {
01068                             mindx = j * msk_strd[0];
01069                             if (LTOB(mask_el_len, &iptr4[mindx])) {
01070                                 sindx = indx1_src + (j * src_strd[0]);
01071                                 cptr1 = (char *) cs + sindx;
01072                                 cptr3 = (char *) cr + rindx;
01073                                 (void) memcpy (cptr3, cptr1, bucketsize);
01074                                 rindx += res_strd;
01075                                 found++;
01076                             }
01077                         }
01078                         INCREMENT();
01079                     }
01080                     if (!vector || found == nwords) {
01081                         result->dimension[0].extent = found;
01082                     } else {
01083                         vindx = found * vec_strd;
01084                         for ( ; found < nwords; found++) {
01085                             cptr3 = (char *) cr + rindx;
01086                             cptr2 = (char *) cv + vindx;
01087                             (void) memcpy (cptr3, cptr2, bucketsize);
01088                             rindx += res_strd;
01089                             vindx += vec_strd;
01090                         }
01091                     }
01092                     break;
01093 
01094                 case DVSUBTYPE_DERIVED :
01095                     fptr2 = (_f_int *) vptr;
01096                     fptr3 = (_f_int *) rptr;
01097                     for (i = 0; i < bucketsize; i++) {
01098                         rindx = i;
01099                         for (j = 0; j < rank; j++) {
01100                             msk_off[j] = 0;
01101                             src_off[j] = 0;
01102                             curdim[j] = 0;
01103                         }
01104                         for (j = 0; j < total_ext; j++) {
01105                             FIND_INDX();
01106                             fptr1 = (_f_int *) sptr + i + indx1_src;
01107                             iptr4 = (_f_mask *) mptr + indx1_msk;
01108                             for (k = 0; k < src_ext[0]; k++) {
01109                                 mindx = k * msk_strd[0];
01110                                 if (LTOB(mask_el_len, &iptr4[mindx])) {
01111                                     sindx = k * src_strd[0];
01112                                     fptr3[rindx] = fptr1[sindx];
01113                                     rindx += res_strd;
01114                                     if (i == 0)
01115                                         found++;
01116                                 }
01117                             }
01118                             INCREMENT();
01119                         }
01120                     }
01121                     if (!vector || found == nwords) {
01122                         result->dimension[0].extent = found;
01123                     } else {
01124                         indx1_res = found * res_strd;
01125                         indx1_vec = found * vec_strd;
01126                         found = nwords - found;
01127                         for (i = 0; i < bucketsize; i++) {
01128                             rindx = indx1_res + i;
01129                             vindx = indx1_vec + i;
01130                             for (j = 0; j < found; j++) {
01131                                 fptr3[rindx] = fptr2[vindx];
01132                                 rindx += res_strd;
01133                                 vindx += vec_strd;
01134                             }
01135                         }
01136                     }
01137                     break;
01138 
01139 #ifdef _F_COMP16
01140                 case DVSUBTYPE_BIT256 :
01141                     xptr2 = (dblcmplx *) vptr;
01142                     xptr3 = (dblcmplx *) rptr;
01143                     rindx = 0;
01144                     for (i = 0; i < total_ext; i++) {
01145                         FIND_INDX();
01146                         xptr1 = (dblcmplx *) sptr + indx1_src;
01147                         iptr4 = (_f_mask *) mptr + indx1_msk;
01148                         for (j = 0; j < src_ext[0]; j++) {
01149                             mindx = j * msk_strd[0];
01150                             if (LTOB(mask_el_len, &iptr4[mindx])) {
01151                                 sindx = j * src_strd[0];
01152                                 xptr3[rindx].re = xptr1[sindx].re;
01153                                 xptr3[rindx].im = xptr1[sindx].im;
01154                                 rindx += res_strd;
01155                                 found++;
01156                             }
01157                         }
01158                         INCREMENT();
01159                     }
01160                     if (!vector || found == nwords) {
01161                         result->dimension[0].extent = found;
01162                     } else {
01163                         vindx = found * vec_strd;
01164                         for ( ; found < nwords; found++) {
01165                             xptr3[rindx].re = xptr2[vindx].re;
01166                             xptr3[rindx].im = xptr2[vindx].im;
01167                             rindx += res_strd;
01168                             vindx += vec_strd;
01169                         }
01170                     }
01171                     if (!vector || found == nwords) {
01172                         result->dimension[0].extent = found;
01173                     } else {
01174                         vindx = found * vec_strd;
01175                         for ( ; found < nwords; found++) {
01176                             xptr3[rindx].re = xptr2[vindx].re;
01177                             xptr3[rindx].im = xptr2[vindx].im;
01178                             rindx += res_strd;
01179                             vindx += vec_strd;
01180                         }
01181                     }
01182                     break;
01183 #endif
01184 
01185                 default :
01186                     _lerror (_LELVL_ABORT, FEINTDTY);
01187             }
01188         }
01189 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines