OpenAD/Open64Dox/spread_8c_source.html

00001 /*
00002
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License
00007   as published by the Free Software Foundation.
00008
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00012
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement
00015   or the like.  Any license provided herein, whether implied or
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with
00018   other software, or any other product whatsoever.
00019
00020   You should have received a copy of the GNU Lesser General Public
00021   License along with this program; if not, write the Free Software
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307,
00023   USA.
00024
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027
00028   http://www.sgi.com
00029
00030   For further information regarding this notice, see:
00031
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033
00034 */
00035
00036
/* Identification string for this source file (path, revision, date stamp). */
static const char USMID[] =
        "@(#) libfi/array/spread.c  92.0    10/08/98 14:37:14";
00038
00039 #include <stddef.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "arraydefs.h"
00044
/*
 *      Replicates an array by adding a dimension.  Broadcasts several
 *      copies of SOURCE along a specified dimension and thus forms an
 *      array of rank one greater.
 */

/*
 *      INCREMENT() - advance to the next source element, first index
 *      varying fastest, and update the matching offsets.
 *
 *      The macro works on variables of the enclosing function:
 *
 *        curdim[]    current index value in each source dimension
 *        src_ext[]   source extents
 *        src_strd[]  source strides
 *        src_indx[]  offset accumulated so far in each source dimension
 *        res_strd[]  result strides for the non-added dimensions
 *        res_indx[]  offset accumulated so far in each of those dimensions
 *        sindx       total offset into the source array
 *        res_tmp2    result offset contributed by the non-added dimensions
 *
 *      Starting at dimension 0, the index is bumped.  If it is still inside
 *      the extent, the stride is added to the running offsets and the step
 *      is complete.  Otherwise that dimension's accumulated offset is
 *      subtracted again, its counters are reset to 0, and the carry moves
 *      on to the next dimension.
 *
 *      Dimension INCREMENT_TOP_DIM is the last one handled; it is advanced
 *      without an extent check and never wraps, so the caller's loop count
 *      is what terminates the walk.
 *
 *      The body is wrapped in do { } while (0) so that INCREMENT(); is a
 *      single statement and is safe in an unbraced if/else.
 */

#define INCREMENT_TOP_DIM       5

#define INCREMENT()                                                     \
        do {                                                            \
            int inc_d_;                                                 \
            for (inc_d_ = 0; ; inc_d_++) {                              \
                curdim[inc_d_]++;                                       \
                if (inc_d_ == INCREMENT_TOP_DIM ||                      \
                    curdim[inc_d_] < src_ext[inc_d_]) {                 \
                    src_indx[inc_d_] += src_strd[inc_d_];               \
                    sindx += src_strd[inc_d_];                          \
                    res_indx[inc_d_] += res_strd[inc_d_];               \
                    res_tmp2 += res_strd[inc_d_];                       \
                    break;                                              \
                }                                                       \
                sindx -= src_indx[inc_d_];                              \
                res_tmp2 -= res_indx[inc_d_];                           \
                curdim[inc_d_] = 0;                                     \
                src_indx[inc_d_] = 0;                                   \
                res_indx[inc_d_] = 0;                                   \
            }                                                           \
        } while (0)
00128
00129 #ifdef _UNICOS
00130 #pragma _CRI duplicate _SPREAD as SPREAD@
00131 #endif
00132 void
00133 _SPREAD (       DopeVectorType * result,
00134                 DopeVectorType * source,
00135                 _f_int          *dimp,
00136                 _f_int          *ncopiesp)
00137 {
00138         char    *cs;            /* char ptr to source array     */
00139         char    *cr;            /* char ptr to result array     */
00140         char            * restrict cptr1;       /* char         */
00141         char            * restrict cptr2;       /* char         */
00142         _f_int8         * restrict uptr1;       /* full word    */
00143         _f_int8         * restrict uptr2;       /* full word    */
00144         _f_int          * restrict fptr1;       /* full word    */
00145         _f_int          * restrict fptr2;       /* full word    */
00146         _f_real16       * restrict dptr1;       /* double word  */
00147         _f_real16       * restrict dptr2;       /* double word  */
00148 #ifdef _F_COMP16
00149         dblcmplx        * restrict xptr1;       /* quad word    */
00150         dblcmplx        * restrict xptr2;       /* quad word    */
00151 #endif
00152         _f_int4         * restrict hptr1;       /* half word    */
00153         _f_int4         * restrict hptr2;       /* half word    */
00154         void            * restrict sptr;        /* ptr to src   */
00155         void            * restrict rptr;        /* ptr to res   */
00156         _f_int  dim;            /* dimension value              */
00157         long    ncopies;        /* ncopies value                */
00158         _f_int  rank;           /* rank of source matrix        */
00159         _f_int  type;           /* type of source matrix        */
00160         _f_int  subtype;        /* sub-type                     */
00161         _f_int  arithmetic;     /* arithmetic data type         */
00162         long    nbytes;         /* # bytes in data area         */
00163         long    sindx;          /* source index                 */
00164         long    sindx2;         /* source index + 1             */
00165         long    rindx;          /* result index                 */
00166         _f_int  bytealligned;   /* byte aligned flag            */
00167         _f_int  bucketsize;     /* element size                 */
00168         long    src_strd[MAXDIM];       /* index stride         */
00169         long    src_ext[MAXDIM];        /* extents              */
00170         long    src_indx[MAXDIM];       /* index counters       */
00171         long    res_strd[MAXDIM];       /* index stride         */
00172         long    res_ext[MAXDIM];        /* extents              */
00173         long    res_indx[MAXDIM];       /* index counters       */
00174         long    curdim[MAXDIM];         /* current dimension    */
00175         long    tot_ext;                /* total extent counter */
00176         long    src_tot_ext;            /* total source extent  */
00177         _f_int  ndim;                   /* non-dim dimension    */
00178         long    res_tmp1;               /* temp value           */
00179         long    res_tmp2;               /* temp value           */
00180         long    res_dim_strd;           /* stride for dim index */
00181         long    cnt;                    /* counter              */
00182         long    i, j, k;                /* index variables      */
00183         _f_int  early_exit;             /* early exit flag      */
00184
00185 /*      Set type and dimension global variables */
00186
00187         rank = source->n_dim;
00188         type = source->type_lens.type;
00189
00190 /*      See if we can use the shortcut exit     */
00191
00192         early_exit = 0;
00193         for (i = 0; i < rank; i++) {
00194             if (source->dimension[i].extent == 0)
00195                 early_exit = 1;
00196         }
00197         if (result->assoc) {
00198             for (i = 0; i < result->n_dim; i++) {
00199                 if (result->dimension[i].extent == 0)
00200                     early_exit = 1;
00201             }
00202         }
00203
00204 /*
00205  *      Initialize every array element to 0.
00206  */
00207
00208 #ifdef _UNICOS
00209 #pragma _CRI    shortloop
00210 #endif
00211         for (i = 0; i < MAXDIM; i++) {
00212             curdim[i] = 0;
00213             src_strd[i] = 0;
00214             src_ext[i] = 0;
00215             src_indx[i] = 0;
00216             res_strd[i] = 0;
00217             res_ext[i] = 0;
00218             res_indx[i] = 0;
00219         }
00220
00221 /*      Set up scalars for dim and ncopies      */
00222
00223         if (*dimp < 1 || *dimp > rank+2)
00224             _lerror (_LELVL_ABORT, FESCIDIM);
00225         dim = *dimp - 1;
00226         if (*ncopiesp > 0)
00227             ncopies = *ncopiesp;
00228         else
00229             ncopies = 0;
00230
00231 /*      Size calculation is based on variable type      */
00232
00233         switch (type) {
00234             case DVTYPE_ASCII :
00235                 bytealligned = 1;
00236                 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */
00237                 subtype = DVSUBTYPE_CHAR;
00238                 arithmetic = 0;
00239                 break;
00240             case DVTYPE_DERIVEDBYTE :
00241                 bytealligned = 1;
00242                 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE;
00243                 subtype = DVSUBTYPE_CHAR;
00244                 arithmetic = 0;
00245                 break;
00246             case DVTYPE_DERIVEDWORD :
00247                 bytealligned = 0;
00248                 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD;
00249                 subtype = DVSUBTYPE_DERIVED;
00250                 arithmetic = 0;
00251                 break;
00252             default :
00253                 bytealligned = 0;
00254                 bucketsize = source->type_lens.int_len / BITS_PER_WORD;
00255                 if (source->type_lens.int_len == 64) {
00256                     subtype = DVSUBTYPE_BIT64;
00257                 } else if (source->type_lens.int_len == 32) {
00258                     subtype = DVSUBTYPE_BIT32;
00259                     bucketsize = 1;
00260                 } else if (source->type_lens.int_len == 256) {
00261                     subtype = DVSUBTYPE_BIT256;
00262                 } else {
00263                     subtype = DVSUBTYPE_BIT128;
00264                 }
00265                 arithmetic = 1;
00266         }
00267
00268 /*      If necessary, fill result dope vector   */
00269         if (!result->assoc) {
00270             result->base_addr.a.ptr = (void *) NULL;
00271             result->orig_base      = 0;
00272             result->orig_size      = 0;
00273
00274             tot_ext = 1;
00275 #ifdef _UNICOS
00276 #pragma _CRI novector
00277 #endif
00278             for (i = 0; i < dim; i++) {
00279                 result->dimension[i].extent = source->dimension[i].extent;
00280                 result->dimension[i].stride_mult = tot_ext * bucketsize;
00281                 result->dimension[i].low_bound = 1;
00282                 tot_ext *= source->dimension[i].extent;
00283             }
00284             result->dimension[i].extent = ncopies;
00285             result->dimension[i].stride_mult = tot_ext * bucketsize;
00286             result->dimension[i].low_bound = 1;
00287             tot_ext *= ncopies;
00288             i++;
00289 #ifdef _UNICOS
00290 #pragma _CRI novector
00291 #endif
00292             for ( ; i < rank+1; i++) {
00293                 result->dimension[i].extent = source->dimension[i-1].extent;
00294                 result->dimension[i].stride_mult = tot_ext * bucketsize;
00295                 result->dimension[i].low_bound = 1;
00296                 tot_ext *= source->dimension[i-1].extent;
00297             }
00298
00299             if (!bytealligned) {
00300                 nbytes = bucketsize * BYTES_PER_WORD;
00301 #ifdef _CRAYMPP
00302                 if (subtype == DVSUBTYPE_BIT32)
00303                     nbytes /= 2;
00304 #endif
00305             } else {
00306                 nbytes = bucketsize;
00307             }
00308             for (i = 0; i < result->n_dim; i++)
00309                 nbytes *= result->dimension[i].extent;
00310             if (ncopies > 0 && nbytes > 0) {
00311                 result->base_addr.a.ptr = (void *) malloc (nbytes);
00312                 if (result->base_addr.a.ptr == NULL)
00313                     _lerror (_LELVL_ABORT, FENOMEMY);
00314             } else {
00315                 result->base_addr.a.ptr = NULL;
00316             }
00317
00318             result->assoc = 1;
00319             result->base_addr.a.el_len = source->base_addr.a.el_len;
00320             if (type == DVTYPE_ASCII) {
00321                 result->base_addr.a.ptr = (void *) result->base_addr.a.ptr;
00322                 cr = (char *) result->base_addr.a.ptr;
00323                 result->base_addr.charptr = _cptofcd (cr, bucketsize);
00324             }
00325             result->orig_base = (void *) result->base_addr.a.ptr;
00326             result->orig_size = nbytes * BITS_PER_BYTE;
00327         }
00328
00329 /*      If ncopies is 0 or early exit conditions met, return    */
00330
00331         if (ncopies == 0 || early_exit == 1)
00332             return;
00333
00334 /*      Set up scalar pointers to the argument data areas       */
00335
00336         if (!bytealligned) {
00337             sptr = (void *) source->base_addr.a.ptr;
00338             rptr = (void *) result->base_addr.a.ptr;
00339         } else {
00340             if (type == DVTYPE_ASCII) {
00341                 cs = _fcdtocp (source->base_addr.charptr);
00342                 cr = _fcdtocp (result->base_addr.charptr);
00343             } else {
00344                 cs = (char *) source->base_addr.a.ptr;
00345                 cr = (char *) result->base_addr.a.ptr;
00346             }
00347         }
00348
00349 /*
00350  *      This program is divided up into three sections.  The first handles
00351  *      scalars being expanded into 1-dimensional arrays.  The second treats
00352  *      arrays being spread into 2-dimensional matrices.  The last section
00353  *      deals with all other cases.  Inside each section, the data types
00354  *      are broken down into groups based on their bit size.  All 64-bit
00355  *      entities are handled as the same data type, as are all 128-bit, and
00356  *      where applicable, all 32-bit and all 256-bit data types are treated
00357  *      together.  Character and derived types are also handled separately.
00358  */
00359
00360         if (rank == 0) {
00361             if (bucketsize > 1 && arithmetic) {
00362                 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00363             } else {
00364                 res_strd[0] = result->dimension[0].stride_mult;
00365             }
00366             switch (subtype) {
00367                 case DVSUBTYPE_BIT64 :
00368                     uptr1 = (_f_int8 *) sptr;
00369                     uptr2 = (_f_int8 *) rptr;
00370                     for (i = 0; i < ncopies; i++) {
00371                         rindx = i * res_strd[0];
00372                         uptr2[rindx] = uptr1[0];
00373                     }
00374                     break;
00375
00376                 case DVSUBTYPE_BIT32 :
00377                     hptr1 = (_f_int4 *) sptr;
00378                     hptr2 = (_f_int4 *) rptr;
00379                     for (i = 0; i < ncopies; i++) {
00380                         rindx = i * res_strd[0];
00381                         hptr2[rindx] = hptr1[0];
00382                     }
00383                     break;
00384
00385                 case DVSUBTYPE_BIT128 :
00386                     dptr1 = (_f_real16 *) sptr;
00387                     dptr2 = (_f_real16 *) rptr;
00388                     for (i = 0; i < ncopies; i++) {
00389                         rindx = i * res_strd[0];
00390                         dptr2[rindx] = dptr1[0];
00391                     }
00392                     break;
00393
00394                 case DVSUBTYPE_CHAR :
00395                     cptr1 = (char *) cs;
00396                     for (i = 0; i < ncopies; i++) {
00397                         cptr2 = (char *) cr + (i * res_strd[0]);
00398                         (void) memcpy (cptr2, cptr1, bucketsize);
00399                     }
00400                 break;
00401
00402                 case DVSUBTYPE_DERIVED :
00403                     for (i = 0; i < bucketsize; i++) {
00404                         fptr1 = (_f_int *) sptr + i;
00405                         fptr2 = (_f_int *) rptr + i;
00406                         for (j = 0; j < ncopies; j++) {
00407                             rindx = j * res_strd[0];
00408                             fptr2[rindx] = fptr1[0];
00409                         }
00410                     }
00411                     break;
00412
00413 #ifdef _F_COMP16
00414                 case DVSUBTYPE_BIT256 :
00415                     xptr1 = (dblcmplx *) sptr;
00416                     xptr2 = (dblcmplx *) rptr;
00417                     for (i = 0; i < ncopies; i++) {
00418                         rindx = i * res_strd[0];
00419                         xptr2[rindx].re = xptr1[0].re;
00420                         xptr2[rindx].im = xptr1[0].im;
00421                     }
00422                     break;
00423 #endif
00424
00425                 default :
00426                     _lerror (_LELVL_ABORT, FEINTDTY);
00427             }
00428         } else if (rank == 1) {
00429             src_ext[0] = source->dimension[0].extent;
00430             if (bucketsize > 1 && arithmetic) {
00431                 src_strd[0] = source->dimension[0].stride_mult / bucketsize;
00432                 res_strd[0] = result->dimension[0].stride_mult / bucketsize;
00433                 res_strd[1] = result->dimension[1].stride_mult / bucketsize;
00434             } else {
00435                 src_strd[0] = source->dimension[0].stride_mult;
00436                 res_strd[0] = result->dimension[0].stride_mult;
00437                 res_strd[1] = result->dimension[1].stride_mult;
00438             }
00439
00440             if (dim == 0)
00441                 ndim = 1;
00442             else
00443                 ndim = 0;
00444
00445             switch (subtype) {
00446                 case DVSUBTYPE_BIT64 :
00447                     uptr1 = (_f_int8 *) sptr;
00448                     uptr2 = (_f_int8 *) rptr;
00449                     for (i = 0; i < src_ext[0]; i++) {
00450                         res_tmp1 = i * res_strd[ndim];
00451                         sindx = i * src_strd[0];
00452                         for (j = 0; j < ncopies; j++) {
00453                             rindx = res_tmp1 + (j * res_strd[dim]);
00454                             uptr2[rindx] = uptr1[sindx];
00455                         }
00456                     }
00457                     break;
00458
00459                 case DVSUBTYPE_BIT32 :
00460                     hptr1 = (_f_int4 *) sptr;
00461                     hptr2 = (_f_int4 *) rptr;
00462                     for (i = 0; i < src_ext[0]; i++) {
00463                         res_tmp1 = i * res_strd[ndim];
00464                         sindx = i * src_strd[0];
00465                         for (j = 0; j < ncopies; j++) {
00466                             rindx = res_tmp1 + (j * res_strd[dim]);
00467                             hptr2[rindx] = hptr1[sindx];
00468                         }
00469                     }
00470                     break;
00471
00472                 case DVSUBTYPE_BIT128 :
00473                     dptr1 = (_f_real16 *) sptr;
00474                     dptr2 = (_f_real16 *) rptr;
00475                     for (i = 0; i < src_ext[0]; i++) {
00476                         res_tmp1 = i * res_strd[ndim];
00477                         sindx = i * src_strd[0];
00478                         for (j = 0; j < ncopies; j++) {
00479                             rindx = res_tmp1 + (j * res_strd[dim]);
00480                             dptr2[rindx] = dptr1[sindx];
00481                         }
00482                     }
00483                     break;
00484
00485                 case DVSUBTYPE_CHAR :
00486                     for (i = 0; i < src_ext[0]; i++) {
00487                         res_tmp1 = i * res_strd[ndim];
00488                         sindx = i * src_strd[0];
00489                         cptr1 = (char *) cs + sindx;
00490                         for (j = 0; j < ncopies; j++) {
00491                             rindx = res_tmp1 + (j * res_strd[dim]);
00492                             cptr2 = (char *) cr + rindx;
00493                             (void) memcpy (cptr2, cptr1, bucketsize);
00494                         }
00495                     }
00496                     break;
00497
00498                 case DVSUBTYPE_DERIVED :
00499                     for (i = 0; i < bucketsize; i++) {
00500                         fptr1 = (_f_int *) sptr + i;
00501                         fptr2 = (_f_int *) rptr + i;
00502                         for (j = 0; j < src_ext[0]; j++) {
00503                             res_tmp1 = j * res_strd[ndim];
00504                             sindx = j * src_strd[0];
00505                             for (k = 0; k < ncopies; k++) {
00506                                 rindx = res_tmp1 + (k * res_strd[dim]);
00507                                 fptr2[rindx] = fptr1[sindx];
00508                             }
00509                         }
00510                     }
00511                     break;
00512
00513 #ifdef _F_COMP16
00514                 case DVSUBTYPE_BIT256 :
00515                     xptr1 = (dblcmplx *) sptr;
00516                     xptr2 = (dblcmplx *) rptr;
00517                     for (i = 0; i < src_ext[0]; i++) {
00518                         res_tmp1 = i * res_strd[ndim];
00519                         sindx = i * src_strd[0];
00520                         for (j = 0; j < ncopies; j++) {
00521                             rindx = res_tmp1 + (j * res_strd[dim]);
00522                             xptr2[rindx].re = xptr1[sindx].re;
00523                             xptr2[rindx].im = xptr1[sindx].im;
00524                         }
00525                     }
00526                     break;
00527 #endif
00528
00529                 default :
00530                     _lerror (_LELVL_ABORT, FEINTDTY);
00531             }
00532         } else {
00533             src_tot_ext = 1;
00534 #ifdef _UNICOS
00535 #pragma _CRI    shortloop
00536 #endif
00537             for (i = 0; i < rank; i++) {
00538                 if (bucketsize > 1 && arithmetic) {
00539                     src_strd[i] = source->dimension[i].stride_mult / bucketsize;
00540                 } else {
00541                     src_strd[i] = source->dimension[i].stride_mult;
00542                 }
00543                 src_ext[i] = source->dimension[i].extent;
00544                 src_tot_ext *= src_ext[i];
00545                 src_indx[i] = 0;
00546                 curdim[i] = 0;
00547             }
00548
00549             cnt = 0;
00550             tot_ext = 1;
00551 #ifdef _UNICOS
00552 #pragma _CRI    shortloop
00553 #endif
00554             for (i = 0; i <= rank; i++) {
00555                 res_indx[i] = 0;
00556                 if (i != dim) {
00557                     if (type == DVTYPE_ASCII || type == DVTYPE_DERIVEDBYTE ||
00558                         type == DVTYPE_DERIVEDWORD)
00559                         res_strd[cnt] = tot_ext * bucketsize;
00560                     else
00561                         res_strd[cnt] = tot_ext;
00562                     res_ext[cnt] = result->dimension[i].extent;
00563                     tot_ext *= res_ext[cnt];
00564                     cnt++;
00565                 } else {
00566                     if (type == DVTYPE_ASCII || type == DVTYPE_DERIVEDBYTE ||
00567                         type == DVTYPE_DERIVEDWORD)
00568                         res_dim_strd = tot_ext * bucketsize;
00569                     else
00570                         res_dim_strd = tot_ext;
00571                     tot_ext *= ncopies;
00572                 }
00573             }
00574
00575             switch (subtype) {
00576                 case DVSUBTYPE_BIT64 :
00577                     uptr1 = (_f_int8 *) sptr;
00578                     uptr2 = (_f_int8 *) rptr;
00579                     sindx = 0;
00580                     res_tmp2 = 0;
00581                     for (i = 0; i < src_tot_ext; i++) {
00582                         for (j = 0; j < ncopies; j++) {
00583                             rindx = res_tmp2 + (j * res_dim_strd);
00584                             uptr2[rindx] = uptr1[sindx];
00585                         }
00586                         INCREMENT();
00587                     }
00588                     break;
00589
00590                 case DVSUBTYPE_BIT32 :
00591                     hptr1 = (_f_int4 *) sptr;
00592                     hptr2 = (_f_int4 *) rptr;
00593                     sindx = 0;
00594                     res_tmp2 = 0;
00595                     for (i = 0; i < src_tot_ext; i++) {
00596                         for (j = 0; j < ncopies; j++) {
00597                             rindx = res_tmp2 + (j * res_dim_strd);
00598                             hptr2[rindx] = hptr1[sindx];
00599                         }
00600                         INCREMENT();
00601                     }
00602                     break;
00603
00604                 case DVSUBTYPE_BIT128 :
00605                     dptr1 = (_f_real16 *) sptr;
00606                     dptr2 = (_f_real16 *) rptr;
00607                     sindx = 0;
00608                     res_tmp2 = 0;
00609                     for (i = 0; i < src_tot_ext; i++) {
00610                         for (j = 0; j < ncopies; j++) {
00611                             rindx = res_tmp2 + (j * res_dim_strd);
00612                             dptr2[rindx] = dptr1[sindx];
00613                         }
00614                         INCREMENT();
00615                     }
00616                     break;
00617
00618                 case DVSUBTYPE_CHAR :
00619                     sindx = 0;
00620                     res_tmp2 = 0;
00621                     for (i = 0; i < src_tot_ext; i++) {
00622                         cptr1 = (char *) cs + sindx;
00623                         for (j = 0; j < ncopies; j++) {
00624                             rindx = res_tmp2 + (j * res_dim_strd);
00625                             cptr2 = (char *) cr + rindx;
00626                             (void) memcpy (cptr2, cptr1, bucketsize);
00627                         }
00628                         INCREMENT();
00629                     }
00630                     break;
00631
00632                 case DVSUBTYPE_DERIVED :
00633                     for (i = 0; i < bucketsize; i++) {
00634                         fptr1 = (_f_int *) sptr + i;
00635                         fptr2 = (_f_int *) rptr + i;
00636                         sindx = 0;
00637                         res_tmp2 = 0;
00638                         for (j = 0; j < src_tot_ext; j++) {
00639                             for (k = 0; k < ncopies; k++) {
00640                                 rindx = res_tmp2 + (k * res_dim_strd);
00641                                 fptr2[rindx] = fptr1[sindx];
00642                             }
00643                             INCREMENT();
00644                         }
00645                     }
00646                     break;
00647
00648 #ifdef _F_COMP16
00649                 case DVSUBTYPE_BIT256 :
00650                     xptr1 = (dblcmplx *) sptr;
00651                     xptr2 = (dblcmplx *) rptr;
00652                     sindx = 0;
00653                     res_tmp2 = 0;
00654                     for (i = 0; i < src_tot_ext; i++) {
00655                         for (j = 0; j < ncopies; j++) {
00656                             rindx = res_tmp2 + (j * res_dim_strd);
00657                             xptr2[rindx].re = xptr1[sindx].re;
00658                             xptr2[rindx].im = xptr1[sindx].im;
00659                         }
00660                         INCREMENT();
00661                     }
00662                     break;
00663 #endif
00664
00665                 default :
00666                     _lerror (_LELVL_ABORT, FEINTDTY);
00667             }
00668         }
00669 }