Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
scatgath.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001, Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2.1 of the GNU Lesser General Public License 
00007   as published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU Lesser General Public 
00021   License along with this program; if not, write the Free Software 
00022   Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 
00023   USA.
00024 
00025   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00026   Mountain View, CA 94043, or:
00027 
00028   http://www.sgi.com
00029 
00030   For further information regarding this notice, see:
00031 
00032   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00033 
00034 */
00035 
00036 
00037 
00038 #pragma ident "@(#) libf/fio/scatgath.c 92.1    06/18/99 18:41:02"
00039 
00040 #if     defined(__mips)
00041 #include <sgidefs.h>
00042 #endif
00043 
00044 #include <memory.h>
00045 #include <cray/nassert.h>
00046 
/*
 *	Alignment helpers.
 *
 *	GET_PTR(ptr)		Integer value of a pointer.  On _CRAY1 the
 *				pointer bits are first passed through
 *				_dshiftl(p, p, 3); NOTE(review): presumably
 *				this turns the Cray pointer encoding into a
 *				byte address -- confirm against the Cray
 *				intrinsic documentation.  The argument is
 *				expanded twice on _CRAY1, so it must not
 *				have side effects.
 *	PTR_ALIGNED(ptr,type)	Nonzero if ptr is a multiple of sizeof(type).
 *	SIZE_ALIGNED(len,type)	Nonzero if len is a multiple of sizeof(type).
 *
 *	Both tests mask with sizeof(type) - 1, so sizeof(type) must be a
 *	power of two.  Every macro parameter is parenthesized so that an
 *	expression argument (e.g. "p + 1" or "a | b") is evaluated as a
 *	whole before it is cast or masked.
 */

#ifdef _CRAY1
#define GET_PTR(ptr)	( (long) _dshiftl((long) (ptr), (long) (ptr), 3) )
#else
#define GET_PTR(ptr)	( (long long) (ptr) )
#endif

#define PTR_ALIGNED(ptr,type)	( ( GET_PTR(ptr) & (sizeof(type) - 1) ) == 0 )

#define SIZE_ALIGNED(len,type)	( ( (len) & (sizeof(type) - 1) ) == 0 )
00056 
/*
 *	The gather/scatter routines below move data with native integer
 *	types whenever they can.  Because the width of each C type varies
 *	from one architecture to the next, the code refers to the widths by
 *	name rather than by type:
 *
 *		LARGE	8-byte
 *		MEDIUM	4-byte
 *		SMALL	2-byte
 *
 *
 *	Sizes (in bytes) of various types on different architectures:
 *
 *			  short    int     long  long long
 *	=========	 ======= ======= ======= =========
 *	Solaris		    2       4       4        8
 *	Mips -n32	    2       4       4        8
 *	Mips -64	    2       4       8        8
 *	CRAY MPP	    4       8       8        8
 *	CRAY PVP	    8       8       8        8
 *
 *	Not every width exists on the CRAY architectures; a name that has
 *	no matching type there is simply left undefined, and the code that
 *	uses it is compiled out with #ifdef.
 */

/* LARGE: "long long" where long is (or may be) 4 bytes, otherwise "long". */
#if	defined(_LITTLE_ENDIAN) || defined(_SOLARIS) || \
	( defined(__mips) && (_MIPS_SZLONG == 32) )
#  define LARGE	long long
#else
#  define LARGE	long
#endif

/* MEDIUM and SMALL: chosen per platform; either may stay undefined. */
#if	!defined(_CRAY)
#  define MEDIUM	int
#  define SMALL		short
#else
#  if	defined(_CRAYMPP)
#    define MEDIUM	short
#  elif	defined(__sv2)
#    define MEDIUM	int
#  endif
#endif
00096 
00097 /*
00098  *      _gather_data(buf, items, inc, len, ptr)
00099  *
00100  *      Gathers data items into a local buffer when there is a non-unit
00101  *      stride.
00102  *
00103  *      buf     Local target buffer for gathered data
00104  *      items   Number of items to be gathered
00105  *      inc     Increment in bytes between the start of the items to be 
00106  *              gathered (inc must be a multiple of len).
00107  *      len     Length of each item in bytes
00108  *      ptr     User array to be gathered
00109  */
00110 
00111 void
00112 _gather_data(
00113         void    *buf,
00114         long    items,
00115         long    inc,
00116         int     len,
00117         void    *ptr)
00118 {
00119         register long   i, j, k;
00120         register long   step;
00121 
00122         assert ( inc % len == 0 );  /* assumption simplifies alignment check */
00123 
00124         j       = 0;
00125         k       = 0;
00126 
00127 /*
00128  *      Process items which are 8, 16 or 32 bytes long and which are
00129  *      aligned on an 8-byte boundary.
00130  */
00131 
00132         if ( SIZE_ALIGNED(len, LARGE) && PTR_ALIGNED(ptr, LARGE) ) {
00133                 LARGE   *lp, *lb;
00134 
00135                 lp      = (LARGE *) ptr;
00136                 lb      = (LARGE *) buf;
00137                 step    = inc >> 3;
00138 
00139                 assert ( sizeof(LARGE) == 8 );
00140 
00141                 if (len == sizeof(LARGE)) {     /* If LARGE items */
00142 #ifdef  _CRAY1
00143 #pragma _CRI ivdep
00144 #endif
00145                         for (i = 0; i < items; i++) {
00146                                 lb[i]   = lp[k];
00147                                 k       = k + step;
00148                         }
00149                 }
00150                 else if (len == (sizeof(LARGE) << 1)) { /* If 2X LARGE items */
00151 #ifdef  _CRAY1
00152 #pragma _CRI ivdep
00153 #endif
00154                         for (i = 0; i < items; i++) {
00155                                 lb[j]   = lp[k];
00156                                 lb[j+1] = lp[k+1];
00157                                 j       = j + 2;
00158                                 k       = k + step;
00159                         }
00160                 }
00161                 else if (len == (sizeof(LARGE) << 2)) { /* If 4X LARGE items */
00162 #ifdef  _CRAY1
00163 #pragma _CRI ivdep
00164 #endif
00165                         for (i = 0 ; i < items; i++) {
00166                                 lb[j]   = lp[k];
00167                                 lb[j+1] = lp[k+1];
00168                                 lb[j+2] = lp[k+2];
00169                                 lb[j+3] = lp[k+3];
00170                                 j       = j + 4;
00171                                 k       = k + step;
00172                         }
00173                 }
00174                 else
00175                         goto general_case;
00176         }
00177 
00178 /*
00179  *      Process items which are 4 or 8 bytes long and which are aligned
00180  *      on a 4-byte boundary.
00181  */
00182 
00183 #ifdef  MEDIUM
00184         else if ( SIZE_ALIGNED(len, MEDIUM) && PTR_ALIGNED(ptr, MEDIUM) ) {
00185                 MEDIUM  *ip, *ib;
00186 
00187                 ip      = (MEDIUM *) ptr;
00188                 ib      = (MEDIUM *) buf;
00189                 step    = inc >> 2;
00190 
00191                 assert ( sizeof(MEDIUM) == 4 );
00192 
00193                 if (len == sizeof(MEDIUM)) {    /* If MEDIUM items */
00194                         for (i = 0; i < items; i++) {
00195                                 ib[i]   = ip[k];
00196                                 k       = k + step;
00197                         }
00198                 }
00199                 else if (len == (sizeof(MEDIUM) << 1)) { /* If 2X MEDIUM items */
00200                         for (i = 0; i < items; i++) {
00201                                 ib[j]   = ip[k];
00202                                 ib[j+1] = ip[k+1];
00203                                 j       = j + 2;
00204                                 k       = k + step;
00205                         }
00206                 }
00207                 else
00208                         goto general_case;
00209         }
00210 #endif  /* MEDIUM */
00211 
00212 /*
00213  *      Process items which are 2 or 4 bytes long and which are aligned
00214  *      on a 2-byte boundary.
00215  */
00216 
00217 #ifdef  SMALL
00218         else if ( SIZE_ALIGNED(len, SMALL) && PTR_ALIGNED(ptr, SMALL) ) {
00219                 SMALL   *sp, *sb;
00220 
00221                 sp      = (SMALL *) ptr;
00222                 sb      = (SMALL *) buf;
00223                 step    = inc >> 1;
00224 
00225                 assert ( sizeof(SMALL) == 2 );
00226 
00227                 if (len == sizeof(SMALL)) {     /* If SMALL items */
00228                         for (i = 0; i < items; i++) {
00229                                 sb[i]   = sp[k];
00230                                 k       = k + step;
00231                         }
00232                 }
00233                 else if (len == (sizeof(SMALL) << 1)) { /* If 2X SMALL items */
00234                         for (i = 0; i < items; i++) {
00235                                 sb[j]   = sp[k];
00236                                 sb[j+1] = sp[k+1];
00237                                 j       = j + 2;
00238                                 k       = k + step;
00239                         }
00240                 }
00241                 else
00242                         goto general_case;
00243         }
00244 #endif  /* SMALL */
00245 
00246 /*
00247  *      Process single byte items or anything else which is not naturally
00248  *      aligned.
00249  */
00250 
00251         else {
00252                 char    *cp, *cb;
00253 general_case:
00254                 cp      = (char *) ptr;
00255                 cb      = (char *) buf;
00256 
00257                 /* Don't call memcpy() for single byte transfers */
00258 
00259                 if (len == 1)
00260                         for (i = 0; i < items; i++) {
00261                                 *cb++   = *cp;
00262                                 cp      = cp + inc;
00263                         }
00264                 else
00265                         for (i = 0; i < items; i++) {
00266                                 (void) memcpy(cb, cp, len);
00267                                 cb      = cb + len;
00268                                 cp      = cp + inc;
00269                         }
00270         }
00271 
00272         return;
00273 }
00274 
00275 /*
00276  *      _scatter_data(ptr, items, inc, len, buf)
00277  *
00278  *      Scatters data items from a local buffer to a user array when there
00279  *      is a non-unit stride.
00280  *
00281  *      ptr     User array into which local data will be scattered
00282  *      items   Number of items to be scattered
00283  *      inc     Increment in bytes between the start of the items to be 
00284  *              gathered (inc must be a multiple of len).
00285  *      len     Length of each item in bytes
00286  *      buf     Local buffer containing packed data
00287  */
00288 void
00289 _scatter_data (
00290         void    *ptr,
00291         long    items,
00292         long    inc,
00293         int     len,
00294         void    *buf)
00295 {
00296         register long   i, j, k;
00297         register long   step;
00298 
00299         assert ( inc % len == 0 ); /* assumption simplifies alignment check */
00300 
00301         j       = 0;
00302         k       = 0;
00303 
00304 /*
00305  *      Process items which are 8, 16 or 32 bytes long and which are
00306  *      aligned on an 8-byte boundary.
00307  */
00308 
00309         if ( SIZE_ALIGNED(len, LARGE) && PTR_ALIGNED(ptr, LARGE) ) {
00310                 LARGE   *lp, *lb;
00311 
00312                 lp      = (LARGE *) ptr;
00313                 lb      = (LARGE *) buf;
00314                 step    = inc >> 3;
00315 
00316                 assert ( sizeof(LARGE) == 8 );
00317 
00318                 if (len == sizeof(LARGE)) {     /* If LARGE items */
00319 #ifdef  _CRAY1
00320 #pragma _CRI ivdep
00321 #endif
00322                         for (i = 0; i < items; i++) {
00323                                 lp[k]   = lb[i];
00324                                 k       = k + step;
00325                         }
00326                 }
00327                 else if (len == (sizeof(LARGE) << 1)) { /* If 2X LARGE items */
00328 #ifdef  _CRAY1
00329 #pragma _CRI ivdep
00330 #endif
00331                         for (i = 0; i < items; i++) {
00332                                 lp[k]   = lb[j];
00333                                 lp[k+1] = lb[j+1];
00334                                 j       = j + 2;
00335                                 k       = k + step;
00336                         }
00337                 }
00338                 else if (len == (sizeof(LARGE) << 2)) { /* If 4X LARGE items */
00339 #ifdef  _CRAY1
00340 #pragma _CRI ivdep
00341 #endif
00342                         for (i = 0 ; i < items; i++) {
00343                                 lp[k]   = lb[j];
00344                                 lp[k+1] = lb[j+1];
00345                                 lp[k+2] = lb[j+2];
00346                                 lp[k+3] = lb[j+3];
00347                                 j       = j + 4;
00348                                 k       = k + step;
00349                         }
00350                 }
00351                 else
00352                         goto general_case;
00353         }
00354 
00355 /*
00356  *      Process items which are 4 or 8 bytes long and which are aligned
00357  *      on a 4-byte boundary.
00358  */
00359 
00360 #ifdef  MEDIUM
00361         else if ( SIZE_ALIGNED(len, MEDIUM) && PTR_ALIGNED(ptr, MEDIUM) ) {
00362                 MEDIUM  *ip, *ib;
00363 
00364                 ip      = (MEDIUM *) ptr;
00365                 ib      = (MEDIUM *) buf;
00366                 step    = inc >> 2;
00367 
00368                 assert ( sizeof(MEDIUM) == 4 );
00369 
00370                 if (len == sizeof(MEDIUM)) {    /* If MEDIUM items */
00371                         for (i = 0; i < items; i++) {
00372                                 ip[k]   = ib[i];
00373                                 k       = k + step;
00374                         }
00375                 }
00376                 else if (len == (sizeof(MEDIUM) << 1)) { /* If 2X MEDIUM items */
00377                         for (i = 0; i < items; i++) {
00378                                 ip[k]   = ib[j];
00379                                 ip[k+1] = ib[j+1];
00380                                 j       = j + 2;
00381                                 k       = k + step;
00382                         }
00383                 }
00384                 else
00385                         goto general_case;
00386         }
00387 #endif  /* MEDIUM */
00388 
00389 /*
00390  *      Process items which are 2 or 4 bytes long and which are aligned
00391  *      on a 2-byte boundary.
00392  */
00393 
00394 #ifdef  SMALL
00395         else if ( SIZE_ALIGNED(len, SMALL) && PTR_ALIGNED(ptr, SMALL) ) {
00396                 SMALL   *sp, *sb;
00397 
00398                 sp      = (SMALL *) ptr;
00399                 sb      = (SMALL *) buf;
00400                 step    = inc >> 1;
00401 
00402                 assert ( sizeof(SMALL) == 2 );
00403 
00404                 if (len == sizeof(SMALL)) {     /* If SMALL items */
00405                         for (i = 0; i < items; i++) {
00406                                 sp[k]   = sb[i];
00407                                 k       = k + step;
00408                         }
00409                 }
00410                 else if (len == (sizeof(SMALL) << 1)) { /* If 2X SMALL items */
00411                         for (i = 0; i < items; i++) {
00412                                 sp[k]   = sb[j];
00413                                 sp[k+1] = sb[j+1];
00414                                 j       = j + 2;
00415                                 k       = k + step;
00416                         }
00417                 }
00418                 else
00419                         goto general_case;
00420         }
00421 #endif  /* SMALL */
00422 
00423 /*
00424  *      Process single byte items or anything else which is not naturally
00425  *      aligned.
00426  */
00427 
00428         else {
00429                 char    *cp, *cb;
00430 general_case:
00431                 cp      = (char *) ptr;
00432                 cb      = (char *) buf;
00433 
00434                 /* Don't call memcpy() for single byte transfers */
00435 
00436                 if (len == 1)
00437                         for (i = 0; i < items; i++) {
00438                                 *cp     = *cb++;
00439                                 cp      = cp + inc;
00440                         }
00441                 else
00442                         for (i = 0; i < items; i++) {
00443                                 (void) memcpy(cp, cb, len);
00444                                 cb      = cb + len;
00445                                 cp      = cp + inc;
00446                         }
00447         }
00448 
00449         return;
00450 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines