Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
/*

  Copyright (C) 2000, 2001, Silicon Graphics, Inc.  All Rights Reserved.

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2.1 of the GNU Lesser General Public License
  as published by the Free Software Foundation.

  This program is distributed in the hope that it would be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

  Further, this software is distributed without any warranty that it is
  free of the rightful claim of any third person regarding infringement
  or the like.  Any license provided herein, whether implied or
  otherwise, applies only to this software file.  Patent licenses, if
  any, provided herein do not apply to combinations of this program with
  other software, or any other product whatsoever.

  You should have received a copy of the GNU Lesser General Public
  License along with this program; if not, write the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307,
  USA.

  Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
  Mountain View, CA 94043, or:

  http://www.sgi.com

  For further information regarding this notice, see:

  http://oss.sgi.com/projects/GenInfo/NoticeExplan

*/



#pragma ident "@(#) libf/fio/scatgath.c	92.1	06/18/99 18:41:02"

#if	defined(__mips)
#include <sgidefs.h>
#endif

#include <memory.h>
#include <string.h>		/* standard home of memcpy() */

/*
 *	Use the project's assert header where the compiler can find it and
 *	fall back to the standard one otherwise.  Compilers which do not
 *	provide __has_include keep the unconditional include.
 */
#if	defined(__has_include)
#if	__has_include(<cray/nassert.h>)
#include <cray/nassert.h>
#else
#include <assert.h>
#endif
#else
#include <cray/nassert.h>
#endif

#ifdef	_CRAY1
#define GET_PTR(ptr)	( (long) _dshiftl((long) (ptr), (long) (ptr), 3) )
#else
#define GET_PTR(ptr)	( (long long) (ptr) )
#endif

/* True if the address held in ptr is a multiple of sizeof(type) */
#define PTR_ALIGNED(ptr,type)	( ( GET_PTR(ptr) & (sizeof(type) - 1) ) == 0)

/* True if the byte count len is a multiple of sizeof(type) */
#define SIZE_ALIGNED(len,type)	( ( (len) & (sizeof(type) - 1) ) == 0)

/*
 *	True if both addresses are multiples of sizeof(type).  The typed
 *	copy loops below dereference the user array and the local buffer
 *	through pointers of the same wide type, so both must be aligned.
 */
#define BOTH_ALIGNED(p,q,type)	( PTR_ALIGNED(p, type) && PTR_ALIGNED(q, type) )

/*
 *	These routines handle gather/scatter operations using native data
 *	types, where applicable.  Due to the different sizes of various
 *	data types on different architectures, these routines use the
 *	following notation:
 *
 *		LARGE	8-byte
 *		MEDIUM	4-byte
 *		SMALL	2-byte
 *
 *
 *	Sizes (in bytes) of various types on different architectures:
 *
 *			short	int	long	long long
 *	=========	=======	=======	=======	=========
 *	Solaris		2	4	4	8
 *	Mips -n32	2	4	4	8
 *	Mips -64	2	4	8	8
 *	CRAY MPP	4	8	8	8
 *	CRAY PVP	8	8	8	8
 *
 *	Note that the CRAY architectures do not support all sizes.
 */

#if	defined(_SOLARIS) || ( defined(__mips) && (_MIPS_SZLONG == 32) ) || \
	defined(_LITTLE_ENDIAN)
#define LARGE	long long
#else
#define LARGE	long
#endif

#ifndef	_CRAY
#define MEDIUM	int
#define SMALL	short
#elif	defined(_CRAYMPP)
#define MEDIUM	short
#elif	defined(__sv2)
#define MEDIUM	int
#endif

/*
 *	_gather_data(buf, items, inc, len, ptr)
 *
 *	Gathers data items into a local buffer when there is a non-unit
 *	stride.
 *
 *	buf	Local target buffer for gathered data (receives items*len
 *		contiguous bytes)
 *	items	Number of items to be gathered
 *	inc	Increment in bytes between the start of the items to be
 *		gathered (inc must be a multiple of len).
 *	len	Length of each item in bytes (must be positive)
 *	ptr	User array to be gathered
 *
 *	Items whose length is 1, 2 or 4 times LARGE, or 1 or 2 times MEDIUM
 *	or SMALL, are moved with typed loads and stores provided that both
 *	ptr and buf are aligned for that type.  Everything else is moved
 *	byte-wise or with memcpy().
 */

void
_gather_data(
	void	*buf,
	long	items,
	long	inc,
	int	len,
	void	*ptr)
{
	long	i, j, k;
	long	step;
	char	*cp, *cb;

	assert ( len > 0 );		/* guards the modulus below */
	assert ( inc % len == 0 );	/* assumption simplifies alignment check */

	j	= 0;
	k	= 0;

	/*
	 *	Process items which are 8, 16 or 32 bytes long and which are
	 *	aligned on an 8-byte boundary.
	 */

	if ( SIZE_ALIGNED(len, LARGE) && BOTH_ALIGNED(ptr, buf, LARGE) ) {
		LARGE	*lp, *lb;

		lp	= (LARGE *) ptr;
		lb	= (LARGE *) buf;

		/* inc is a multiple of len, hence of sizeof(LARGE): exact */
		step	= inc / (long) sizeof(LARGE);

		assert ( sizeof(LARGE) == 8 );

		if (len == sizeof(LARGE)) {		/* If LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0; i < items; i++) {
				lb[i]	= lp[k];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(LARGE) << 1)) {	/* If 2X LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0; i < items; i++) {
				lb[j]	= lp[k];
				lb[j+1]	= lp[k+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(LARGE) << 2)) {	/* If 4X LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0 ; i < items; i++) {
				lb[j]	= lp[k];
				lb[j+1]	= lp[k+1];
				lb[j+2]	= lp[k+2];
				lb[j+3]	= lp[k+3];
				j	= j + 4;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of LARGE uses the general case */
	}

	/*
	 *	Process items which are 4 or 8 bytes long and which are aligned
	 *	on a 4-byte boundary.
	 */

#ifdef	MEDIUM
	else if ( SIZE_ALIGNED(len, MEDIUM) && BOTH_ALIGNED(ptr, buf, MEDIUM) ) {
		MEDIUM	*ip, *ib;

		ip	= (MEDIUM *) ptr;
		ib	= (MEDIUM *) buf;
		step	= inc / (long) sizeof(MEDIUM);

		assert ( sizeof(MEDIUM) == 4 );

		if (len == sizeof(MEDIUM)) {		/* If MEDIUM items */
			for (i = 0; i < items; i++) {
				ib[i]	= ip[k];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(MEDIUM) << 1)) {	/* If 2X MEDIUM items */
			for (i = 0; i < items; i++) {
				ib[j]	= ip[k];
				ib[j+1]	= ip[k+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of MEDIUM uses the general case */
	}
#endif	/* MEDIUM */

	/*
	 *	Process items which are 2 or 4 bytes long and which are aligned
	 *	on a 2-byte boundary.
	 */

#ifdef	SMALL
	else if ( SIZE_ALIGNED(len, SMALL) && BOTH_ALIGNED(ptr, buf, SMALL) ) {
		SMALL	*sp, *sb;

		sp	= (SMALL *) ptr;
		sb	= (SMALL *) buf;
		step	= inc / (long) sizeof(SMALL);

		assert ( sizeof(SMALL) == 2 );

		if (len == sizeof(SMALL)) {		/* If SMALL items */
			for (i = 0; i < items; i++) {
				sb[i]	= sp[k];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(SMALL) << 1)) {	/* If 2X SMALL items */
			for (i = 0; i < items; i++) {
				sb[j]	= sp[k];
				sb[j+1]	= sp[k+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of SMALL uses the general case */
	}
#endif	/* SMALL */

	/*
	 *	Process single byte items or anything else which is not naturally
	 *	aligned.
	 */

	cp	= (char *) ptr;
	cb	= (char *) buf;

	/* Don't call memcpy() for single byte transfers */

	if (len == 1) {
		for (i = 0; i < items; i++) {
			*cb++	= *cp;
			cp	= cp + inc;
		}
	}
	else {
		for (i = 0; i < items; i++) {
			(void) memcpy(cb, cp, (size_t) len);
			cb	= cb + len;
			cp	= cp + inc;
		}
	}

	return;
}

/*
 *	_scatter_data(ptr, items, inc, len, buf)
 *
 *	Scatters data items from a local buffer to a user array when there
 *	is a non-unit stride.
 *
 *	ptr	User array into which local data will be scattered
 *	items	Number of items to be scattered
 *	inc	Increment in bytes between the start of the items to be
 *		scattered (inc must be a multiple of len).
 *	len	Length of each item in bytes (must be positive)
 *	buf	Local buffer containing packed data (items*len contiguous
 *		bytes are read)
 *
 *	The same size and alignment classes as in _gather_data() select
 *	between typed stores and the byte-wise/memcpy() general case; both
 *	ptr and buf must be aligned for a typed path to be taken.
 */
void
_scatter_data (
	void	*ptr,
	long	items,
	long	inc,
	int	len,
	void	*buf)
{
	long	i, j, k;
	long	step;
	char	*cp, *cb;

	assert ( len > 0 );		/* guards the modulus below */
	assert ( inc % len == 0 );	/* assumption simplifies alignment check */

	j	= 0;
	k	= 0;

	/*
	 *	Process items which are 8, 16 or 32 bytes long and which are
	 *	aligned on an 8-byte boundary.
	 */

	if ( SIZE_ALIGNED(len, LARGE) && BOTH_ALIGNED(ptr, buf, LARGE) ) {
		LARGE	*lp, *lb;

		lp	= (LARGE *) ptr;
		lb	= (LARGE *) buf;

		/* inc is a multiple of len, hence of sizeof(LARGE): exact */
		step	= inc / (long) sizeof(LARGE);

		assert ( sizeof(LARGE) == 8 );

		if (len == sizeof(LARGE)) {		/* If LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0; i < items; i++) {
				lp[k]	= lb[i];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(LARGE) << 1)) {	/* If 2X LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0; i < items; i++) {
				lp[k]	= lb[j];
				lp[k+1]	= lb[j+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(LARGE) << 2)) {	/* If 4X LARGE items */
#ifdef	_CRAY1
#pragma _CRI ivdep
#endif
			for (i = 0 ; i < items; i++) {
				lp[k]	= lb[j];
				lp[k+1]	= lb[j+1];
				lp[k+2]	= lb[j+2];
				lp[k+3]	= lb[j+3];
				j	= j + 4;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of LARGE uses the general case */
	}

	/*
	 *	Process items which are 4 or 8 bytes long and which are aligned
	 *	on a 4-byte boundary.
	 */

#ifdef	MEDIUM
	else if ( SIZE_ALIGNED(len, MEDIUM) && BOTH_ALIGNED(ptr, buf, MEDIUM) ) {
		MEDIUM	*ip, *ib;

		ip	= (MEDIUM *) ptr;
		ib	= (MEDIUM *) buf;
		step	= inc / (long) sizeof(MEDIUM);

		assert ( sizeof(MEDIUM) == 4 );

		if (len == sizeof(MEDIUM)) {		/* If MEDIUM items */
			for (i = 0; i < items; i++) {
				ip[k]	= ib[i];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(MEDIUM) << 1)) {	/* If 2X MEDIUM items */
			for (i = 0; i < items; i++) {
				ip[k]	= ib[j];
				ip[k+1]	= ib[j+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of MEDIUM uses the general case */
	}
#endif	/* MEDIUM */

	/*
	 *	Process items which are 2 or 4 bytes long and which are aligned
	 *	on a 2-byte boundary.
	 */

#ifdef	SMALL
	else if ( SIZE_ALIGNED(len, SMALL) && BOTH_ALIGNED(ptr, buf, SMALL) ) {
		SMALL	*sp, *sb;

		sp	= (SMALL *) ptr;
		sb	= (SMALL *) buf;
		step	= inc / (long) sizeof(SMALL);

		assert ( sizeof(SMALL) == 2 );

		if (len == sizeof(SMALL)) {		/* If SMALL items */
			for (i = 0; i < items; i++) {
				sp[k]	= sb[i];
				k	= k + step;
			}
			return;
		}

		if (len == (sizeof(SMALL) << 1)) {	/* If 2X SMALL items */
			for (i = 0; i < items; i++) {
				sp[k]	= sb[j];
				sp[k+1]	= sb[j+1];
				j	= j + 2;
				k	= k + step;
			}
			return;
		}

		/* Any other multiple of SMALL uses the general case */
	}
#endif	/* SMALL */

	/*
	 *	Process single byte items or anything else which is not naturally
	 *	aligned.
	 */

	cp	= (char *) ptr;
	cb	= (char *) buf;

	/* Don't call memcpy() for single byte transfers */

	if (len == 1) {
		for (i = 0; i < items; i++) {
			*cp	= *cb++;
			cp	= cp + inc;
		}
	}
	else {
		for (i = 0; i < items; i++) {
			(void) memcpy(cp, cb, (size_t) len);
			cb	= cb + len;
			cp	= cp + inc;
		}
	}

	return;
}