/* Open64 (mfef90, whirl2f, and IR tools)
   TAG: version-openad; SVN changeset: 916 */
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 
00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 /* automatically generated file, do not edit */ 00037 00038 #include "f90_intrinsic.h" 00039 00040 void 00041 _CSHIFT( 00042 DopeVectorType *result, 00043 DopeVectorType *array, 00044 DopeVectorType *shift, 00045 i4 *dim) 00046 { 00047 char * result_p, * result_b ; 00048 char * array_p, * array_b ; 00049 char * shift_p, * shift_b ; 00050 00051 size_t src_extent [MAX_NARY_DIMS] ; 00052 size_t src_stride [MAX_NARY_DIMS] ; 00053 size_t src_offset [MAX_NARY_DIMS] ; 00054 size_t counter[MAX_NARY_DIMS] ; 00055 00056 size_t res_stride [MAX_NARY_DIMS] ; 00057 size_t res_extent [MAX_NARY_DIMS] ; 00058 size_t res_offset [MAX_NARY_DIMS] ; 00059 00060 int32_t j,ii; 00061 char *rp, *ap ; 00062 int32_t res_rank ; 00063 int32_t shf_rank ; 00064 int32_t src_rank = GET_RANK_FROM_DESC(array) - 1; 00065 00066 size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array); 00067 00068 size_t a_size,a_stride,r_stride, i,k ; 00069 int8_t zero_szd_source = FALSE; 00070 int8_t byte_aligned = FALSE; 00071 00072 size_t a_offs,a_bump,r_bump ; 00073 size_t ll1,ll2; 00074 int64_t shft,shf_typ_sz ; 00075 char *rp1, *ap1, *ap2 ; 00076 int32_t ddim ; 00077 00078 size_t shf_stride [MAX_NARY_DIMS] ; 00079 size_t shf_offset [MAX_NARY_DIMS] ; 00080 size_t num_trues ; 00081 int32_t local_alloc ; 00082 size_t tot_ext ; 00083 size_t str_sz ; 00084 00085 size_t src_size ; 00086 00087 size_t res_sz; 00088 size_t xfer_sz; 00089 size_t tot_sz; 00090 00091 int8_t computed_shift = FALSE ; 00092 ddim = (*dim) - 1 ; 00093 00094 if ((ddim > src_rank) || (ddim < 0)) 00095 ERROR(_LELVL_ABORT,FESCIDIM); 00096 00097 src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ; 00098 src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ; 00099 
byte_aligned = GET_BYTEALIGNED_FROM_DESC(array) ; 00100 00101 for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { 00102 if (j != ddim ) { 00103 src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ; 00104 src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ; 00105 src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1])) ; 00106 k++ ; 00107 } 00108 counter[j] = 0 ; 00109 shf_offset[j] = 0 ; 00110 zero_szd_source = zero_szd_source || (src_extent[j] == 0) ; 00111 } 00112 00113 if (!GET_ASSOCIATED_FROM_DESC(result)) { 00114 00115 size_t nbytes ; 00116 size_t ext ; 00117 char *p ; 00118 00119 SET_ADDRESS_IN_DESC(result,NULL); 00120 SET_ORIG_BS_IN_DESC(result,NULL) ; 00121 SET_ORIG_SZ_IN_DESC(result,0) ; 00122 00123 p = NULL ; 00124 tot_ext = 1 ; 00125 nbytes = typ_sz ; 00126 str_sz = MK_STRIDE(byte_aligned,typ_sz); 00127 00128 for ( i = 0 ; i <= src_rank ; i ++) { 00129 ext = GET_EXTENT_FROM_DESC(array,i) ; 00130 SET_LBOUND_IN_DESC(result,i,1); 00131 SET_EXTENT_IN_DESC(result,i,ext); 00132 SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz ); 00133 tot_ext *= ext; 00134 nbytes *= ext; 00135 } 00136 00137 if (nbytes > 0) { 00138 p = (void *) malloc (nbytes); 00139 if (p == NULL) 00140 ERROR(_LELVL_ABORT, FENOMEMY); 00141 00142 SET_ADDRESS_IN_DESC(result,p); 00143 } 00144 00145 SET_ASSOCIATED_IN_DESC(result); 00146 SET_CONTIG_IN_DESC(result); 00147 if (GET_DV_ASCII_FROM_DESC(array)) { 00148 SET_CHARPTR_IN_DESC(result,p,typ_sz); 00149 } 00150 SET_ORIG_BS_IN_DESC(result,p) ; 00151 SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ; 00152 } 00153 00154 res_stride[0] = GET_STRIDE_FROM_DESC(result,ddim) ; 00155 00156 for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { 00157 if (j != ddim ) { 00158 res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ; 00159 res_offset[k-1] = res_stride[k] - (res_stride [k-1] * (src_extent[k-1])) ; 00160 k++ ; 00161 } 00162 } 00163 00164 shf_typ_sz = GET_ELEMENT_SZ_FROM_DESC(shift); 00165 shf_rank = GET_RANK_FROM_DESC(shift); 00166 shift_p = GET_ADDRESS_FROM_DESC(shift); 
00167 00168 shf_stride[0] = 0 ; 00169 for ( j = 0 ; j < shf_rank ; j ++ ) { 00170 shf_stride[j] = GET_STRIDE_FROM_DESC(shift,j) ; 00171 } 00172 00173 for ( j = 1 ; j < shf_rank ; j ++ ) { 00174 shf_offset[j] = shf_stride[j] - (shf_stride [j-1] * (src_extent[j])) ; 00175 } 00176 00177 a_bump = src_extent[0] * src_stride[0] ; 00178 r_bump = src_extent[0] * res_stride[0] ; 00179 00180 if (zero_szd_source) 00181 return ; 00182 00183 a_size = src_extent[0] ; 00184 a_stride = src_stride[0] ; 00185 r_stride = res_stride[0] ; 00186 array_p = GET_ADDRESS_FROM_DESC(array); 00187 result_p = GET_ADDRESS_FROM_DESC(result); 00188 00189 if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p)) { 00190 00191 while (counter[src_rank] < src_extent[src_rank] ) { 00192 if (!computed_shift) { 00193 switch (shf_typ_sz) { 00194 case sizeof(i1) : 00195 shft = * (i1 *)shift_p ; 00196 break ; 00197 case sizeof(i2) : 00198 shft = * (i2 *)shift_p ; 00199 break ; 00200 case sizeof(i4) : 00201 shft = * (i4 *)shift_p ; 00202 break ; 00203 case sizeof(i8) : 00204 shft = * (i8 *)shift_p ; 00205 break ; 00206 } 00207 00208 shft = shft % (int64_t)src_extent[0]; 00209 if (shft < 0 ) { 00210 ll1 = abs(shft) ; 00211 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00212 00213 } else { 00214 ll1 = (int64_t)src_extent[0] - shft ; 00215 ll2 = shft ; 00216 } 00217 a_offs = a_stride * ll2 ; 00218 shift_p += shf_stride[0] ; 00219 00220 if (shf_rank == 0) 00221 computed_shift = TRUE; 00222 } 00223 00224 ap1 = array_p + a_offs ; 00225 00226 for ( k = 0 ; k < ll1 ; k ++ ) { 00227 *(i1 *)result_p = *(i1 *)ap1 ; 00228 result_p += r_stride ; 00229 ap1 += a_stride ; 00230 } 00231 00232 ap2 = array_p ; 00233 00234 for ( k = 0 ; k < ll2 ; k ++ ) { 00235 *(i1 *)result_p = *(i1 *)ap2 ; 00236 result_p += r_stride ; 00237 00238 ap2 += a_stride ; 00239 } 00240 array_p += a_bump ; 00241 00242 counter[0] = a_size ; 00243 j = 0 ; 00244 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00245 array_p += 
src_offset[j] ; 00246 result_p += res_offset[j] ; 00247 shift_p += shf_offset[j] ; 00248 counter[j+1]++ ; 00249 counter[j] = 0 ; 00250 j ++ ; 00251 } 00252 00253 } 00254 } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) && ALIGNED_i2(result_p) ) { 00255 00256 while (counter[src_rank] < src_extent[src_rank] ) { 00257 if (!computed_shift) { 00258 switch (shf_typ_sz) { 00259 case sizeof(i1) : 00260 shft = * (i1 *)shift_p ; 00261 break ; 00262 case sizeof(i2) : 00263 shft = * (i2 *)shift_p ; 00264 break ; 00265 case sizeof(i4) : 00266 shft = * (i4 *)shift_p ; 00267 break ; 00268 case sizeof(i8) : 00269 shft = * (i8 *)shift_p ; 00270 break ; 00271 } 00272 00273 shft = shft % (int64_t)src_extent[0]; 00274 if (shft < 0 ) { 00275 ll1 = abs(shft) ; 00276 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00277 00278 } else { 00279 ll1 = (int64_t)src_extent[0] - shft ; 00280 ll2 = shft ; 00281 } 00282 a_offs = a_stride * ll2 ; 00283 shift_p += shf_stride[0] ; 00284 00285 if (shf_rank == 0) 00286 computed_shift = TRUE; 00287 } 00288 00289 ap1 = array_p + a_offs ; 00290 00291 for ( k = 0 ; k < ll1 ; k ++ ) { 00292 *(i2 *)result_p = *(i2 *)ap1 ; 00293 result_p += r_stride ; 00294 ap1 += a_stride ; 00295 } 00296 00297 ap2 = array_p ; 00298 00299 for ( k = 0 ; k < ll2 ; k ++ ) { 00300 *(i2 *)result_p = *(i2 *)ap2 ; 00301 result_p += r_stride ; 00302 00303 ap2 += a_stride ; 00304 } 00305 array_p += a_bump ; 00306 00307 counter[0] = a_size ; 00308 j = 0 ; 00309 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00310 array_p += src_offset[j] ; 00311 result_p += res_offset[j] ; 00312 shift_p += shf_offset[j] ; 00313 counter[j+1]++ ; 00314 counter[j] = 0 ; 00315 j ++ ; 00316 } 00317 00318 } 00319 } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) && ALIGNED_r4(result_p) ) { 00320 00321 while (counter[src_rank] < src_extent[src_rank] ) { 00322 if (!computed_shift) { 00323 switch (shf_typ_sz) { 00324 case sizeof(i1) : 00325 shft = * (i1 *)shift_p ; 00326 break ; 00327 case 
sizeof(i2) : 00328 shft = * (i2 *)shift_p ; 00329 break ; 00330 case sizeof(i4) : 00331 shft = * (i4 *)shift_p ; 00332 break ; 00333 case sizeof(i8) : 00334 shft = * (i8 *)shift_p ; 00335 break ; 00336 } 00337 00338 shft = shft % (int64_t)src_extent[0]; 00339 if (shft < 0 ) { 00340 ll1 = abs(shft) ; 00341 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00342 00343 } else { 00344 ll1 = (int64_t)src_extent[0] - shft ; 00345 ll2 = shft ; 00346 } 00347 a_offs = a_stride * ll2 ; 00348 shift_p += shf_stride[0] ; 00349 00350 if (shf_rank == 0) 00351 computed_shift = TRUE; 00352 } 00353 00354 ap1 = array_p + a_offs ; 00355 00356 for ( k = 0 ; k < ll1 ; k ++ ) { 00357 *(r4 *)result_p = *(r4 *)ap1 ; 00358 result_p += r_stride ; 00359 ap1 += a_stride ; 00360 } 00361 00362 ap2 = array_p ; 00363 00364 for ( k = 0 ; k < ll2 ; k ++ ) { 00365 *(r4 *)result_p = *(r4 *)ap2 ; 00366 result_p += r_stride ; 00367 00368 ap2 += a_stride ; 00369 } 00370 array_p += a_bump ; 00371 00372 counter[0] = a_size ; 00373 j = 0 ; 00374 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00375 array_p += src_offset[j] ; 00376 result_p += res_offset[j] ; 00377 shift_p += shf_offset[j] ; 00378 counter[j+1]++ ; 00379 counter[j] = 0 ; 00380 j ++ ; 00381 } 00382 00383 } 00384 } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) && ALIGNED_r8(result_p) ) { 00385 00386 while (counter[src_rank] < src_extent[src_rank] ) { 00387 if (!computed_shift) { 00388 switch (shf_typ_sz) { 00389 case sizeof(i1) : 00390 shft = * (i1 *)shift_p ; 00391 break ; 00392 case sizeof(i2) : 00393 shft = * (i2 *)shift_p ; 00394 break ; 00395 case sizeof(i4) : 00396 shft = * (i4 *)shift_p ; 00397 break ; 00398 case sizeof(i8) : 00399 shft = * (i8 *)shift_p ; 00400 break ; 00401 } 00402 00403 shft = shft % (int64_t)src_extent[0]; 00404 if (shft < 0 ) { 00405 ll1 = abs(shft) ; 00406 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00407 00408 } else { 00409 ll1 = (int64_t)src_extent[0] - shft ; 00410 ll2 = shft ; 00411 } 00412 a_offs = 
a_stride * ll2 ; 00413 shift_p += shf_stride[0] ; 00414 00415 if (shf_rank == 0) 00416 computed_shift = TRUE; 00417 } 00418 00419 ap1 = array_p + a_offs ; 00420 00421 for ( k = 0 ; k < ll1 ; k ++ ) { 00422 *(r8 *)result_p = *(r8 *)ap1 ; 00423 result_p += r_stride ; 00424 ap1 += a_stride ; 00425 } 00426 00427 ap2 = array_p ; 00428 00429 for ( k = 0 ; k < ll2 ; k ++ ) { 00430 *(r8 *)result_p = *(r8 *)ap2 ; 00431 result_p += r_stride ; 00432 00433 ap2 += a_stride ; 00434 } 00435 array_p += a_bump ; 00436 00437 counter[0] = a_size ; 00438 j = 0 ; 00439 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00440 array_p += src_offset[j] ; 00441 result_p += res_offset[j] ; 00442 shift_p += shf_offset[j] ; 00443 counter[j+1]++ ; 00444 counter[j] = 0 ; 00445 j ++ ; 00446 } 00447 00448 } 00449 } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) && ALIGNED_r16(result_p) ) { 00450 00451 while (counter[src_rank] < src_extent[src_rank] ) { 00452 if (!computed_shift) { 00453 switch (shf_typ_sz) { 00454 case sizeof(i1) : 00455 shft = * (i1 *)shift_p ; 00456 break ; 00457 case sizeof(i2) : 00458 shft = * (i2 *)shift_p ; 00459 break ; 00460 case sizeof(i4) : 00461 shft = * (i4 *)shift_p ; 00462 break ; 00463 case sizeof(i8) : 00464 shft = * (i8 *)shift_p ; 00465 break ; 00466 } 00467 00468 shft = shft % (int64_t)src_extent[0]; 00469 if (shft < 0 ) { 00470 ll1 = abs(shft) ; 00471 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00472 00473 } else { 00474 ll1 = (int64_t)src_extent[0] - shft ; 00475 ll2 = shft ; 00476 } 00477 a_offs = a_stride * ll2 ; 00478 shift_p += shf_stride[0] ; 00479 00480 if (shf_rank == 0) 00481 computed_shift = TRUE; 00482 } 00483 00484 ap1 = array_p + a_offs ; 00485 00486 for ( k = 0 ; k < ll1 ; k ++ ) { 00487 *(r16 *)result_p = *(r16 *)ap1 ; 00488 result_p += r_stride ; 00489 ap1 += a_stride ; 00490 } 00491 00492 ap2 = array_p ; 00493 00494 for ( k = 0 ; k < ll2 ; k ++ ) { 00495 *(r16 *)result_p = *(r16 *)ap2 ; 00496 result_p += r_stride ; 00497 00498 
ap2 += a_stride ; 00499 } 00500 array_p += a_bump ; 00501 00502 counter[0] = a_size ; 00503 j = 0 ; 00504 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00505 array_p += src_offset[j] ; 00506 result_p += res_offset[j] ; 00507 shift_p += shf_offset[j] ; 00508 counter[j+1]++ ; 00509 counter[j] = 0 ; 00510 j ++ ; 00511 } 00512 00513 } 00514 } else { 00515 while (counter[src_rank] < src_extent[src_rank] ) { 00516 if (!computed_shift) { 00517 switch (shf_typ_sz) { 00518 case sizeof(i1) : 00519 shft = * (i1 *)shift_p ; 00520 break ; 00521 case sizeof(i2) : 00522 shft = * (i2 *)shift_p ; 00523 break ; 00524 case sizeof(i4) : 00525 shft = * (i4 *)shift_p ; 00526 break ; 00527 case sizeof(i8) : 00528 shft = * (i8 *)shift_p ; 00529 break ; 00530 } 00531 00532 shft = shft % (int64_t)src_extent[0]; 00533 if (shft < 0 ) { 00534 ll1 = abs(shft) ; 00535 ll2 = (int64_t)src_extent[0] - abs(shft) ; 00536 00537 } else { 00538 ll1 = (int64_t)src_extent[0] - shft ; 00539 ll2 = shft ; 00540 } 00541 a_offs = a_stride * ll2 ; 00542 shift_p += shf_stride[0] ; 00543 00544 if (shf_rank == 0) 00545 computed_shift = TRUE; 00546 } 00547 00548 ap1 = array_p + a_offs ; 00549 00550 for ( k = 0 ; k < ll1 ; k ++ ) { 00551 rp = result_p ; 00552 ap = ap1 ; 00553 if (typ_sz > BIGDEFAULTSZ) 00554 (void) memcpy (rp, ap, typ_sz); 00555 else 00556 for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ; 00557 result_p += r_stride ; 00558 ap1 += a_stride ; 00559 } 00560 00561 ap2 = array_p ; 00562 00563 for ( k = 0 ; k < ll2 ; k ++ ) { 00564 rp = result_p ; 00565 ap = ap2 ; 00566 if (typ_sz > BIGDEFAULTSZ) 00567 (void) memcpy (rp, ap, typ_sz); 00568 else 00569 for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ; 00570 result_p += r_stride ; 00571 00572 ap2 += a_stride ; 00573 } 00574 array_p += a_bump ; 00575 00576 counter[0] = a_size ; 00577 j = 0 ; 00578 while ((counter[j] == src_extent[j]) && (j < src_rank)) { 00579 array_p += src_offset[j] ; 00580 result_p += res_offset[j] ; 00581 shift_p += 
shf_offset[j] ; 00582 counter[j+1]++ ; 00583 counter[j] = 0 ; 00584 j ++ ; 00585 } 00586 00587 } 00588 } 00589 }