Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/unpack.c 92.1 07/07/99 15:52:02" 00038 00039 #include <stddef.h> 00040 #include <liberrno.h> 00041 #include <cray/dopevec.h> 00042 #include <cray/portdefs.h> 00043 #include "arraydefs.h" 00044 00045 /* 00046 * Determine the total offset of dimensions 2-rank for mask, field 00047 * and result matrices. 00048 */ 00049 00050 #define FIND_INDX() \ 00051 switch (rank) { \ 00052 case 3 : \ 00053 indx1_msk = msk_off[0] + msk_off[1]; \ 00054 indx1_fld = fld_off[0] + fld_off[1]; \ 00055 indx1_res = res_off[0] + res_off[1]; \ 00056 break; \ 00057 case 4 : \ 00058 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2]; \ 00059 indx1_fld = fld_off[0] + fld_off[1] + fld_off[2]; \ 00060 indx1_res = res_off[0] + res_off[1] + res_off[2]; \ 00061 break; \ 00062 case 5 : \ 00063 indx1_msk = msk_off[0] + msk_off[1] + \ 00064 msk_off[2] + msk_off[3]; \ 00065 indx1_fld = fld_off[0] + fld_off[1] + \ 00066 fld_off[2] + fld_off[3]; \ 00067 indx1_res = res_off[0] + res_off[1] + \ 00068 res_off[2] + res_off[3]; \ 00069 break; \ 00070 case 6 : \ 00071 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] + \ 00072 msk_off[3] + msk_off[4]; \ 00073 indx1_fld = fld_off[0] + fld_off[1] + fld_off[2] + \ 00074 fld_off[3] + fld_off[4]; \ 00075 indx1_res = res_off[0] + res_off[1] + res_off[2] + \ 00076 res_off[3] + res_off[4]; \ 00077 break; \ 00078 default : \ 00079 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] + \ 00080 msk_off[3] + msk_off[4] + msk_off[5]; \ 00081 indx1_fld = fld_off[0] + fld_off[1] + fld_off[2] + \ 00082 fld_off[3] + fld_off[4] + fld_off[5]; \ 00083 indx1_res = res_off[0] + res_off[1] + res_off[2] + \ 00084 res_off[3] + res_off[4] + res_off[5]; \ 00085 } 00086 00087 00088 /* 00089 * Determine the value for each index, as well as the offset for 00090 * each dimension 2-rank for mask, field and result matrices. 00091 */ 00092 00093 /* 00094 * INCREMENT calculates the offset for each dimension of the mask, 00095 * field and result arrays. The sum of each of the offset arrays 00096 * gives the offset from the beginning of the array for each of the 00097 * arrays. 00098 */ 00099 00100 #define INCREMENT() \ 00101 curdim[0]++; \ 00102 if (curdim[0] < msk_ext[1]) { \ 00103 msk_off[0] = curdim[0] * msk_strd[1]; \ 00104 fld_off[0] = curdim[0] * fld_strd[1]; \ 00105 res_off[0] = curdim[0] * res_strd[1]; \ 00106 } else { \ 00107 curdim[0] = 0; \ 00108 msk_off[0] = 0; \ 00109 fld_off[0] = 0; \ 00110 res_off[0] = 0; \ 00111 curdim[1]++; \ 00112 if (curdim[1] < msk_ext[2]) { \ 00113 msk_off[1] = curdim[1] * msk_strd[2]; \ 00114 fld_off[1] = curdim[1] * fld_strd[2]; \ 00115 res_off[1] = curdim[1] * res_strd[2]; \ 00116 } else { \ 00117 curdim[1] = 0; \ 00118 msk_off[1] = 0; \ 00119 fld_off[1] = 0; \ 00120 res_off[1] = 0; \ 00121 curdim[2]++; \ 00122 if (curdim[2] < msk_ext[3]) { \ 00123 msk_off[2] = curdim[2] * msk_strd[3]; \ 00124 fld_off[2] = curdim[2] * fld_strd[3]; \ 00125 res_off[2] = curdim[2] * res_strd[3]; \ 00126 } else { \ 00127 curdim[2] = 0; \ 00128 msk_off[2] = 0; \ 00129 fld_off[2] = 0; \ 00130 res_off[2] = 0; \ 00131 curdim[3]++; \ 00132 if (curdim[3] < msk_ext[4]) { \ 00133 msk_off[3] = curdim[3] * msk_strd[4]; \ 00134 fld_off[3] = curdim[3] * fld_strd[4]; \ 00135 res_off[3] = curdim[3] * res_strd[4]; \ 00136 } else { \ 00137 curdim[3] = 0; \ 00138 msk_off[3] = 0; \ 00139 fld_off[3] = 0; \ 00140 res_off[3] = 0; \ 00141 curdim[4]++; \ 00142 if (curdim[4] < msk_ext[5]) { \ 00143 msk_off[4] = curdim[4] * msk_strd[5]; \ 00144 fld_off[4] = curdim[4] * fld_strd[5]; \ 00145 res_off[4] = curdim[4] * res_strd[5]; \ 00146 } else { \ 00147 curdim[4] = 0; \ 00148 msk_off[4] = 0; \ 00149 fld_off[4] = 0; \ 00150 res_off[4] = 0; \ 00151 curdim[5]++; \ 00152 msk_off[5] = curdim[5] * msk_strd[6]; \ 00153 fld_off[5] = curdim[5] * fld_strd[6]; \ 00154 res_off[5] = curdim[5] * res_strd[6]; \ 00155 } \ 00156 } \ 00157 } \ 00158 } \ 00159 } 00160 00161 #ifdef _UNICOS 00162 #pragma _CRI duplicate _UNPACK as UNPACK@ 00163 #endif 00164 void 00165 _UNPACK ( DopeVectorType * result, 00166 DopeVectorType * vector, 00167 DopeVectorType * mask, 00168 DopeVectorType * field) 00169 00170 { 00171 char *cf; /* char ptr to field array */ 00172 char *cr; /* char ptr to result array */ 00173 char *cv; /* char ptr to vector array */ 00174 char * restrict cptr1; /* char */ 00175 char * restrict cptr2; /* char */ 00176 char * restrict cptr3; /* char */ 00177 _f_int8 * restrict uptr1; /* 64-bit */ 00178 _f_int8 * restrict uptr2; /* 64-bit */ 00179 _f_int8 * restrict uptr3; /* 64-bit */ 00180 _f_int * restrict fptr1; /* def kind int */ 00181 _f_int * restrict fptr2; /* def kind int */ 00182 _f_int * restrict fptr3; /* def kind int */ 00183 _f_real16 * restrict dptr1; /* 128-bit */ 00184 _f_real16 * restrict dptr2; /* 128-bit */ 00185 _f_real16 * restrict dptr3; /* 128-bit */ 00186 #ifdef _F_COMP16 00187 dblcmplx * restrict xptr1; /* 256-bit */ 00188 dblcmplx * restrict xptr2; /* 256-bit */ 00189 dblcmplx * restrict xptr3; /* 256-bit */ 00190 #endif 00191 _f_int4 * restrict hptr1; /* 32-bit */ 00192 _f_int4 * restrict hptr2; /* 32-bit */ 00193 _f_int4 * restrict hptr3; /* 32-bit */ 00194 _f_mask * restrict iptr4; /* int */ 00195 void * restrict fptr; /* field */ 00196 void * restrict rptr; /* result */ 00197 void * restrict vptr; /* vector */ 00198 void * restrict mptr; /* mask */ 00199 _f_int bucketsize; /* size of element */ 00200 long nbytes; /* number of bytes */ 00201 long nwords; /* number of words */ 00202 long curdim[MAXDIM]; /* current indices */ 00203 _f_int bytealligned; /* byte aligned flag */ 00204 long findx; /* field index */ 00205 long rindx; /* result index */ 00206 long mindx; /* mask index */ 00207 long vindx; /* vector index */ 00208 _f_int type; /* data type */ 00209 _f_int subtype; /* sub-type */ 00210 _f_int arithmetic; /* arithmetic data type */ 00211 _f_int rank; /* rank of result matrix */ 00212 long i, j, k; /* index variables */ 00213 long vec_strd; /* stride of vector */ 00214 long fld_ext[MAXDIM]; /* extents for field */ 00215 long fld_strd[MAXDIM]; /* element stride for field */ 00216 long fld_incr[MAXDIM]; /* incr for each index */ 00217 long fld_off[MAXDIM]; /* incr for each index */ 00218 long msk_ext[MAXDIM]; /* extents for field */ 00219 long msk_strd[MAXDIM]; /* element stride for field */ 00220 long msk_incr[MAXDIM]; /* incr for each index */ 00221 long msk_off[MAXDIM]; /* incr for each index */ 00222 long res_ext[MAXDIM]; /* extents for field */ 00223 long res_strd[MAXDIM]; /* element stride for field */ 00224 long res_incr[MAXDIM]; /* incr for each index */ 00225 long res_off[MAXDIM]; /* incr for each index */ 00226 long fld_cum_decr; /* field cumulative decrement */ 00227 long msk_cum_decr; /* mask cumulative decrement */ 00228 long res_cum_decr; /* result cumulative decrement */ 00229 long indx1_fld; /* index for dim 1 of field */ 00230 long indx2_fld; /* index for dim 2 of field */ 00231 long fld2_off; /* offset for field dim 2 */ 00232 long indx1_res; /* index for dim 1 of result */ 00233 long indx2_res; /* index for dim 2 of result */ 00234 long res2_off; /* offset for result dim 2 */ 00235 long indx1_msk; /* index for dim 1 of mask */ 00236 long indx2_msk; /* index for dim 2 of mask */ 00237 long msk2_off; /* offset for mask dim 2 */ 00238 long tot_ext; /* total mask extent counter */ 00239 long vec_ext; /* total vector extent counter */ 00240 long mask_el_len; 00241 _f_int early_exit; /* early exit flag */ 00242 00243 /* Set type and rank global variables */ 00244 00245 type = field->type_lens.type; 00246 rank = mask->n_dim; 00247 mask_el_len = mask->base_addr.a.el_len; 00248 00249 /* 00250 * If any result array extents are zero (zero-sized array) and 00251 * the result dope vector is associated, exit without any load/store 00252 * since the result array (dope vector contents) cannot be changed. 00253 * If an extent from a mask array is zero, exit without doing any 00254 * load/store since the result must have the same shape as MASK. 00255 * If the vector source array is zero sized, an early exit is not 00256 * possible unless mask is zero-sized. If mask is all false but not 00257 * zero sized, field is used to fill the result array. The cases: 00258 * 1. RESULT array is associated and zero sized (cannot change 00259 * assoc array): - early exit with no change to RESULT array. 00260 * 2. MASK is scalar (MASK must be array - diagnosed at compilation. 00261 * 3. MASK is nonzero-sized array and FIELD does not conform to MASK 00262 * - diagnosed with -RC option only at execution possibly. 00263 * 4. MASK array is zero sized (RESULT array must be same shape as 00264 * MASK): - early exit with RESULT same shape as MASK. 00265 * 5. MASK nonzero-sized array and any size VECTOR array. VECTOR 00266 * array must have as many elements as there are TRUE elements 00267 * in MASK. FIELD values are used to fill elements of RESULT 00268 * that correspond to FALSE elements of MASK. 00269 * - WRITE elements of VECTOR to RESULT. 00270 * - WRITE elements of FIELD to RESULT. 00271 * ERROR if VECTOR does not have enough elements for TRUE elements 00272 * elements of MASK. A scalar FIELD always has enough elements. 00273 */ 00274 00275 early_exit = 0; 00276 if (result->assoc) { 00277 #ifdef _UNICOS 00278 #pragma _CRI shortloop 00279 #endif 00280 for (i = 0; i < rank; i++) { 00281 if (result->dimension[i].extent == 0) 00282 early_exit = 1; 00283 } 00284 } 00285 #ifdef _UNICOS 00286 #pragma _CRI shortloop 00287 #endif 00288 for (i = 0; i < rank; i++) { 00289 if (mask->dimension[i].extent == 0) 00290 early_exit = 1; 00291 } 00292 00293 /* 00294 * Initialize every array element to 0. 00295 */ 00296 00297 #ifdef _UNICOS 00298 #pragma _CRI shortloop 00299 #endif 00300 for (i = 0; i < MAXDIM; i++) { 00301 curdim[i] = 0; 00302 fld_ext[i] = 0; 00303 fld_strd[i] = 0; 00304 fld_incr[i] = 0; 00305 fld_off[i] = 0; 00306 msk_ext[i] = 0; 00307 msk_strd[i] = 0; 00308 msk_incr[i] = 0; 00309 msk_off[i] = 0; 00310 res_ext[i] = 0; 00311 res_strd[i] = 0; 00312 res_incr[i] = 0; 00313 res_off[i] = 0; 00314 } 00315 00316 /* Size calculation is based on variable type */ 00317 00318 switch (type) { 00319 case DVTYPE_ASCII : 00320 bytealligned = 1; 00321 bucketsize = _fcdlen (field->base_addr.charptr); /* bytes */ 00322 subtype = DVSUBTYPE_CHAR; 00323 arithmetic = 0; 00324 break; 00325 case DVTYPE_DERIVEDBYTE : 00326 bytealligned = 1; 00327 bucketsize = field->base_addr.a.el_len / BITS_PER_BYTE; 00328 subtype = DVSUBTYPE_CHAR; 00329 arithmetic = 0; 00330 break; 00331 case DVTYPE_DERIVEDWORD : 00332 bytealligned = 0; 00333 bucketsize = field->base_addr.a.el_len / BITS_PER_WORD; 00334 subtype = DVSUBTYPE_DERIVED; 00335 arithmetic = 0; 00336 break; 00337 default : 00338 bytealligned = 0; 00339 bucketsize = field->type_lens.int_len / BITS_PER_WORD; 00340 if (field->type_lens.int_len == 64) { 00341 subtype = DVSUBTYPE_BIT64; 00342 } else if (field->type_lens.int_len == 32) { 00343 subtype = DVSUBTYPE_BIT32; 00344 bucketsize = 1; 00345 } else if (field->type_lens.int_len == 256) { 00346 subtype = DVSUBTYPE_BIT256; 00347 } else { 00348 subtype = DVSUBTYPE_BIT128; 00349 } 00350 arithmetic = 1; 00351 } 00352 00353 /* Set up dope vector for result array */ 00354 00355 if (!result->assoc) { 00356 result->base_addr.a.ptr = (void *) NULL; 00357 result->orig_base = 0; 00358 result->orig_size = 0; 00359 #ifdef _UNICOS 00360 #pragma _CRI shortloop 00361 #endif 00362 for (i = 0, tot_ext = bucketsize; i < rank; i++) { 00363 result->dimension[i].extent = mask->dimension[i].extent; 00364 result->dimension[i].low_bound = 1; 00365 result->dimension[i].stride_mult = tot_ext; 00366 tot_ext *= result->dimension[i].extent; 00367 } 00368 00369 /* Determine size of space to allocate */ 00370 00371 if (!bytealligned) { 00372 nbytes = bucketsize * BYTES_PER_WORD; 00373 #ifdef _CRAYMPP 00374 if (subtype == DVSUBTYPE_BIT32) 00375 nbytes /= 2; 00376 #endif 00377 } else { 00378 nbytes = bucketsize; 00379 } 00380 for (i = 0; i < rank; i++) 00381 nbytes *= mask->dimension[i].extent; 00382 nwords = nbytes / BYTES_PER_WORD; 00383 if (nbytes > 0) { 00384 result->base_addr.a.ptr = (void *) malloc (nbytes); 00385 if (result->base_addr.a.ptr == NULL) 00386 _lerror (_LELVL_ABORT, FENOMEMY); 00387 } 00388 00389 result->assoc = 1; 00390 result->base_addr.a.el_len = field->base_addr.a.el_len; 00391 if (type == DVTYPE_ASCII) { 00392 cr = (char *) result->base_addr.a.ptr; 00393 result->base_addr.charptr = _cptofcd (cr, bucketsize); 00394 } 00395 result->orig_base = (void *) result->base_addr.a.ptr; 00396 result->orig_size = nbytes * BITS_PER_BYTE; 00397 } 00398 00399 /* If one of our early exit conditions is met, exit now */ 00400 00401 if (early_exit) 00402 return; 00403 00404 /* Initialize scalar pointers to all of the argument data areas */ 00405 00406 if (!bytealligned) { 00407 fptr = (void *) field->base_addr.a.ptr; 00408 rptr = (void *) result->base_addr.a.ptr; 00409 vptr = (void *) vector->base_addr.a.ptr; 00410 } else { 00411 if (type == DVTYPE_ASCII) { 00412 cf = _fcdtocp (field->base_addr.charptr); 00413 cv = _fcdtocp ( vector->base_addr.charptr); 00414 cr = _fcdtocp (result->base_addr.charptr); 00415 } else { 00416 cf = (char *) field->base_addr.a.ptr; 00417 cv = (char *) vector->base_addr.a.ptr; 00418 cr = (char *) result->base_addr.a.ptr; 00419 } 00420 } 00421 if (mask) 00422 mptr = (void *) mask->base_addr.a.ptr; 00423 00424 /* Initialize 'shortcut variables used for index calculation */ 00425 00426 vec_ext = vector->dimension[0].extent; 00427 if (bucketsize > 1 && arithmetic) { 00428 vec_strd = vector->dimension[0].stride_mult / bucketsize; 00429 } else { 00430 vec_strd = vector->dimension[0].stride_mult; 00431 } 00432 00433 #ifdef _UNICOS 00434 #pragma _CRI shortloop 00435 #endif 00436 for (i = 0; i < rank; i++) { 00437 msk_ext[i] = mask->dimension[i].extent; 00438 msk_strd[i] = mask->dimension[i].stride_mult; 00439 #ifdef _CRAYMPP 00440 if (mask_el_len == 64 && sizeof(iptr4[0]) == 4) 00441 msk_strd[i] <<= 1; 00442 #endif 00443 res_ext[i] = result->dimension[i].extent; 00444 if (bucketsize > 1 && arithmetic) { 00445 res_strd[i] = result->dimension[i].stride_mult / bucketsize; 00446 } else { 00447 res_strd[i] = result->dimension[i].stride_mult; 00448 } 00449 } 00450 00451 if (field->n_dim > 0) { 00452 #ifdef _UNICOS 00453 #pragma _CRI shortloop 00454 #endif 00455 for (i = 0; i < rank; i++) { 00456 fld_ext[i] = field->dimension[i].extent; 00457 if (bucketsize > 1 && arithmetic) { 00458 fld_strd[i] = field->dimension[i].stride_mult / bucketsize; 00459 } else { 00460 fld_strd[i] = field->dimension[i].stride_mult; 00461 } 00462 } 00463 } else { 00464 #ifdef _UNICOS 00465 #pragma _CRI shortloop 00466 #endif 00467 for (i = 0; i < rank; i++) { 00468 fld_ext[i] = 0; 00469 fld_strd[i] = 0; 00470 } 00471 } 00472 00473 /* Calculate total number of elements to move */ 00474 00475 tot_ext = 1; 00476 #ifdef _UNICOS 00477 #pragma _CRI novector 00478 #endif 00479 for (i = 0; i < rank; i++) 00480 tot_ext *= msk_ext[i]; 00481 00482 /* 00483 * The program is divided up into three blocks. The first block deals 00484 * with arrays of rank 1. Inside each block, the data types are broken 00485 * up into groups based on container size. Integer, real, and logical 00486 * types are all one word, and the actual value is not used, so they 00487 * are all grouped together and treated as long. The same is true 00488 * for double and complex, as well as ascii and derivedbyte. 00489 * 00490 * For each group, the mask array is checked for a true value. When 00491 * one is encountered, the next value from the vector array is put into 00492 * the result array. If the mask is false, the corresponding element of 00493 * the field array is put into the result. 00494 */ 00495 00496 if (rank == 1) { 00497 iptr4 = (_f_mask *) mptr; 00498 switch (subtype) { 00499 case DVSUBTYPE_BIT64 : 00500 uptr1 = (_f_int8 *) vptr; 00501 uptr2 = (_f_int8 *) fptr; 00502 uptr3 = (_f_int8 *) rptr; 00503 rindx = 0; 00504 mindx = 0; 00505 vindx = 0; 00506 findx = 0; 00507 for (i = 0; i < tot_ext; i++) { 00508 if (LTOB(mask_el_len, &iptr4[mindx])) { 00509 if (vec_ext-- > 0) { 00510 uptr3[rindx] = uptr1[vindx]; 00511 vindx += vec_strd; /* use vector */ 00512 } else { 00513 _lerror (_LELVL_ABORT, FEVECUNP); 00514 } 00515 } else { 00516 findx = i * fld_strd[0]; 00517 uptr3[rindx] = uptr2[findx]; /* use field */ 00518 } 00519 rindx += res_strd[0]; 00520 mindx += msk_strd[0]; 00521 } 00522 break; 00523 00524 case DVSUBTYPE_BIT32 : 00525 hptr1 = (_f_int4 *) vptr; 00526 hptr2 = (_f_int4 *) fptr; 00527 hptr3 = (_f_int4 *) rptr; 00528 rindx = 0; 00529 mindx = 0; 00530 vindx = 0; 00531 findx = 0; 00532 for (i = 0; i < tot_ext; i++) { 00533 if (LTOB(mask_el_len, &iptr4[mindx])) { 00534 if (vec_ext-- > 0) { 00535 hptr3[rindx] = hptr1[vindx]; 00536 vindx += vec_strd; 00537 } else { 00538 _lerror (_LELVL_ABORT, FEVECUNP); 00539 } 00540 } else { 00541 findx = i * fld_strd[0]; 00542 hptr3[rindx] = hptr2[findx]; 00543 } 00544 rindx += res_strd[0]; 00545 mindx += msk_strd[0]; 00546 } 00547 break; 00548 00549 case DVSUBTYPE_BIT128 : 00550 dptr1 = (_f_real16 *) vptr; 00551 dptr2 = (_f_real16 *) fptr; 00552 dptr3 = (_f_real16 *) rptr; 00553 rindx = 0; 00554 mindx = 0; 00555 vindx = 0; 00556 findx = 0; 00557 for (i = 0; i < tot_ext; i++) { 00558 if (LTOB(mask_el_len, &iptr4[mindx])) { 00559 if (vec_ext-- > 0) { 00560 dptr3[rindx] = dptr1[vindx]; 00561 vindx += vec_strd; 00562 } else { 00563 _lerror (_LELVL_ABORT, FEVECUNP); 00564 } 00565 } else { 00566 findx = i * fld_strd[0]; 00567 dptr3[rindx] = dptr2[findx]; 00568 } 00569 rindx += res_strd[0]; 00570 mindx += msk_strd[0]; 00571 } 00572 break; 00573 00574 case DVSUBTYPE_CHAR : 00575 rindx = 0; 00576 mindx = 0; 00577 vindx = 0; 00578 findx = 0; 00579 for (i = 0; i < tot_ext; i++) { 00580 cptr3 = (char *) cr + rindx; 00581 if (LTOB(mask_el_len, &iptr4[mindx])) { 00582 if (vec_ext-- > 0) { 00583 cptr1 = (char *) cv + vindx; 00584 (void) memcpy (cptr3, cptr1, bucketsize); 00585 vindx += vec_strd; 00586 } else { 00587 _lerror (_LELVL_ABORT, FEVECUNP); 00588 } 00589 } else { 00590 findx = i * fld_strd[0]; 00591 cptr2 = (char *) cf + findx; 00592 (void) memcpy (cptr3, cptr2, bucketsize); 00593 } 00594 rindx += res_strd[0]; 00595 mindx += msk_strd[0]; 00596 } 00597 break; 00598 00599 case DVSUBTYPE_DERIVED : 00600 fptr1 = (_f_int *) vptr; 00601 fptr2 = (_f_int *) fptr; 00602 fptr3 = (_f_int *) rptr; 00603 /* 00604 * For this type, the assumption was made that the extent size would 00605 * more often than not be larger than the word size of the structure. 00606 * Therefore, an outer loop was added for the container size. This 00607 * will make the extent the inner loop, which should help optimization. 00608 */ 00609 for (i = 0; i < bucketsize; i++) { 00610 rindx = i; 00611 mindx = 0; 00612 vindx = i; 00613 vec_ext = vector->dimension[0].extent; 00614 for (j = 0; j < tot_ext; j++) { 00615 if (LTOB(mask_el_len, &iptr4[mindx])) { 00616 if (vec_ext-- > 0) { 00617 fptr3[rindx] = fptr1[vindx]; 00618 vindx += vec_strd; 00619 } else { 00620 _lerror (_LELVL_ABORT, FEVECUNP); 00621 } 00622 } else { 00623 findx = (j * fld_strd[0]) + i; 00624 fptr3[rindx] = fptr2[findx]; 00625 } 00626 rindx += res_strd[0]; 00627 mindx += msk_strd[0]; 00628 } 00629 } 00630 break; 00631 00632 #ifdef _F_COMP16 00633 case DVSUBTYPE_BIT256 : 00634 xptr1 = (dblcmplx *) vptr; 00635 xptr2 = (dblcmplx *) fptr; 00636 xptr3 = (dblcmplx *) rptr; 00637 rindx = 0; 00638 mindx = 0; 00639 vindx = 0; 00640 findx = 0; 00641 for (i = 0; i < tot_ext; i++) { 00642 if (LTOB(mask_el_len, &iptr4[mindx])) { 00643 if (vec_ext-- > 0) { 00644 xptr3[rindx].re = xptr1[vindx].re; 00645 xptr3[rindx].im = xptr1[vindx].im; 00646 vindx += vec_strd; 00647 } else { 00648 _lerror (_LELVL_ABORT, FEVECUNP); 00649 } 00650 } else { 00651 findx = i * fld_strd[0]; 00652 xptr3[rindx].re = xptr2[findx].re; 00653 xptr3[rindx].im = xptr2[findx].im; 00654 } 00655 rindx += res_strd[0]; 00656 mindx += msk_strd[0]; 00657 } 00658 break; 00659 #endif 00660 00661 default : 00662 _lerror (_LELVL_ABORT, FEINTDTY); 00663 } 00664 00665 } else if (rank == 2) { 00666 00667 /* 00668 * Rank two matrices are handled in a manner similar to rank one arrays, 00669 * but differ in that they are nested with the first index being the 00670 * inner loop. Scalar variables are set to the offsets of the second 00671 * dimension, and these are used for each iteration of the inner loop. 00672 */ 00673 00674 iptr4 = (_f_mask *) mptr; 00675 switch (subtype) { 00676 case DVSUBTYPE_BIT64 : 00677 uptr1 = (_f_int8 *) vptr; 00678 uptr2 = (_f_int8 *) fptr; 00679 uptr3 = (_f_int8 *) rptr; 00680 vindx = 0; 00681 for (i = 0; i < msk_ext[1]; i++) { 00682 indx2_msk = i * msk_strd[1]; 00683 indx2_fld = i * fld_strd[1]; 00684 indx2_res = i * res_strd[1]; 00685 indx1_msk = 0; 00686 indx1_res = 0; 00687 for (j = 0; j < msk_ext[0]; j++) { 00688 mindx = indx1_msk + indx2_msk; 00689 rindx = indx1_res + indx2_res; 00690 if (LTOB(mask_el_len, &iptr4[mindx])) { 00691 if (vec_ext-- > 0) { 00692 uptr3[rindx] = uptr1[vindx]; 00693 vindx += vec_strd; 00694 } else { 00695 _lerror (_LELVL_ABORT, FEVECUNP); 00696 } 00697 } else { 00698 findx = indx2_fld + (j * fld_strd[0]); 00699 uptr3[rindx] = uptr2[findx]; 00700 } 00701 indx1_msk += msk_strd[0]; 00702 indx1_res += res_strd[0]; 00703 } 00704 } 00705 break; 00706 00707 case DVSUBTYPE_BIT32 : 00708 hptr1 = (_f_int4 *) vptr; 00709 hptr2 = (_f_int4 *) fptr; 00710 hptr3 = (_f_int4 *) rptr; 00711 vindx = 0; 00712 for (i = 0; i < msk_ext[1]; i++) { 00713 indx2_msk = i * msk_strd[1]; 00714 indx2_fld = i * fld_strd[1]; 00715 indx2_res = i * res_strd[1]; 00716 indx1_msk = 0; 00717 indx1_res = 0; 00718 for (j = 0; j < msk_ext[0]; j++) { 00719 mindx = indx1_msk + indx2_msk; 00720 rindx = indx1_res + indx2_res; 00721 if (LTOB(mask_el_len, &iptr4[mindx])) { 00722 if (vec_ext-- > 0) { 00723 hptr3[rindx] = hptr1[vindx]; 00724 vindx += vec_strd; 00725 } else { 00726 _lerror (_LELVL_ABORT, FEVECUNP); 00727 } 00728 } else { 00729 findx = indx2_fld + (j * fld_strd[0]); 00730 hptr3[rindx] = hptr2[findx]; 00731 } 00732 indx1_msk += msk_strd[0]; 00733 indx1_res += res_strd[0]; 00734 } 00735 } 00736 break; 00737 00738 case DVSUBTYPE_BIT128 : 00739 dptr1 = (_f_real16 *) vptr; 00740 dptr2 = (_f_real16 *) fptr; 00741 dptr3 = (_f_real16 *) rptr; 00742 vindx = 0; 00743 for (i = 0; i < msk_ext[1]; i++) { 00744 indx2_msk = i * msk_strd[1]; 00745 indx2_fld = i * fld_strd[1]; 00746 indx2_res = i * res_strd[1]; 00747 indx1_msk = 0; 00748 indx1_res = 0; 00749 for (j = 0; j < msk_ext[0]; j++) { 00750 mindx = indx1_msk + indx2_msk; 00751 rindx = indx1_res + indx2_res; 00752 if (LTOB(mask_el_len, &iptr4[mindx])) { 00753 if (vec_ext-- > 0) { 00754 dptr3[rindx] = dptr1[vindx]; 00755 vindx += vec_strd; 00756 } else { 00757 _lerror (_LELVL_ABORT, FEVECUNP); 00758 } 00759 } else { 00760 findx = indx2_fld + (j * fld_strd[0]); 00761 dptr3[rindx] = dptr2[findx]; 00762 } 00763 indx1_msk += msk_strd[0]; 00764 indx1_res += res_strd[0]; 00765 } 00766 } 00767 break; 00768 00769 case DVSUBTYPE_CHAR : 00770 vindx = 0; 00771 for (i = 0; i < msk_ext[1]; i++) { 00772 indx2_msk = i * msk_strd[1]; 00773 indx2_fld = i * fld_strd[1]; 00774 indx2_res = i * res_strd[1]; 00775 indx1_msk = 0; 00776 indx1_res = 0; 00777 for (j = 0; j < msk_ext[0]; j++) { 00778 mindx = indx1_msk + indx2_msk; 00779 rindx = indx1_res + indx2_res; 00780 if (LTOB(mask_el_len, &iptr4[mindx])) { 00781 if (vec_ext-- > 0) { 00782 cptr1 = (char *) cv + vindx; 00783 cptr3 = (char *) cr + rindx; 00784 (void) memcpy (cptr3, cptr1, 00785 bucketsize); 00786 vindx += vec_strd; 00787 } else { 00788 _lerror (_LELVL_ABORT, FEVECUNP); 00789 } 00790 } else { 00791 findx = indx2_fld + (j * fld_strd[0]); 00792 cptr2 = (char *) cf + findx; 00793 cptr3 = (char *) cr + rindx; 00794 (void) memcpy (cptr3, cptr2, bucketsize); 00795 } 00796 indx1_msk += msk_strd[0]; 00797 indx1_res += res_strd[0]; 00798 } 00799 } 00800 break; 00801 00802 case DVSUBTYPE_DERIVED : 00803 fptr1 = (_f_int *) vptr; 00804 fptr2 = (_f_int *) fptr; 00805 fptr3 = (_f_int *) rptr; 00806 for (i = 0; i < bucketsize; i++) { 00807 vec_ext = vector->dimension[0].extent; 00808 vindx = i; 00809 for (j = 0; j < msk_ext[1]; j++) { 00810 indx1_msk = 0; 00811 indx1_res = i; 00812 indx2_msk = j * msk_strd[1]; 00813 indx2_fld = j * fld_strd[1] + i; 00814 indx2_res = j * res_strd[1]; 00815 for (k = 0; k < msk_ext[0]; k++) { 00816 mindx = indx1_msk + indx2_msk; 00817 rindx = indx1_res + indx2_res; 00818 if (LTOB(mask_el_len, &iptr4[mindx])) { 00819 if (vec_ext-- > 0) { 00820 fptr3[rindx] = fptr1[vindx]; 00821 vindx += vec_strd; 00822 } else { 00823 _lerror (_LELVL_ABORT, FEVECUNP); 00824 } 00825 } else { 00826 findx = indx2_fld + (k * fld_strd[0]); 00827 fptr3[rindx] = fptr2[findx]; 00828 } 00829 indx1_msk += msk_strd[0]; 00830 indx1_res += res_strd[0]; 00831 } 00832 } 00833 } 00834 break; 00835 00836 #ifdef _F_COMP16 00837 case DVSUBTYPE_BIT256 : 00838 xptr1 = (dblcmplx *) vptr; 00839 xptr2 = (dblcmplx *) fptr; 00840 xptr3 = (dblcmplx *) rptr; 00841 vindx = 0; 00842 for (i = 0; i < msk_ext[1]; i++) { 00843 indx2_msk = i * msk_strd[1]; 00844 indx2_fld = i * fld_strd[1]; 00845 indx2_res = i * res_strd[1]; 00846 indx1_msk = 0; 00847 indx1_res = 0; 00848 for (j = 0; j < msk_ext[0]; j++) { 00849 mindx = indx1_msk + indx2_msk; 00850 rindx = indx1_res + indx2_res; 00851 if (LTOB(mask_el_len, &iptr4[mindx])) { 00852 if (vec_ext-- > 0) { 00853 xptr3[rindx].re = xptr1[vindx].re; 00854 xptr3[rindx].im = xptr1[vindx].im; 00855 vindx += vec_strd; 00856 } else { 00857 _lerror (_LELVL_ABORT, FEVECUNP); 00858 } 00859 } else { 00860 findx = indx2_fld + (j * fld_strd[0]); 00861 xptr3[rindx].re = xptr2[findx].re; 00862 xptr3[rindx].im = xptr2[findx].im; 00863 } 00864 indx1_msk += msk_strd[0]; 00865 indx1_res += res_strd[0]; 00866 } 00867 } 00868 break; 00869 #endif 00870 00871 default : 00872 _lerror (_LELVL_ABORT, FEINTDTY); 00873 } 00874 } else { 00875 00876 /* 00877 * Ranks 3-7 are all handled in this section. All of them are done as 00878 * a double nested loop, with the first dimension being the inner loop, 00879 * and the outer loop being the product of the remaining extents. 00880 * 00881 * Two macros are used in this section. INCREMENT determines the values 00882 * for each of the outer dimensions, as well as the offset for each 00883 * dimension. FIND_INDX calculates the sum total of all of these offsets. 00884 * 00885 * Calculate the product of the extents which will be used as the loop 00886 * terminator. Also initialize the offset variables. 00887 */ 00888 00889 #ifdef _UNICOS 00890 #pragma _CRI shortloop 00891 #endif 00892 for (i = 1, tot_ext = 1; i < rank; i++) { 00893 j = i - 1; 00894 tot_ext *= mask->dimension[i].extent; 00895 curdim[j] = 0; 00896 msk_off[j] = 0; 00897 fld_off[j] = 0; 00898 res_off[j] = 0; 00899 } 00900 00901 switch (subtype) { 00902 case DVSUBTYPE_BIT64 : 00903 uptr1 = (_f_int8 *) vptr; 00904 vindx = 0; 00905 for (i = 0; i < tot_ext; i++) { 00906 FIND_INDX(); 00907 uptr2 = (_f_int8 *) fptr + indx1_fld; 00908 uptr3 = (_f_int8 *) rptr + indx1_res; 00909 iptr4 = (_f_mask *) mptr + indx1_msk; 00910 for (j = 0; j < msk_ext[0]; j++) { 00911 mindx = j * msk_strd[0]; 00912 if (LTOB(mask_el_len, &iptr4[mindx])) { 00913 if (vec_ext-- > 0) { 00914 rindx = j * res_strd[0]; 00915 uptr3[rindx] = uptr1[vindx]; 00916 vindx += vec_strd; 00917 } else { 00918 _lerror (_LELVL_ABORT, FEVECUNP); 00919 } 00920 } else { 00921 findx = j * fld_strd[0]; 00922 rindx = j * res_strd[0]; 00923 uptr3[rindx] = uptr2[findx]; 00924 } 00925 } 00926 INCREMENT(); 00927 } 00928 break; 00929 00930 case DVSUBTYPE_BIT32 : 00931 hptr1 = (_f_int4 *) vptr; 00932 vindx = 0; 00933 for (i = 0; i < tot_ext; i++) { 00934 FIND_INDX(); 00935 hptr2 = (_f_int4 *) fptr + indx1_fld; 00936 hptr3 = (_f_int4 *) rptr + indx1_res; 00937 iptr4 = (_f_mask *) mptr + indx1_msk; 00938 for (j = 0; j < msk_ext[0]; j++) { 00939 mindx = j * msk_strd[0]; 00940 if (LTOB(mask_el_len, &iptr4[mindx])) { 00941 if (vec_ext-- > 0) { 00942 rindx = j * res_strd[0]; 00943 hptr3[rindx] = hptr1[vindx]; 00944 vindx += vec_strd; 00945 } else { 00946 _lerror (_LELVL_ABORT, FEVECUNP); 00947 } 00948 } else { 00949 findx = j * fld_strd[0]; 00950 rindx = j * res_strd[0]; 00951 hptr3[rindx] = hptr2[findx]; 00952 } 00953 } 00954 INCREMENT(); 00955 } 00956 break; 00957 00958 case DVSUBTYPE_BIT128 : 00959 dptr1 = (_f_real16 *) vptr; 00960 vindx = 0; 00961 for (i = 0; i < tot_ext; i++) { 00962 FIND_INDX(); 00963 dptr2 = (_f_real16 *) fptr + indx1_fld; 00964 dptr3 = (_f_real16 *) rptr + indx1_res; 00965 iptr4 = (_f_mask *) mptr + indx1_msk; 00966 for (j = 0; j < msk_ext[0]; j++) { 00967 mindx = j * msk_strd[0]; 00968 if (LTOB(mask_el_len, &iptr4[mindx])) { 00969 if (vec_ext-- > 0) { 00970 rindx = j * res_strd[0]; 00971 dptr3[rindx] = dptr1[vindx]; 00972 vindx += vec_strd; 00973 } else { 00974 _lerror (_LELVL_ABORT, FEVECUNP); 00975 } 00976 } else { 00977 findx = j * fld_strd[0]; 00978 rindx = j * res_strd[0]; 00979 dptr3[rindx] = dptr2[findx]; 00980 } 00981 } 00982 INCREMENT(); 00983 } 00984 break; 00985 00986 case DVSUBTYPE_CHAR : 00987 vindx = 0; 00988 for (i = 0; i < tot_ext; i++) { 00989 FIND_INDX(); 00990 iptr4 = (_f_mask *) mptr + indx1_msk; 00991 for (j = 0; j < msk_ext[0]; j++) { 00992 mindx = j * msk_strd[0]; 00993 rindx = j * res_strd[0]; 00994 cptr3 = (char *) cr + indx1_res + rindx; 00995 if (LTOB(mask_el_len, &iptr4[mindx])) { 00996 if (vec_ext-- > 0) { 00997 cptr1 = (char *) cv + vindx; 00998 (void) memcpy (cptr3, cptr1, 00999 bucketsize); 01000 vindx += vec_strd; 01001 } else { 01002 _lerror (_LELVL_ABORT, FEVECUNP); 01003 } 01004 } else { 01005 findx = j * fld_strd[0]; 01006 cptr2 = (char *) cf + indx1_fld + findx; 01007 (void) memcpy (cptr3, cptr2, bucketsize); 01008 } 01009 } 01010 INCREMENT(); 01011 } 01012 break; 01013 01014 case DVSUBTYPE_DERIVED : 01015 fptr1 = (_f_int *) vptr; 01016 for (i = 0; i < bucketsize; i++) { 01017 vec_ext = vector->dimension[0].extent; 01018 vindx = i; 01019 for (j = 0; j < MAXDIM; j++) { 01020 curdim[j] = 0; 01021 res_off[j] = 0; 01022 msk_off[j] = 0; 01023 fld_off[j] = 0; 01024 } 01025 for (j = 0; j < tot_ext; j++) { 01026 FIND_INDX(); 01027 fptr2 = (_f_int *) fptr + i + indx1_fld; 01028 fptr3 = (_f_int *) rptr + i + indx1_res; 01029 iptr4 = (_f_mask *) mptr + indx1_msk; 01030 for (k = 0; k < msk_ext[0]; k++) { 01031 mindx = k * msk_strd[0]; 01032 if (LTOB(mask_el_len, &iptr4[mindx])) { 01033 if (vec_ext-- > 0) { 01034 rindx = k * res_strd[0]; 01035 fptr3[rindx] = fptr1[vindx]; 01036 vindx += vec_strd; 01037 } else { 01038 _lerror (_LELVL_ABORT, FEVECUNP); 01039 } 01040 } else { 01041 findx = k * fld_strd[0]; 01042 rindx = k * res_strd[0]; 01043 fptr3[rindx] = fptr2[findx]; 01044 } 01045 } 01046 INCREMENT(); 01047 } 01048 } 01049 break; 01050 01051 01052 #ifdef _F_COMP16 01053 case DVSUBTYPE_BIT256 : 01054 xptr1 = (dblcmplx *) vptr; 01055 vindx = 0; 01056 for (i = 0; i < tot_ext; i++) { 01057 FIND_INDX(); 01058 xptr2 = (dblcmplx *) fptr + indx1_fld; 01059 xptr3 = (dblcmplx *) rptr + indx1_res; 01060 iptr4 = (_f_mask *) mptr + indx1_msk; 01061 for (j = 0; j < msk_ext[0]; j++) { 01062 mindx = j * msk_strd[0]; 01063 if (LTOB(mask_el_len, &iptr4[mindx])) { 01064 if (vec_ext-- > 0) { 01065 rindx = j * res_strd[0]; 01066 xptr3[rindx].re = xptr1[vindx].re; 01067 xptr3[rindx].im = xptr1[vindx].im; 01068 vindx += vec_strd; 01069 } else { 01070 _lerror (_LELVL_ABORT, FEVECUNP); 01071 } 01072 } else { 01073 findx = j * fld_strd[0]; 01074 rindx = j * res_strd[0]; 01075 xptr3[rindx].re = xptr2[findx].re; 01076 xptr3[rindx].im = xptr2[findx].im; 01077 } 01078 } 01079 INCREMENT(); 01080 } 01081 break; 01082 #endif 01083 01084 default : 01085 _lerror (_LELVL_ABORT, FEINTDTY); 01086 } 01087 } 01088 }