Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/pack.c 92.1 07/07/99 15:52:02" 00038 00039 #include <stddef.h> 00040 #include <liberrno.h> 00041 #include <cray/dopevec.h> 00042 #include <cray/portdefs.h> 00043 #include "arraydefs.h" 00044 00045 /* 00046 * Pack an array into an array of rank one under the control of a 00047 * mask. 00048 * 00049 * Calculate the indices which will be used in the outer loop 00050 * of the rank 3-7 block. Indices are calculated for the mask 00051 * and source matrices. 00052 */ 00053 00054 #define FIND_INDX() \ 00055 switch (rank) { \ 00056 case 3 : \ 00057 indx1_msk = msk_off[0] + msk_off[1]; \ 00058 indx1_src = src_off[0] + src_off[1]; \ 00059 break; \ 00060 case 4 : \ 00061 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2]; \ 00062 indx1_src = src_off[0] + src_off[1] + src_off[2]; \ 00063 break; \ 00064 case 5 : \ 00065 indx1_msk = msk_off[0] + msk_off[1] + \ 00066 msk_off[2] + msk_off[3]; \ 00067 indx1_src = src_off[0] + src_off[1] + \ 00068 src_off[2] + src_off[3]; \ 00069 break; \ 00070 case 6 : \ 00071 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] + \ 00072 msk_off[3] + msk_off[4]; \ 00073 indx1_src = src_off[0] + src_off[1] + src_off[2] + \ 00074 src_off[3] + src_off[4]; \ 00075 break; \ 00076 default : \ 00077 indx1_msk = msk_off[0] + msk_off[1] + msk_off[2] + \ 00078 msk_off[3] + msk_off[4] + msk_off[5]; \ 00079 indx1_src = src_off[0] + src_off[1] + src_off[2] + \ 00080 src_off[3] + src_off[4] + src_off[5]; \ 00081 } 00082 00083 /* 00084 * Increment the dimension counters used in the rank 3-7 block. 00085 * In addition, calculate the offsets for each of the 2-rank 00086 * dimensions. These offsets are used in the FIND_INDX macro 00087 * to calculate the index to be used for the mask and the source 00088 * matrices. 00089 */ 00090 00091 #define INCREMENT() \ 00092 curdim[0]++; \ 00093 if (curdim[0] < src_ext[1]) { \ 00094 msk_off[0] = curdim[0] * msk_strd[1]; \ 00095 src_off[0] = curdim[0] * src_strd[1]; \ 00096 } else { \ 00097 curdim[0] = 0; \ 00098 msk_off[0] = 0; \ 00099 src_off[0] = 0; \ 00100 curdim[1]++; \ 00101 if (curdim[1] < src_ext[2]) { \ 00102 msk_off[1] = curdim[1] * msk_strd[2]; \ 00103 src_off[1] = curdim[1] * src_strd[2]; \ 00104 } else { \ 00105 curdim[1] = 0; \ 00106 msk_off[1] = 0; \ 00107 src_off[1] = 0; \ 00108 curdim[2]++; \ 00109 if (curdim[2] < src_ext[3]) { \ 00110 msk_off[2] = curdim[2] * msk_strd[3]; \ 00111 src_off[2] = curdim[2] * src_strd[3]; \ 00112 } else { \ 00113 curdim[2] = 0; \ 00114 msk_off[2] = 0; \ 00115 src_off[2] = 0; \ 00116 curdim[3]++; \ 00117 if (curdim[3] < src_ext[4]) { \ 00118 msk_off[3] = curdim[3] * msk_strd[4]; \ 00119 src_off[3] = curdim[3] * src_strd[4]; \ 00120 } else { \ 00121 curdim[3] = 0; \ 00122 msk_off[3] = 0; \ 00123 src_off[3] = 0; \ 00124 curdim[4]++; \ 00125 if (curdim[4] < src_ext[5]) { \ 00126 msk_off[4] = curdim[4] * msk_strd[5]; \ 00127 src_off[4] = curdim[4] * src_strd[5]; \ 00128 } else { \ 00129 curdim[4] = 0; \ 00130 msk_off[4] = 0; \ 00131 src_off[4] = 0; \ 00132 curdim[5]++; \ 00133 if (curdim[5] < src_ext[6]) { \ 00134 msk_off[5] = curdim[5] * msk_strd[6]; \ 00135 src_off[5] = curdim[5] * src_strd[6]; \ 00136 } \ 00137 } \ 00138 } \ 00139 } \ 00140 } \ 00141 } 00142 00143 #ifdef _UNICOS 00144 #pragma _CRI duplicate _PACK as PACK@ 00145 #endif 00146 void 00147 _PACK ( DopeVectorType * result, 00148 DopeVectorType * source, 00149 DopeVectorType * mask, 00150 DopeVectorType * vector) 00151 00152 { 00153 char *cs; /* char ptr to source array */ 00154 char *cr; /* char ptr to result array */ 00155 char *cv; /* char ptr to vector array */ 00156 char * restrict cptr1; /* char */ 00157 char * restrict cptr2; /* char */ 00158 char * restrict cptr3; /* char */ 00159 _f_int8 * restrict uptr1; /* 64-bit */ 00160 _f_int8 * restrict uptr2; /* 64-bit */ 00161 _f_int8 * restrict uptr3; /* 64-bit */ 00162 _f_int * restrict fptr1; /* default-size */ 00163 _f_int * restrict fptr2; /* default-size */ 00164 _f_int * restrict fptr3; /* default-size */ 00165 _f_real16 * restrict dptr1; /* 128-bit */ 00166 _f_real16 * restrict dptr2; /* 128-bit */ 00167 _f_real16 * restrict dptr3; /* 128-bit */ 00168 #ifdef _F_COMP16 00169 dblcmplx * restrict xptr1; /* 256-bit */ 00170 dblcmplx * restrict xptr2; /* 256-bit */ 00171 dblcmplx * restrict xptr3; /* 256-bit */ 00172 #endif 00173 _f_int4 * restrict hptr1; /* 32-bit */ 00174 _f_int4 * restrict hptr2; /* 32-bit */ 00175 _f_int4 * restrict hptr3; /* 32-bit */ 00176 _f_mask * restrict iptr4; /* def kind mask */ 00177 void * restrict sptr; /* ptr to source */ 00178 void * restrict rptr; /* ptr to result */ 00179 void * restrict mptr; /* ptr to mask */ 00180 void * restrict vptr; /* ptr to vector */ 00181 _f_int bucketsize; /* size of each data element */ 00182 long nbytes; /* # of bytes in data array */ 00183 long nwords; /* # of words in data array */ 00184 long curdim[MAXDIM]; /* current indices */ 00185 _f_int bytealligned; /* byte alligned flag */ 00186 long sindx; /* source index */ 00187 long rindx; /* result index */ 00188 long mindx; /* mask index */ 00189 long vindx; /* vector index */ 00190 _f_int type; /* type scalar */ 00191 _f_int subtype; /* sub-type */ 00192 _f_int arithmetic; /* arithmetic */ 00193 _f_int rank; /* dimension of source scalar */ 00194 long i, j, k; /* index variables */ 00195 long res_strd; /* element stride for result */ 00196 long vec_strd; /* element stride for result */ 00197 long src_ext[MAXDIM]; /* extents for source */ 00198 long src_strd[MAXDIM]; /* element stride for source */ 00199 long src_off[MAXDIM]; /* offset values for source */ 00200 long msk_strd[MAXDIM]; /* element stride for mask */ 00201 long msk_off[MAXDIM]; /* offset values for mask */ 00202 long indx1_src; /* index for dim 1 of source */ 00203 long indx2_src; /* index for dim 2 of source */ 00204 long indx1_vec; /* index for dim 1 of vector */ 00205 long indx2_vec; /* index for dim 2 of vector */ 00206 long indx1_res; /* index for dim 1 of result */ 00207 long indx2_res; /* index for dim 2 of result */ 00208 long indx1_msk; /* index for dim 1 of msk */ 00209 long indx2_msk; /* index for dim 2 of msk */ 00210 long total_ext; /* total extent counter */ 00211 long src_ext1; /* extent for dim 1 of source */ 00212 long src_ext2; /* extent for dim 1 of source */ 00213 long found; /* count of # entries in result */ 00214 long mask_el_len; 00215 _f_int early_exit; /* early exit flag */ 00216 00217 /* Set type and dimension global variables */ 00218 00219 type = source->type_lens.type; 00220 rank = source->n_dim; 00221 mask_el_len = mask->base_addr.a.el_len; 00222 00223 /* 00224 * Check to see if any of the matrices have size 0. If any do, 00225 * return without doing anything. 00226 */ 00227 00228 early_exit = 0; 00229 #ifdef _UNICOS 00230 #pragma _CRI shortloop 00231 #endif 00232 for (i = 0; i < rank; i++) { 00233 if (!source->dimension[i].extent) 00234 early_exit = 1; 00235 } 00236 if (result->assoc) { 00237 if (!result->dimension[0].extent) 00238 early_exit = 1; 00239 } 00240 if (vector) { 00241 if (!vector->dimension[0].extent) 00242 early_exit = 1; 00243 } 00244 if (mask) { 00245 if (mask->n_dim > 1) { 00246 #ifdef _UNICOS 00247 #pragma _CRI shortloop 00248 #endif 00249 for (i = 0; i < rank; i++) 00250 if (!mask->dimension[i].extent) 00251 early_exit = 1; 00252 } 00253 } 00254 00255 /* 00256 * Initialize every array element to 0. 00257 */ 00258 00259 #ifdef _UNICOS 00260 #pragma _CRI shortloop 00261 #endif 00262 for (i = 0; i < MAXDIM; i++) { 00263 curdim[i] = 0; 00264 src_ext[i] = 0; 00265 src_strd[i] = 0; 00266 src_off[i] = 0; 00267 msk_strd[i] = 0; 00268 msk_off[i] = 0; 00269 } 00270 00271 /* Size calculation is based on variable type */ 00272 00273 switch (type) { 00274 case DVTYPE_ASCII : 00275 bytealligned = 1; 00276 bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */ 00277 subtype = DVSUBTYPE_CHAR; 00278 arithmetic = 0; 00279 break; 00280 case DVTYPE_DERIVEDBYTE : 00281 bytealligned = 1; 00282 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE; 00283 subtype = DVSUBTYPE_CHAR; 00284 arithmetic = 0; 00285 break; 00286 case DVTYPE_DERIVEDWORD : 00287 bytealligned = 0; 00288 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD; 00289 subtype = DVSUBTYPE_DERIVED; 00290 arithmetic = 0; 00291 break; 00292 default : 00293 bytealligned = 0; 00294 bucketsize = source->type_lens.int_len / BITS_PER_WORD; 00295 if (source->type_lens.int_len == 64) { 00296 subtype = DVSUBTYPE_BIT64; 00297 } else if (source->type_lens.int_len == 32) { 00298 subtype = DVSUBTYPE_BIT32; 00299 bucketsize = 1; 00300 } else if (source->type_lens.int_len == 256) { 00301 subtype = DVSUBTYPE_BIT256; 00302 } else { 00303 subtype = DVSUBTYPE_BIT128; 00304 } 00305 arithmetic = 1; 00306 } 00307 00308 /* If necessary, fill result dope vector */ 00309 00310 if (!result->assoc) { 00311 result->base_addr.a.ptr = (void *) NULL; 00312 result->orig_base = 0; 00313 result->orig_size = 0; 00314 00315 /* Determine size of space to allocate */ 00316 00317 if (!bytealligned) { 00318 nbytes = bucketsize * BYTES_PER_WORD; 00319 #ifdef _CRAYMPP 00320 if (subtype == DVSUBTYPE_BIT32) 00321 nbytes /= 2; 00322 #endif 00323 } else { 00324 nbytes = bucketsize; 00325 } 00326 if (vector) { 00327 nbytes *= vector->dimension[0].extent; 00328 nwords = vector->dimension[0].extent; 00329 } else { 00330 #ifdef _UNICOS 00331 #pragma _CRI shortloop 00332 #endif 00333 for (i = 0; i < rank; i++) 00334 nbytes *= source->dimension[i].extent; 00335 nwords = nbytes / BYTES_PER_WORD; 00336 } 00337 if (nbytes > 0) { 00338 result->base_addr.a.ptr = (void *) malloc (nbytes); 00339 if (result->base_addr.a.ptr == NULL) 00340 _lerror (_LELVL_ABORT, FENOMEMY); 00341 } 00342 00343 result->assoc = 1; 00344 result->base_addr.a.el_len = source->base_addr.a.el_len; 00345 if (type == DVTYPE_ASCII) { 00346 cr = (char *) result->base_addr.a.ptr; 00347 result->base_addr.charptr = _cptofcd (cr, bucketsize); 00348 } 00349 result->orig_size = nbytes * BITS_PER_BYTE; 00350 00351 /* 00352 * These are initial values which may be changed when it is 00353 * determined how big the result array actually is. 00354 */ 00355 result->dimension[0].low_bound = 1; 00356 result->dimension[0].extent = nwords; 00357 result->dimension[0].stride_mult = bucketsize; 00358 00359 /* if result array is already allocated */ 00360 00361 } else { 00362 if (!bytealligned) 00363 nbytes = bucketsize * BYTES_PER_WORD; 00364 else 00365 nbytes = bucketsize; 00366 if (vector) { 00367 nbytes *= vector->dimension[0].extent; 00368 nwords = vector->dimension[0].extent; 00369 } else { 00370 nwords = 1; 00371 for (i = 0; i < rank; i++) { 00372 nbytes *= source->dimension[i].extent; 00373 nwords *= source->dimension[i].extent; 00374 } 00375 } 00376 } 00377 00378 /* If early exit is required, exit now */ 00379 00380 if (early_exit) 00381 return; 00382 if (mask) { 00383 iptr4 = (_f_mask *) mask->base_addr.a.ptr; 00384 if (mask->n_dim == 0 && !(vector) && 00385 !LTOB(mask_el_len, &iptr4[0])) { 00386 result->dimension[0].extent = 0; 00387 return; 00388 } 00389 } 00390 00391 /* Set up scalar pointers to all of the argument data areas */ 00392 00393 if (mask) 00394 mptr = (void *) mask->base_addr.a.ptr; 00395 if (!bytealligned) { 00396 sptr = (void *) source->base_addr.a.ptr; 00397 rptr = (void *) result->base_addr.a.ptr; 00398 if (vector) 00399 vptr = (void *) vector->base_addr.a.ptr; 00400 } else { 00401 if (type == DVTYPE_ASCII) { 00402 cs = _fcdtocp (source->base_addr.charptr); 00403 cr = _fcdtocp (result->base_addr.charptr); 00404 if (vector) 00405 cv = _fcdtocp (vector->base_addr.charptr); 00406 } else { 00407 cs = (char *) source->base_addr.a.ptr; 00408 cr = (char *) result->base_addr.a.ptr; 00409 if (vector) 00410 cv = (char *) vector->base_addr.a.ptr; 00411 } 00412 } 00413 00414 /* Set up some 'shortcut' variables used for index calculation */ 00415 00416 if (bucketsize > 1 && arithmetic) { 00417 res_strd = result->dimension[0].stride_mult / bucketsize; 00418 if (vector) 00419 vec_strd = vector->dimension[0].stride_mult / bucketsize; 00420 } else { 00421 res_strd = result->dimension[0].stride_mult; 00422 if (vector) 00423 vec_strd = vector->dimension[0].stride_mult; 00424 } 00425 00426 #ifdef _UNICOS 00427 #pragma _CRI shortloop 00428 #endif 00429 for (i = 0; i < rank; i++) { 00430 src_ext[i] = source->dimension[i].extent; 00431 if (bucketsize > 1 && arithmetic) { 00432 src_strd[i] = source->dimension[i].stride_mult / bucketsize; 00433 } else { 00434 src_strd[i] = source->dimension[i].stride_mult; 00435 } 00436 } 00437 if (mask->n_dim > 0) { 00438 #ifdef _UNICOS 00439 #pragma _CRI shortloop 00440 #endif 00441 for (i = 0; i < rank; i++) { 00442 msk_strd[i] = mask->dimension[i].stride_mult; 00443 iptr4 = (_f_mask *) mptr; 00444 #ifdef _CRAYMPP 00445 if (mask_el_len == 64 && sizeof (iptr4[0]) == 4) 00446 msk_strd[i] <<= 1; 00447 #endif 00448 } 00449 } 00450 00451 /* 00452 * The program is divided up into three blocks. The first block deals 00453 * with arrays of rank 1. Inside each block, the data types are broken 00454 * up into groups based on container size. Integer, real, and logical 00455 * types are all one word, and the actual value is not used, so they 00456 * are all grouped together and treated as long. The same is 00457 * true for double and complex, as well as ascii and derivedbyte. 00458 * 00459 * For each group, the mask array is checked for true values. When one 00460 * is encountered, the corresponding value from the source array is put 00461 * into the next available position in the result array. If no vector 00462 * is passed, the routine is finished at this point with the result 00463 * array length set to the number of true elements in the mask. If a 00464 * vector is furnished, the size of the vector determines the size of 00465 * the result array. If this size has been reached, the routine is done. 00466 * If not, elements from the vector array are put into the result array 00467 * until it is full. 00468 */ 00469 00470 if (rank == 1) { 00471 found = 0; 00472 iptr4 = (_f_mask *) mptr; 00473 switch (subtype) { 00474 case DVSUBTYPE_BIT64 : 00475 uptr1 = (_f_int8 *) sptr; 00476 uptr2 = (_f_int8 *) vptr; 00477 uptr3 = (_f_int8 *) rptr; 00478 rindx = 0; 00479 mindx = 0; 00480 vindx = 0; 00481 sindx = 0; 00482 src_ext1 = source->dimension[0].extent; 00483 for (i = 0; i < src_ext1; i++) { 00484 if (LTOB(mask_el_len, &iptr4[mindx])) { 00485 sindx = i * src_strd[0]; 00486 uptr3[rindx] = uptr1[sindx]; 00487 rindx += res_strd; 00488 found++; 00489 } 00490 mindx += msk_strd[0]; 00491 } 00492 if (!vector || found == nwords) { 00493 result->dimension[0].extent = found; 00494 } else { 00495 vindx = found * vec_strd; 00496 for ( ; found < nwords; found++) { 00497 uptr3[rindx] = uptr2[vindx]; 00498 rindx += res_strd; 00499 vindx += vec_strd; 00500 } 00501 } 00502 break; 00503 00504 case DVSUBTYPE_BIT32 : 00505 hptr1 = (_f_int4 *) sptr; 00506 hptr2 = (_f_int4 *) vptr; 00507 hptr3 = (_f_int4 *) rptr; 00508 rindx = 0; 00509 mindx = 0; 00510 vindx = 0; 00511 sindx = 0; 00512 src_ext1 = source->dimension[0].extent; 00513 for (i = 0; i < src_ext1; i++) { 00514 if (LTOB(mask_el_len, &iptr4[mindx])) { 00515 sindx = i * src_strd[0]; 00516 hptr3[rindx] = hptr1[sindx]; 00517 rindx += res_strd; 00518 found++; 00519 } 00520 mindx += msk_strd[0]; 00521 } 00522 if (!vector || found == nwords) { 00523 result->dimension[0].extent = found; 00524 } else { 00525 vindx = found * vec_strd; 00526 for ( ; found < nwords; found++) { 00527 hptr3[rindx] = hptr2[vindx]; 00528 rindx += res_strd; 00529 vindx += vec_strd; 00530 } 00531 } 00532 break; 00533 00534 case DVSUBTYPE_BIT128 : 00535 dptr1 = (_f_real16 *) sptr; 00536 dptr2 = (_f_real16 *) vptr; 00537 dptr3 = (_f_real16 *) rptr; 00538 rindx = 0; 00539 mindx = 0; 00540 vindx = 0; 00541 sindx = 0; 00542 src_ext1 = source->dimension[0].extent; 00543 for (i = 0; i < src_ext1; i++) { 00544 if (LTOB(mask_el_len, &iptr4[mindx])) { 00545 sindx = i * src_strd[0]; 00546 dptr3[rindx] = dptr1[sindx]; 00547 rindx += res_strd; 00548 found++; 00549 } 00550 mindx += msk_strd[0]; 00551 } 00552 if (!vector || found == nwords) { 00553 result->dimension[0].extent = found; 00554 } else { 00555 vindx = found * vec_strd; 00556 for ( ; found < nwords; found++) { 00557 dptr3[rindx] = dptr2[vindx]; 00558 rindx += res_strd; 00559 vindx += vec_strd; 00560 } 00561 } 00562 break; 00563 00564 case DVSUBTYPE_CHAR : 00565 rindx = 0; 00566 mindx = 0; 00567 vindx = 0; 00568 sindx = 0; 00569 src_ext1 = source->dimension[0].extent; 00570 for (i = 0; i < src_ext1; i++) { 00571 if (LTOB(mask_el_len, &iptr4[mindx])) { 00572 cptr3 = (char *) cr + rindx; 00573 cptr1 = (char *) cs + (i * src_strd[0]); 00574 (void) memcpy (cptr3, cptr1, bucketsize); 00575 rindx += res_strd; 00576 found++; 00577 } 00578 mindx += msk_strd[0]; 00579 } 00580 if (!vector || found == nwords) { 00581 result->dimension[0].extent = found; 00582 } else { 00583 vindx = found * vec_strd; 00584 for ( ; found < nwords; found++) { 00585 cptr3 = (char *) cr + rindx; 00586 cptr2 = (char *) cv + vindx; 00587 (void) memcpy (cptr3, cptr2, bucketsize); 00588 rindx += res_strd; 00589 vindx += vec_strd; 00590 } 00591 } 00592 break; 00593 00594 case DVSUBTYPE_DERIVED : 00595 fptr1 = (_f_int *) sptr; 00596 fptr2 = (_f_int *) vptr; 00597 fptr3 = (_f_int *) rptr; 00598 src_ext1 = source->dimension[0].extent; 00599 indx1_res = 0; 00600 /* 00601 * The derived word type is handled the same as the other types except 00602 * that another loop is added. The assumption was made that extent of 00603 * the array would be larger than the number of words in the derived 00604 * type. Therefore, to try and make this routine optimal, the first 00605 * loop uses the extent as its inner loop, which should provide better 00606 * optimization. The second loop is also done this way. 00607 */ 00608 for (i = 0; i < bucketsize; i++) { 00609 rindx = i; 00610 mindx = 0; 00611 for (j = 0; j < src_ext1; j++) { 00612 if (LTOB(mask_el_len, &iptr4[mindx])) { 00613 sindx = i + (j * src_strd[0]); 00614 fptr3[rindx] = fptr1[sindx]; 00615 rindx += res_strd; 00616 if (i == 0) { 00617 indx1_res = rindx; 00618 found++; 00619 } 00620 } 00621 mindx += msk_strd[0]; 00622 } 00623 } 00624 if (!vector || found == nwords) { 00625 result->dimension[0].extent = found; 00626 } else { 00627 indx1_vec = found * vec_strd; 00628 found = nwords - found; 00629 for (i = 0; i < bucketsize; i++) { 00630 rindx = indx1_res + i; 00631 vindx = indx1_vec + i; 00632 for (j = 0; j < found; j++) { 00633 fptr3[rindx] = fptr2[vindx]; 00634 rindx += res_strd; 00635 vindx += vec_strd; 00636 } 00637 } 00638 } 00639 break; 00640 00641 #ifdef _F_COMP16 00642 case DVSUBTYPE_BIT256 : 00643 xptr1 = (dblcmplx *) sptr; 00644 xptr2 = (dblcmplx *) vptr; 00645 xptr3 = (dblcmplx *) rptr; 00646 rindx = 0; 00647 mindx = 0; 00648 vindx = 0; 00649 sindx = 0; 00650 src_ext1 = source->dimension[0].extent; 00651 for (i = 0; i < src_ext1; i++) { 00652 if (LTOB(mask_el_len, &iptr4[mindx])) { 00653 sindx = i * src_strd[0]; 00654 xptr3[rindx].re = xptr1[sindx].re; 00655 xptr3[rindx].im = xptr1[sindx].im; 00656 rindx += res_strd; 00657 found++; 00658 } 00659 mindx += msk_strd[0]; 00660 } 00661 if (!vector || found == nwords) { 00662 result->dimension[0].extent = found; 00663 } else { 00664 vindx = found * vec_strd; 00665 for ( ; found < nwords; found++) { 00666 xptr3[rindx].re = xptr2[vindx].re; 00667 xptr3[rindx].im = xptr2[vindx].im; 00668 rindx += res_strd; 00669 vindx += vec_strd; 00670 } 00671 } 00672 break; 00673 #endif 00674 00675 default : 00676 _lerror (_LELVL_ABORT, FEINTDTY); 00677 } 00678 } else if (rank == 2) { 00679 00680 /* 00681 * Rank 2 matrices are handled in a manner similar to rank 1 arrays, 00682 * except that the first loop in each data type is a nested loop, with 00683 * the outer loop being the second dimension, and the inner loop being 00684 * the first. This preserves the storage order which is necessary for 00685 * pack to work. The second part of each block is not affected by the 00686 * number of dimensions in the source matrix. 00687 */ 00688 00689 found = 0; 00690 iptr4 = (_f_mask *) mptr; 00691 switch (subtype) { 00692 case DVSUBTYPE_BIT64 : 00693 uptr1 = (_f_int8 *) sptr; 00694 uptr2 = (_f_int8 *) vptr; 00695 uptr3 = (_f_int8 *) rptr; 00696 indx2_msk = 0; 00697 indx2_src = 0; 00698 rindx = 0; 00699 src_ext1 = src_ext[0]; 00700 src_ext2 = src_ext[1]; 00701 for (i = 0; i < src_ext2; i++) { 00702 indx1_msk = 0; 00703 for (j = 0; j < src_ext1; j++) { 00704 mindx = indx1_msk + indx2_msk; 00705 if (LTOB(mask_el_len, &iptr4[mindx])) { 00706 sindx = indx2_src + (j * src_strd[0]); 00707 uptr3[rindx] = uptr1[sindx]; 00708 rindx += res_strd; 00709 found++; 00710 } 00711 indx1_msk += msk_strd[0]; 00712 } 00713 indx2_msk += msk_strd[1]; 00714 indx2_src += src_strd[1]; 00715 } 00716 if (!vector || found == nwords) { 00717 result->dimension[0].extent = found; 00718 } else { 00719 vindx = found * vec_strd; 00720 for ( ; found < nwords; found++) { 00721 uptr3[rindx] = uptr2[vindx]; 00722 rindx += res_strd; 00723 vindx += vec_strd; 00724 } 00725 } 00726 break; 00727 00728 case DVSUBTYPE_BIT32 : 00729 hptr1 = (_f_int4 *) sptr; 00730 hptr2 = (_f_int4 *) vptr; 00731 hptr3 = (_f_int4 *) rptr; 00732 indx2_msk = 0; 00733 indx2_src = 0; 00734 rindx = 0; 00735 src_ext1 = src_ext[0]; 00736 src_ext2 = src_ext[1]; 00737 for (i = 0; i < src_ext2; i++) { 00738 indx1_msk = 0; 00739 for (j = 0; j < src_ext1; j++) { 00740 mindx = indx1_msk + indx2_msk; 00741 if (LTOB(mask_el_len, &iptr4[mindx])) { 00742 sindx = indx2_src + (j * src_strd[0]); 00743 hptr3[rindx] = hptr1[sindx]; 00744 rindx += res_strd; 00745 found++; 00746 } 00747 indx1_msk += msk_strd[0]; 00748 } 00749 indx2_msk += msk_strd[1]; 00750 indx2_src += src_strd[1]; 00751 } 00752 if (!vector || found == nwords) { 00753 result->dimension[0].extent = found; 00754 } else { 00755 vindx = found * vec_strd; 00756 for ( ; found < nwords; found++) { 00757 hptr3[rindx] = hptr2[vindx]; 00758 rindx += res_strd; 00759 vindx += vec_strd; 00760 } 00761 } 00762 break; 00763 00764 case DVSUBTYPE_BIT128 : 00765 dptr1 = (_f_real16 *) sptr; 00766 dptr2 = (_f_real16 *) vptr; 00767 dptr3 = (_f_real16 *) rptr; 00768 indx2_msk = 0; 00769 indx2_src = 0; 00770 rindx = 0; 00771 src_ext1 = src_ext[0]; 00772 src_ext2 = src_ext[1]; 00773 for (i = 0; i < src_ext2; i++) { 00774 indx1_msk = 0; 00775 for (j = 0; j < src_ext1; j++) { 00776 mindx = indx1_msk + indx2_msk; 00777 if (LTOB(mask_el_len, &iptr4[mindx])) { 00778 sindx = indx2_src + (j * src_strd[0]); 00779 dptr3[rindx] = dptr1[sindx]; 00780 rindx += res_strd; 00781 found++; 00782 } 00783 indx1_msk += msk_strd[0]; 00784 } 00785 indx2_msk += msk_strd[1]; 00786 indx2_src += src_strd[1]; 00787 } 00788 if (!vector || found == nwords) { 00789 result->dimension[0].extent = found; 00790 } else { 00791 vindx = found * vec_strd; 00792 for ( ; found < nwords; found++) { 00793 dptr3[rindx] = dptr2[vindx]; 00794 rindx += res_strd; 00795 vindx += vec_strd; 00796 } 00797 } 00798 break; 00799 00800 case DVSUBTYPE_CHAR : 00801 indx2_msk = 0; 00802 indx2_src = 0; 00803 rindx = 0; 00804 src_ext1 = src_ext[0]; 00805 src_ext2 = src_ext[1]; 00806 for (i = 0; i < src_ext2; i++) { 00807 indx1_msk = 0; 00808 for (j = 0; j < src_ext1; j++) { 00809 mindx = indx1_msk + indx2_msk; 00810 if (LTOB(mask_el_len, &iptr4[mindx])) { 00811 sindx = indx2_src + (j * src_strd[0]); 00812 cptr1 = (char *) cs + sindx; 00813 cptr3 = (char *) cr + rindx; 00814 (void) memcpy (cptr3, cptr1, bucketsize); 00815 rindx += res_strd; 00816 found++; 00817 } 00818 indx1_msk += msk_strd[0]; 00819 } 00820 indx2_msk += msk_strd[1]; 00821 indx2_src += src_strd[1]; 00822 } 00823 if (!vector || found == nwords) { 00824 result->dimension[0].extent = found; 00825 } else { 00826 vindx = found * vec_strd; 00827 for ( ; found < nwords; found++) { 00828 cptr2 = (char *) cv + vindx; 00829 cptr3 = (char *) cr + rindx; 00830 (void) memcpy (cptr3, cptr2, bucketsize); 00831 rindx += res_strd; 00832 vindx += vec_strd; 00833 } 00834 } 00835 break; 00836 00837 case DVSUBTYPE_DERIVED : 00838 fptr1 = (_f_int *) sptr; 00839 fptr2 = (_f_int *) vptr; 00840 fptr3 = (_f_int *) rptr; 00841 src_ext1 = src_ext[0]; 00842 src_ext2 = src_ext[1]; 00843 for (i = 0; i < bucketsize; i++) { 00844 indx2_msk = 0; 00845 indx2_src = 0; 00846 rindx = i; 00847 for (j = 0; j < src_ext2; j++) { 00848 indx1_msk = 0; 00849 for (k = 0; k < src_ext1; k++) { 00850 mindx = indx1_msk + indx2_msk; 00851 if (LTOB(mask_el_len, &iptr4[mindx])) { 00852 sindx = indx2_src + i + (k * src_strd[0]); 00853 fptr3[rindx] = fptr1[sindx]; 00854 rindx += res_strd; 00855 if (i == 0) 00856 found++; 00857 } 00858 indx1_msk += msk_strd[0]; 00859 } 00860 indx2_msk += msk_strd[1]; 00861 indx2_src += src_strd[1]; 00862 } 00863 } 00864 if (!vector || found == nwords) { 00865 result->dimension[0].extent = found; 00866 } else { 00867 indx1_res = found * res_strd; 00868 indx1_vec = found * vec_strd; 00869 found = nwords - found; 00870 for (i = 0; i < bucketsize; i++) { 00871 rindx = indx1_res + i; 00872 vindx = indx1_vec + i; 00873 for (j = 0; j < found; j++) { 00874 fptr3[rindx] = fptr2[vindx]; 00875 rindx += res_strd; 00876 vindx += vec_strd; 00877 } 00878 } 00879 } 00880 break; 00881 00882 #ifdef _F_COMP16 00883 case DVSUBTYPE_BIT256 : 00884 xptr1 = (dblcmplx *) sptr; 00885 xptr2 = (dblcmplx *) vptr; 00886 xptr3 = (dblcmplx *) rptr; 00887 indx2_msk = 0; 00888 indx2_src = 0; 00889 rindx = 0; 00890 src_ext1 = src_ext[0]; 00891 src_ext2 = src_ext[1]; 00892 for (i = 0; i < src_ext2; i++) { 00893 indx1_msk = 0; 00894 for (j = 0; j < src_ext1; j++) { 00895 mindx = indx1_msk + indx2_msk; 00896 if (LTOB(mask_el_len, &iptr4[mindx])) { 00897 sindx = indx2_src + (j * src_strd[0]); 00898 xptr3[rindx].re = xptr1[sindx].re; 00899 xptr3[rindx].im = xptr1[sindx].im; 00900 rindx += res_strd; 00901 found++; 00902 } 00903 indx1_msk += msk_strd[0]; 00904 } 00905 indx2_msk += msk_strd[1]; 00906 indx2_src += src_strd[1]; 00907 } 00908 if (!vector || found == nwords) { 00909 result->dimension[0].extent = found; 00910 } else { 00911 vindx = found * vec_strd; 00912 for ( ; found < nwords; found++) { 00913 xptr3[rindx].re = xptr2[vindx].re; 00914 xptr3[rindx].im = xptr2[vindx].im; 00915 rindx += res_strd; 00916 vindx += vec_strd; 00917 } 00918 } 00919 break; 00920 #endif 00921 00922 default : 00923 _lerror (_LELVL_ABORT, FEINTDTY); 00924 } 00925 } else { /* rank 3-7 */ 00926 00927 /* 00928 * Ranks 3 through 7 are all handled in this last block. It was assumed 00929 * that ranks 1 and 2 would account for the majority of calls to pack, 00930 * and that the remaining ranks could be done in one block. 00931 * 00932 * The logic behind these blocks is the same as for the other ranks. 00933 * The first part of the routine uses two nested loops, with the inner 00934 * loop being the first dimension, and the outer loop being the product 00935 * of all of the remaining dimensions. A array of counters keeps track 00936 * of the values for each of the dimensions. Two macros are used in 00937 * this block. INCREMENT are used to calculate the values of each of 00938 * the dimension counters, and to calculate the offsets into the array 00939 * for each index. FIND_INDX sums these offsets into one offset, which 00940 * is used for each iteration of the inner loop. As with the other two 00941 * blocks, the second part of each section is not affected by the number 00942 * of dimensions in the source matrix. 00943 * 00944 * Calculate the product of each of the dimensions 2-n. This is the 00945 * number of times the outer loop will be executed. Also, initialize 00946 * the offset and dimension counter arrays. 00947 */ 00948 total_ext = 1; 00949 #ifdef _UNICOS 00950 #pragma _CRI shortloop 00951 #endif 00952 for (i = 0; i < MAXDIM; i++) { 00953 curdim[i] = 0; 00954 msk_off[i] = 0; 00955 src_off[i] = 0; 00956 } 00957 00958 #ifdef _UNICOS 00959 #pragma _CRI shortloop 00960 #endif 00961 for (i = 1; i < rank; i++) 00962 total_ext *= source->dimension[i].extent; 00963 iptr4 = (_f_mask *) mptr; 00964 found = 0; 00965 00966 switch (subtype) { 00967 case DVSUBTYPE_BIT64 : 00968 uptr2 = (_f_int8 *) vptr; 00969 uptr3 = (_f_int8 *) rptr; 00970 rindx = 0; 00971 for (i = 0; i < total_ext; i++) { 00972 FIND_INDX(); 00973 uptr1 = (_f_int8 *) sptr + indx1_src; 00974 iptr4 = (_f_mask *) mptr + indx1_msk; 00975 for (j = 0; j < src_ext[0]; j++) { 00976 mindx = j * msk_strd[0]; 00977 if (LTOB(mask_el_len, &iptr4[mindx])) { 00978 sindx = j * src_strd[0]; 00979 uptr3[rindx] = uptr1[sindx]; 00980 rindx += res_strd; 00981 found++; 00982 } 00983 } 00984 INCREMENT(); 00985 } 00986 if (!vector || found == nwords) { 00987 result->dimension[0].extent = found; 00988 } else { 00989 vindx = found * vec_strd; 00990 for ( ; found < nwords; found++) { 00991 uptr3[rindx] = uptr2[vindx]; 00992 rindx += res_strd; 00993 vindx += vec_strd; 00994 } 00995 } 00996 break; 00997 00998 case DVSUBTYPE_BIT32 : 00999 hptr2 = (_f_int4 *) vptr; 01000 hptr3 = (_f_int4 *) rptr; 01001 rindx = 0; 01002 for (i = 0; i < total_ext; i++) { 01003 FIND_INDX(); 01004 hptr1 = (_f_int4 *) sptr + indx1_src; 01005 iptr4 = (_f_mask *) mptr + indx1_msk; 01006 for (j = 0; j < src_ext[0]; j++) { 01007 mindx = j * msk_strd[0]; 01008 if (LTOB(mask_el_len, &iptr4[mindx])) { 01009 sindx = j * src_strd[0]; 01010 hptr3[rindx] = hptr1[sindx]; 01011 rindx += res_strd; 01012 found++; 01013 } 01014 } 01015 INCREMENT(); 01016 } 01017 if (!vector || found == nwords) { 01018 result->dimension[0].extent = found; 01019 } else { 01020 vindx = found * vec_strd; 01021 for ( ; found < nwords; found++) { 01022 hptr3[rindx] = hptr2[vindx]; 01023 rindx += res_strd; 01024 vindx += vec_strd; 01025 } 01026 } 01027 break; 01028 01029 case DVSUBTYPE_BIT128 : 01030 dptr2 = (_f_real16 *) vptr; 01031 dptr3 = (_f_real16 *) rptr; 01032 rindx = 0; 01033 for (i = 0; i < total_ext; i++) { 01034 FIND_INDX(); 01035 dptr1 = (_f_real16 *) sptr + indx1_src; 01036 iptr4 = (_f_mask *) mptr + indx1_msk; 01037 for (j = 0; j < src_ext[0]; j++) { 01038 mindx = j * msk_strd[0]; 01039 if (LTOB(mask_el_len, &iptr4[mindx])) { 01040 sindx = j * src_strd[0]; 01041 dptr3[rindx] = dptr1[sindx]; 01042 rindx += res_strd; 01043 found++; 01044 } 01045 } 01046 INCREMENT(); 01047 } 01048 if (!vector || found == nwords) { 01049 result->dimension[0].extent = found; 01050 } else { 01051 vindx = found * vec_strd; 01052 for ( ; found < nwords; found++) { 01053 dptr3[rindx] = dptr2[vindx]; 01054 rindx += res_strd; 01055 vindx += vec_strd; 01056 } 01057 } 01058 break; 01059 01060 case DVSUBTYPE_CHAR : 01061 cptr2 = (char *) vptr; 01062 cptr3 = (char *) rptr; 01063 rindx = 0; 01064 for (i = 0; i < total_ext; i++) { 01065 FIND_INDX(); 01066 iptr4 = (_f_mask *) mptr + indx1_msk; 01067 for (j = 0; j < src_ext[0]; j++) { 01068 mindx = j * msk_strd[0]; 01069 if (LTOB(mask_el_len, &iptr4[mindx])) { 01070 sindx = indx1_src + (j * src_strd[0]); 01071 cptr1 = (char *) cs + sindx; 01072 cptr3 = (char *) cr + rindx; 01073 (void) memcpy (cptr3, cptr1, bucketsize); 01074 rindx += res_strd; 01075 found++; 01076 } 01077 } 01078 INCREMENT(); 01079 } 01080 if (!vector || found == nwords) { 01081 result->dimension[0].extent = found; 01082 } else { 01083 vindx = found * vec_strd; 01084 for ( ; found < nwords; found++) { 01085 cptr3 = (char *) cr + rindx; 01086 cptr2 = (char *) cv + vindx; 01087 (void) memcpy (cptr3, cptr2, bucketsize); 01088 rindx += res_strd; 01089 vindx += vec_strd; 01090 } 01091 } 01092 break; 01093 01094 case DVSUBTYPE_DERIVED : 01095 fptr2 = (_f_int *) vptr; 01096 fptr3 = (_f_int *) rptr; 01097 for (i = 0; i < bucketsize; i++) { 01098 rindx = i; 01099 for (j = 0; j < rank; j++) { 01100 msk_off[j] = 0; 01101 src_off[j] = 0; 01102 curdim[j] = 0; 01103 } 01104 for (j = 0; j < total_ext; j++) { 01105 FIND_INDX(); 01106 fptr1 = (_f_int *) sptr + i + indx1_src; 01107 iptr4 = (_f_mask *) mptr + indx1_msk; 01108 for (k = 0; k < src_ext[0]; k++) { 01109 mindx = k * msk_strd[0]; 01110 if (LTOB(mask_el_len, &iptr4[mindx])) { 01111 sindx = k * src_strd[0]; 01112 fptr3[rindx] = fptr1[sindx]; 01113 rindx += res_strd; 01114 if (i == 0) 01115 found++; 01116 } 01117 } 01118 INCREMENT(); 01119 } 01120 } 01121 if (!vector || found == nwords) { 01122 result->dimension[0].extent = found; 01123 } else { 01124 indx1_res = found * res_strd; 01125 indx1_vec = found * vec_strd; 01126 found = nwords - found; 01127 for (i = 0; i < bucketsize; i++) { 01128 rindx = indx1_res + i; 01129 vindx = indx1_vec + i; 01130 for (j = 0; j < found; j++) { 01131 fptr3[rindx] = fptr2[vindx]; 01132 rindx += res_strd; 01133 vindx += vec_strd; 01134 } 01135 } 01136 } 01137 break; 01138 01139 #ifdef _F_COMP16 01140 case DVSUBTYPE_BIT256 : 01141 xptr2 = (dblcmplx *) vptr; 01142 xptr3 = (dblcmplx *) rptr; 01143 rindx = 0; 01144 for (i = 0; i < total_ext; i++) { 01145 FIND_INDX(); 01146 xptr1 = (dblcmplx *) sptr + indx1_src; 01147 iptr4 = (_f_mask *) mptr + indx1_msk; 01148 for (j = 0; j < src_ext[0]; j++) { 01149 mindx = j * msk_strd[0]; 01150 if (LTOB(mask_el_len, &iptr4[mindx])) { 01151 sindx = j * src_strd[0]; 01152 xptr3[rindx].re = xptr1[sindx].re; 01153 xptr3[rindx].im = xptr1[sindx].im; 01154 rindx += res_strd; 01155 found++; 01156 } 01157 } 01158 INCREMENT(); 01159 } 01160 if (!vector || found == nwords) { 01161 result->dimension[0].extent = found; 01162 } else { 01163 vindx = found * vec_strd; 01164 for ( ; found < nwords; found++) { 01165 xptr3[rindx].re = xptr2[vindx].re; 01166 xptr3[rindx].im = xptr2[vindx].im; 01167 rindx += res_strd; 01168 vindx += vec_strd; 01169 } 01170 } 01171 if (!vector || found == nwords) { 01172 result->dimension[0].extent = found; 01173 } else { 01174 vindx = found * vec_strd; 01175 for ( ; found < nwords; found++) { 01176 xptr3[rindx].re = xptr2[vindx].re; 01177 xptr3[rindx].im = xptr2[vindx].im; 01178 rindx += res_strd; 01179 vindx += vec_strd; 01180 } 01181 } 01182 break; 01183 #endif 01184 01185 default : 01186 _lerror (_LELVL_ABORT, FEINTDTY); 01187 } 01188 } 01189 }