00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #pragma ident "@(#) libfi/array/count.c 92.1 07/07/99 15:52:02"
00038 #include <stddef.h>
00039 #include <stdlib.h>
00040 #include <liberrno.h>
00041 #include <cray/dopevec.h>
00042 #include <cray/portdefs.h>
00043 #include "logical.h"
00044
00045
00046
00047 #ifdef _UNICOS
00048 #pragma _CRI duplicate _COUNT as COUNT@
00049 #endif
00050 void
00051 _COUNT (DopeVectorType * result,
00052 DopeVectorType * mask,
00053 _f_int *dimension)
00054 {
00055 void __count();
00056 (void) __count (result, mask, dimension);
00057 }
00058
00059 #ifdef _UNICOS
00060 #pragma _CRI duplicate _COUNT0 as COUNT0@
00061 #endif
00062 _f_int
00063 _COUNT0(DopeVectorType * mask,
00064 _f_int *dimension)
00065 {
00066 void __count();
00067 DopeVectorType result, *res_ptr;
00068 _f_int intres;
00069
00070 res_ptr = (DopeVectorType *) &result;
00071 res_ptr->base_addr.a.ptr = &intres;
00072 res_ptr->base_addr.a.el_len = sizeof(_f_int8) * BITS_PER_BYTE;
00073 res_ptr->assoc = 1;
00074 res_ptr->ptr_alloc = 0;
00075 res_ptr->p_or_a = NOT_P_OR_A;
00076 res_ptr->n_dim = 0;
00077 res_ptr->type_lens.type = DVTYPE_INTEGER;
00078 res_ptr->type_lens.dpflag = 0;
00079 res_ptr->type_lens.kind_or_star = DVD_DEFAULT;
00080 res_ptr->type_lens.int_len = sizeof(_f_int8) * BITS_PER_BYTE;
00081 res_ptr->type_lens.dec_len = 0;
00082 res_ptr->orig_base = res_ptr->base_addr.a.ptr;
00083 res_ptr->orig_size = 0;
00084 __count (res_ptr, mask, dimension);
00085 return (*(_f_int *) res_ptr->base_addr.a.ptr);
00086 }
00087
00088
00089 void
00090 _COUNT_4 (DopeVectorType * result,
00091 DopeVectorType * mask,
00092 _f_int *dimension)
00093 {
00094 void __count();
00095 (void) __count (result, mask, dimension);
00096 }
00097
00098 _f_int
00099 _COUNT0_4(DopeVectorType * mask,
00100 _f_int *dimension)
00101 {
00102 void __count();
00103 DopeVectorType result, *res_ptr;
00104 _f_int intres;
00105
00106 res_ptr = (DopeVectorType *) &result;
00107 res_ptr->base_addr.a.ptr = &intres;
00108 res_ptr->base_addr.a.el_len = sizeof(_f_int) * BITS_PER_BYTE;
00109 res_ptr->assoc = 1;
00110 res_ptr->ptr_alloc = 0;
00111 res_ptr->p_or_a = NOT_P_OR_A;
00112 res_ptr->n_dim = 0;
00113 res_ptr->type_lens.type = DVTYPE_INTEGER;
00114 res_ptr->type_lens.dpflag = 0;
00115 res_ptr->type_lens.kind_or_star = DVD_DEFAULT;
00116 res_ptr->type_lens.int_len = sizeof(_f_int4) * BITS_PER_BYTE;
00117 res_ptr->type_lens.dec_len = 0;
00118 res_ptr->orig_base = res_ptr->base_addr.a.ptr;
00119 res_ptr->orig_size = 0;
00120 __count (res_ptr, mask, dimension);
00121 return (*(_f_int4 *) res_ptr->base_addr.a.ptr);
00122 }
00123
00124
00125 void
00126 _COUNT_8 (DopeVectorType * result,
00127 DopeVectorType * mask,
00128 _f_int *dimension)
00129 {
00130 void __count();
00131 (void) __count (result, mask, dimension);
00132 }
00133
00134 _f_int
00135 _COUNT0_8(DopeVectorType * mask,
00136 _f_int *dimension)
00137 {
00138 void __count();
00139 DopeVectorType result, *res_ptr;
00140 _f_int8 intres;
00141
00142 res_ptr = (DopeVectorType *) &result;
00143 res_ptr->base_addr.a.ptr = &intres;
00144 res_ptr->base_addr.a.el_len = sizeof(_f_int8) * BITS_PER_BYTE;
00145 res_ptr->assoc = 1;
00146 res_ptr->ptr_alloc = 0;
00147 res_ptr->p_or_a = NOT_P_OR_A;
00148 res_ptr->n_dim = 0;
00149 res_ptr->type_lens.type = DVTYPE_INTEGER;
00150 res_ptr->type_lens.dpflag = 0;
00151 res_ptr->type_lens.kind_or_star = DVD_DEFAULT;
00152 res_ptr->type_lens.int_len = sizeof(_f_int8) * BITS_PER_BYTE;
00153 res_ptr->type_lens.dec_len = 0;
00154 res_ptr->orig_base = res_ptr->base_addr.a.ptr;
00155 res_ptr->orig_size = 0;
00156 __count (res_ptr, mask, dimension);
00157 return (*(_f_int8 *) res_ptr->base_addr.a.ptr);
00158 }
00159
00160
00161 void
00162 _COUNT_1 (DopeVectorType * result,
00163 DopeVectorType * mask,
00164 _f_int *dimension)
00165 {
00166 void __count();
00167 (void) __count (result, mask, dimension);
00168 }
00169
00170 _f_int1
00171 _COUNT0_1(DopeVectorType * mask,
00172 _f_int *dimension)
00173 {
00174 void __count();
00175 DopeVectorType result, *res_ptr;
00176 _f_int1 intres;
00177
00178 res_ptr = (DopeVectorType *) &result;
00179 res_ptr->base_addr.a.ptr = &intres;
00180 res_ptr->base_addr.a.el_len = sizeof(_f_int1) * BITS_PER_BYTE;
00181 res_ptr->assoc = 1;
00182 res_ptr->ptr_alloc = 0;
00183 res_ptr->p_or_a = NOT_P_OR_A;
00184 res_ptr->n_dim = 0;
00185 res_ptr->type_lens.type = DVTYPE_INTEGER;
00186 res_ptr->type_lens.dpflag = 0;
00187 res_ptr->type_lens.kind_or_star = DVD_DEFAULT;
00188 res_ptr->type_lens.int_len = sizeof(_f_int1) * BITS_PER_BYTE;
00189 res_ptr->type_lens.dec_len = 0;
00190 res_ptr->orig_base = res_ptr->base_addr.a.ptr;
00191 res_ptr->orig_size = 0;
00192 __count (res_ptr, mask, dimension);
00193 return (*(_f_int1 *) res_ptr->base_addr.a.ptr);
00194 }
00195
00196
00197 void
00198 _COUNT_2 (DopeVectorType * result,
00199 DopeVectorType * mask,
00200 _f_int *dimension)
00201 {
00202 void __count();
00203 (void) __count (result, mask, dimension);
00204 }
00205
00206 _f_int2
00207 _COUNT0_2(DopeVectorType * mask,
00208 _f_int *dimension)
00209 {
00210 void __count();
00211 DopeVectorType result, *res_ptr;
00212 _f_int2 intres;
00213
00214 res_ptr = (DopeVectorType *) &result;
00215 res_ptr->base_addr.a.ptr = &intres;
00216 res_ptr->base_addr.a.el_len = sizeof(_f_int2) * BITS_PER_BYTE;
00217 res_ptr->assoc = 1;
00218 res_ptr->ptr_alloc = 0;
00219 res_ptr->p_or_a = NOT_P_OR_A;
00220 res_ptr->n_dim = 0;
00221 res_ptr->type_lens.type = DVTYPE_INTEGER;
00222 res_ptr->type_lens.dpflag = 0;
00223 res_ptr->type_lens.kind_or_star = DVD_DEFAULT;
00224 res_ptr->type_lens.int_len = sizeof(_f_int2) * BITS_PER_BYTE;
00225 res_ptr->type_lens.dec_len = 0;
00226 res_ptr->orig_base = res_ptr->base_addr.a.ptr;
00227 res_ptr->orig_size = 0;
00228 __count (res_ptr, mask, dimension);
00229 return (*(_f_int2 *) res_ptr->base_addr.a.ptr);
00230 }
00231
00232
00233 void
00234 __count (DopeVectorType * result,
00235 DopeVectorType * mask,
00236 _f_int *dimension)
00237 {
00238 int c_dim;
00239 int other_dim;
00240 int num_strides = 1;
00241 int num_elts = 1;
00242 long nbytes = 0;
00243 long trues;
00244 _f_int * rptr;
00245 #ifdef _F_INT1
00246 _f_int1 * i1rptr;
00247 _f_log1 * i1mptr;
00248 #endif
00249 #ifdef _F_INT2
00250 _f_int2 * i2rptr;
00251 _f_log2 * i2mptr;
00252 #endif
00253 _f_int4 * i4rptr;
00254 _f_int8 * i8rptr;
00255 _f_log * imptr;
00256 _f_log4 * i4mptr;
00257 _f_log8 * i8mptr;
00258 long i, j;
00259 long indx, jndx;
00260 int done;
00261 int el_len;
00262 int mshftct=0;
00263 int rshftct=0;
00264
00265
00266 long current_place[MAXDIM-1];
00267 long mask_offset[MAXDIM-1];
00268 long mask_extent[MAXDIM-1];
00269 long mask_stride[MAXDIM-1];
00270 long result_offset[MAXDIM-1];
00271 long result_stride[MAXDIM-1];
00272 long cdim_mask_stride;
00273
00274
00275 if (dimension != NULL && mask->n_dim > 1) {
00276 c_dim = *dimension - 1;
00277 if (c_dim < 0 || c_dim >= mask->n_dim)
00278 _lerror (_LELVL_ABORT, FESCIDIM);
00279 } else {
00280 c_dim = 0;
00281 if (dimension != NULL) {
00282 if (*dimension < 1 || *dimension > mask->n_dim)
00283 _lerror (_LELVL_ABORT, FESCIDIM);
00284 }
00285 }
00286
00287
00288 if (!result->assoc) {
00289 int sm = 1;
00290 if (result->base_addr.a.el_len >= BITS_PER_WORD)
00291 sm = result->base_addr.a.el_len / BITS_PER_WORD;
00292 for (i = 0; i < c_dim; i++) {
00293 if (dimension != NULL) {
00294 result->dimension[i].extent =
00295 mask->dimension[i].extent;
00296 result->dimension[i].low_bound = 1;
00297 result->dimension[i].stride_mult =
00298 num_strides * sm;
00299 }
00300 num_strides *= mask->dimension[i].extent;
00301 }
00302 for ( ; i < mask->n_dim - 1; i++) {
00303 if (dimension != NULL) {
00304 result->dimension[i].extent =
00305 mask->dimension[i+1].extent;
00306 result->dimension[i].low_bound = 1;
00307 result->dimension[i].stride_mult =
00308 num_strides * sm;
00309 }
00310 num_strides *= mask->dimension[i+1].extent;
00311 }
00312 if (dimension != NULL)
00313 num_elts = num_strides;
00314
00315 result->base_addr.a.ptr = (void *) NULL;
00316
00317 nbytes = ((num_elts * result->base_addr.a.el_len) /
00318 BITS_PER_BYTE);
00319 if (nbytes != 0) {
00320 result->base_addr.a.ptr = (void *) malloc (nbytes);
00321 if (result->base_addr.a.ptr == NULL)
00322 _lerror(_LELVL_ABORT, FENOMEMY);
00323 result->assoc = 1;
00324 }
00325
00326 result->orig_base = result->base_addr.a.ptr;
00327 result->orig_size = nbytes * BITS_PER_BYTE;
00328 } else {
00329 int rank;
00330 rank = mask->n_dim;
00331 for (i = 0; i < rank-1; i++)
00332 num_strides *= mask->dimension[i+1].extent;
00333 }
00334
00335
00336
00337
00338
00339 rptr = (void *) result->base_addr.a.ptr;
00340 switch (result->type_lens.int_len) {
00341 case 64 :
00342 i8rptr = (_f_int8 *) rptr;
00343 #ifdef _F_INT4
00344 if (sizeof(_f_int) == sizeof(_f_log4))
00345 rshftct = 1;
00346 #endif
00347 #ifdef _UNICOS
00348 #pragma _CRI ivdep
00349 #endif
00350 for (i = 0; i < num_elts; i++)
00351 i8rptr[i] = 0;
00352 break;
00353 #ifdef _F_INT2
00354 case 16 :
00355 i2rptr = (_f_int2 *) rptr;
00356 for (i = 0; i < num_elts; i++)
00357 i2rptr[i] = 0;
00358 break;
00359 #endif
00360 #ifdef _F_INT1
00361 case 8 :
00362 i1rptr = (_f_int1 *) rptr;
00363 for (i = 0; i < num_elts; i++)
00364 i1rptr[i] = 0;
00365 break;
00366 #endif
00367 case 32 :
00368 default :
00369 i4rptr = (_f_int4 *) rptr;
00370 #ifdef _UNICOS
00371 #pragma _CRI ivdep
00372 #endif
00373 for (i = 0; i < num_elts; i++)
00374 i4rptr[i] = 0;
00375 }
00376
00377
00378
00379 imptr = (void *) mask->base_addr.a.ptr;
00380 switch (mask->type_lens.int_len) {
00381 case 64 :
00382 el_len = sizeof(_f_log8) * BITS_PER_BYTE;
00383 i8mptr = (_f_log8 *) imptr;
00384 #ifdef _F_LOG4
00385
00386
00387
00388
00389
00390
00391
00392 if (sizeof(_f_int) == sizeof(_f_log4))
00393 mshftct = 1;
00394 #endif
00395 break;
00396 #ifdef _F_LOG2
00397 case 16 :
00398 el_len = sizeof(_f_log2) * BITS_PER_BYTE;
00399 i2mptr = (_f_log2 *) imptr;
00400 break;
00401 #endif
00402 #ifdef _F_LOG1
00403 case 8 :
00404 el_len = sizeof(_f_log1) * BITS_PER_BYTE;
00405 i1mptr = (_f_log1 *) imptr;
00406 break;
00407 #endif
00408 case 32 :
00409 default :
00410 el_len = sizeof(_f_log4) * BITS_PER_BYTE;
00411 i4mptr = (_f_log4 *) imptr;
00412 }
00413
00414
00415
00416 if (mask->n_dim == 1) {
00417
00418
00419
00420 #ifdef _F_LOG4
00421 mask_stride[0] = (mask->dimension[0].stride_mult) >> mshftct;
00422 #else
00423 mask_stride[0] = mask->dimension[0].stride_mult;
00424 #endif
00425
00426 trues = 0;
00427 for (i = 0; i < mask->dimension[0].extent; i++) {
00428 indx = i * mask_stride[0];
00429 switch (mask->type_lens.int_len) {
00430 case 64 :
00431 if (LTOB(el_len, (i8mptr + indx))) {
00432
00433 trues++;
00434 }
00435 break;
00436 #ifdef _F_LOG2
00437 case 16 :
00438 if (LTOB(el_len, (i2mptr + indx))) {
00439
00440 trues++;
00441 }
00442 break;
00443 #endif
00444 #ifdef _F_LOG1
00445 case 8 :
00446 if (LTOB(el_len, (i1mptr + indx))) {
00447
00448 trues++;
00449 }
00450 break;
00451 #endif
00452 case 32 :
00453 default :
00454 if (LTOB(el_len, (i4mptr + indx))) {
00455
00456 trues++;
00457 }
00458 }
00459 }
00460
00461
00462 switch (result->type_lens.int_len) {
00463 case 64 :
00464 i8rptr[0] = trues;
00465 break;
00466 #ifdef _F_INT2
00467 case 16 :
00468 i2rptr[0] = trues;
00469 break;
00470 #endif
00471 #ifdef _F_INT1
00472 case 8 :
00473 i1rptr[0] = trues;
00474 break;
00475 #endif
00476 case 32 :
00477 default :
00478 i4rptr[0] = trues;
00479 }
00480
00481
00482 } else if (mask->n_dim == 2) {
00483
00484
00485 if (c_dim == 0)
00486 other_dim = 1;
00487 else
00488 other_dim = 0;
00489 #ifdef _F_LOG4
00490 mask_stride[0] = (mask->dimension[0].stride_mult) >> mshftct;
00491 mask_stride[1] = (mask->dimension[1].stride_mult) >> mshftct;
00492 #else
00493 mask_stride[0] = mask->dimension[0].stride_mult;
00494 mask_stride[1] = mask->dimension[1].stride_mult;
00495 #endif
00496
00497 trues = 0;
00498 for (i = 0; i < num_strides; i++) {
00499 indx = i * mask_stride[other_dim];
00500 for (j = 0; j < mask->dimension[c_dim].extent; j++) {
00501 jndx = indx + j *
00502 mask_stride[c_dim];
00503 switch (mask->type_lens.int_len) {
00504 case 64 :
00505 if (LTOB(el_len, (i8mptr +
00506 jndx))) {
00507
00508 trues++;
00509 }
00510 break;
00511 #ifdef _F_LOG2
00512 case 16 :
00513 if (LTOB(el_len, (i2mptr +
00514 jndx))) {
00515
00516 trues++;
00517 }
00518 break;
00519 #endif
00520 #ifdef _F_LOG1
00521 case 8 :
00522 if (LTOB(el_len, (i1mptr +
00523 jndx))) {
00524
00525 trues++;
00526 }
00527 break;
00528 #endif
00529 case 32 :
00530 default :
00531 if (LTOB(el_len, (i4mptr +
00532 jndx))) {
00533
00534 trues++;
00535 }
00536 }
00537 }
00538
00539
00540 if (result->n_dim != 0) {
00541 switch (result->type_lens.int_len) {
00542 case 64 :
00543 i8rptr[i] = trues;
00544 break;
00545 #ifdef _F_INT2
00546 case 16 :
00547 i2rptr[i] = trues;
00548 break;
00549 #endif
00550 #ifdef _F_INT1
00551 case 8 :
00552 i1rptr[i] = trues;
00553 break;
00554 #endif
00555 case 32 :
00556 default :
00557 i4rptr[i] = trues;
00558 }
00559 trues = 0;
00560 }
00561 }
00562 if (result->n_dim == 0) {
00563 switch (result->type_lens.int_len) {
00564 case 64 :
00565 i8rptr[0] = trues;
00566 break;
00567 #ifdef _F_INT2
00568 case 16 :
00569 i2rptr[0] = trues;
00570 break;
00571 #endif
00572 #ifdef _F_INT1
00573 case 8 :
00574 i1rptr[0] = trues;
00575 break;
00576 #endif
00577 case 32 :
00578 default :
00579 i4rptr[0] = trues;
00580 }
00581 }
00582
00583
00584 } else {
00585
00586
00587 if (result->n_dim != 0)
00588 #ifdef _UNICOS
00589 #pragma _CRI shortloop
00590 #endif
00591 for (i = 0; i < result->n_dim; i++) {
00592 result_offset[i] = 0;
00593 #ifdef _F_LOG4
00594 result_stride[i] =
00595 result->dimension[i].stride_mult >> rshftct;
00596 #else
00597 result_stride[i] =
00598 result->dimension[i].stride_mult;
00599 #endif
00600 }
00601
00602
00603
00604
00605 if (c_dim == 0)
00606 i = 0;
00607 else
00608 #ifdef _UNICOS
00609 #pragma _CRI shortloop
00610 #endif
00611 for (i = 0; i < c_dim; i++) {
00612 current_place[i] = 0;
00613 mask_offset[i] = 0;
00614 mask_extent[i] = mask->dimension[i].extent;
00615 #ifdef _F_LOG4
00616 mask_stride[i] =
00617 (mask->dimension[i].stride_mult) >> mshftct;
00618 #else
00619 mask_stride[i] = mask->dimension[i].stride_mult;
00620 #endif
00621 }
00622 if (i < (mask->n_dim - 1))
00623 #ifdef _UNICOS
00624 #pragma _CRI shortloop
00625 #endif
00626 for ( ; i < mask->n_dim - 1; i++) {
00627 current_place[i] = 0;
00628 mask_offset[i] = 0;
00629 mask_extent[i] = mask->dimension[i+1].extent;
00630 #ifdef _F_LOG4
00631 mask_stride[i] =
00632 (mask->dimension[i+1].stride_mult) >> mshftct;
00633 #else
00634 mask_stride[i] = mask->dimension[i+1].stride_mult;
00635 #endif
00636 }
00637 #ifdef _F_LOG4
00638 cdim_mask_stride = mask->dimension[c_dim].stride_mult >> mshftct;
00639 #else
00640 cdim_mask_stride = mask->dimension[c_dim].stride_mult;
00641 #endif
00642
00643
00644
00645 trues = 0;
00646 for (i = 0; i < num_strides; i++) {
00647
00648
00649
00650 indx = 0;
00651 #ifdef _UNICOS
00652 #pragma _CRI shortloop
00653 #endif
00654 for (j = 0; j < mask->n_dim - 1; j++)
00655 indx += mask_offset[j];
00656
00657
00658 for (j = 0; j < mask->dimension[c_dim].extent; j++) {
00659 jndx = indx + j * cdim_mask_stride;
00660 switch (mask->type_lens.int_len) {
00661 case 64 :
00662 if (LTOB(el_len, (i8mptr +
00663 jndx))) {
00664
00665 trues++;
00666 }
00667 break;
00668 #ifdef _F_LOG2
00669 case 16 :
00670 if (LTOB(el_len, (i2mptr +
00671 jndx))) {
00672
00673 trues++;
00674 }
00675 break;
00676 #endif
00677 #ifdef _F_LOG1
00678 case 8 :
00679 if (LTOB(el_len, (i1mptr +
00680 jndx))) {
00681
00682 trues++;
00683 }
00684 break;
00685 #endif
00686 case 32 :
00687 default :
00688 if (LTOB(el_len, (i4mptr +
00689 jndx))) {
00690
00691 trues++;
00692 }
00693 }
00694 }
00695 if (result->n_dim != 0) {
00696 indx = 0;
00697 #ifdef _UNICOS
00698 #pragma _CRI shortloop
00699 #endif
00700 for (j = 0; j < mask->n_dim - 1; j++)
00701 indx += result_offset[j];
00702
00703
00704 switch (result->type_lens.int_len) {
00705 case 64 :
00706 i8rptr[indx] = trues;
00707 break;
00708 #ifdef _F_INT2
00709 case 16 :
00710 i2rptr[indx] = trues;
00711 break;
00712 #endif
00713 #ifdef _F_INT1
00714 case 8 :
00715 i1rptr[indx] = trues;
00716 break;
00717 #endif
00718 case 32 :
00719 default :
00720 i4rptr[indx] = trues;
00721 }
00722 trues = 0;
00723 }
00724
00725
00726 j = 0;
00727 done = FALSE;
00728 while (done == FALSE && j < mask->n_dim - 1) {
00729 if (current_place[j] == mask_extent[j] - 1) {
00730 current_place[j] = 0;
00731 mask_offset[j] = 0;
00732 if (result->n_dim != 0)
00733 result_offset[j] = 0;
00734 } else {
00735 current_place[j]++;
00736 mask_offset[j] =
00737 current_place[j] * mask_stride[j];
00738 if (result->n_dim != 0)
00739 result_offset[j] =
00740 current_place[j] *
00741 result_stride[j];
00742 done = TRUE;
00743 }
00744 j++;
00745 }
00746 }
00747 if (result->n_dim == 0) {
00748
00749 switch (result->type_lens.int_len) {
00750 case 64 :
00751 i8rptr[0] = trues;
00752 break;
00753 #ifdef _F_INT2
00754 case 16 :
00755 i2rptr[0] = trues;
00756 break;
00757 #endif
00758 #ifdef _F_INT1
00759 case 8 :
00760 i1rptr[0] = trues;
00761 break;
00762 #endif
00763 case 32 :
00764 default :
00765 i4rptr[0] = trues;
00766 }
00767 }
00768 }
00769 }