Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 static const char USMID[] = "@(#) libfi/array/reshape.c 92.0 10/08/98 14:37:14"; 00038 00039 #include <stddef.h> 00040 #include <liberrno.h> 00041 #include <stddef.h> 00042 #include <cray/dopevec.h> 00043 #include <cray/portdefs.h> 00044 #include "arraydefs.h" 00045 00046 /* 00047 * INCR_PAD increments the index for the pad array, and calculates 00048 * the offset into the pad array. 00049 */ 00050 00051 #define INCR_PAD() \ 00052 pad_indx[0]++; \ 00053 if (pad_indx[0] < pad_ext[0]) \ 00054 pad_off[0] = pad_indx[0] * pad_strd[0]; \ 00055 else { \ 00056 pad_indx[0] = 0; \ 00057 pad_off[0] = 0; \ 00058 pad_indx[1]++; \ 00059 if (pad_indx[1] < pad_ext[1]) \ 00060 pad_off[1] = pad_indx[1] * pad_strd[1]; \ 00061 else { \ 00062 pad_indx[1] = 0; \ 00063 pad_off[1] = 0; \ 00064 pad_indx[2]++; \ 00065 if (pad_indx[2] < pad_ext[2]) \ 00066 pad_off[2] = pad_indx[2] * pad_strd[2]; \ 00067 else { \ 00068 pad_indx[2] = 0; \ 00069 pad_off[2] = 0; \ 00070 pad_indx[3]++; \ 00071 if (pad_indx[3] < pad_ext[3]) \ 00072 pad_off[3] = pad_indx[3] * pad_strd[3]; \ 00073 else { \ 00074 pad_indx[3] = 0; \ 00075 pad_off[3] = 0; \ 00076 pad_indx[4]++; \ 00077 if (pad_indx[4] < pad_ext[4]) \ 00078 pad_off[4] = pad_indx[4] * pad_strd[4]; \ 00079 else { \ 00080 pad_indx[4] = 0; \ 00081 pad_off[4] = 0; \ 00082 pad_indx[5]++; \ 00083 if (pad_indx[5] < pad_ext[5]) \ 00084 pad_off[5] = pad_indx[5] * pad_strd[5]; \ 00085 else { \ 00086 pad_indx[5] = 0; \ 00087 pad_off[5] = 0; \ 00088 pad_indx[6]++; \ 00089 if (pad_indx[6] < pad_ext[6]) \ 00090 pad_off[6] = pad_indx[6] * pad_strd[6]; \ 00091 else \ 00092 pad_indx[6] = 0; \ 00093 pad_off[6] = 0; \ 00094 } \ 00095 } \ 00096 } \ 00097 } \ 00098 } \ 00099 } 00100 00101 /* 00102 * ADD_INDEX calculates the index from the begging of an array. The 00103 * array is passed in as the first argument. 00104 */ 00105 00106 #define ADD_INDEX(indx,off,rank) \ 00107 if (rank == 1) { \ 00108 indx = off[0]; \ 00109 } else if (rank == 2) { \ 00110 indx = off[0] + off[1]; \ 00111 } else if (rank == 3) { \ 00112 indx = off[0] + off[1] + off[2]; \ 00113 } else if (rank == 4) { \ 00114 indx = off[0] + off[1] + off[2] + off[3]; \ 00115 } else if (rank == 5) { \ 00116 indx = off[0] + off[1] + off[2] + off[3] + off[4]; \ 00117 } else if (rank == 6) { \ 00118 indx = off[0] + off[1] + off[2] + \ 00119 off[3] + off[4] + off[5]; \ 00120 } else { \ 00121 indx = off[0] + off[1] + off[2] + off[3] + \ 00122 off[4] + off[5] + off[6]; \ 00123 } 00124 00125 /* 00126 * INCR_SRC increments the index for the source array, and calculates 00127 * the offset into the source array. 00128 */ 00129 00130 #define INCR_SRC() \ 00131 src_indx[0]++; \ 00132 if (src_indx[0] < src_ext[0]) \ 00133 src_off[0] = src_indx[0] * src_strd[0]; \ 00134 else { \ 00135 src_indx[0] = 0; \ 00136 src_off[0] = 0; \ 00137 src_indx[1]++; \ 00138 if (src_indx[1] < src_ext[1]) \ 00139 src_off[1] = src_indx[1] * src_strd[1]; \ 00140 else { \ 00141 src_indx[1] = 0; \ 00142 src_off[1] = 0; \ 00143 src_indx[2]++; \ 00144 if (src_indx[2] < src_ext[2]) \ 00145 src_off[2] = src_indx[2] * src_strd[2]; \ 00146 else { \ 00147 src_indx[2] = 0; \ 00148 src_off[2] = 0; \ 00149 src_indx[3]++; \ 00150 if (src_indx[3] < src_ext[3]) \ 00151 src_off[3] = src_indx[3] * src_strd[3]; \ 00152 else { \ 00153 src_indx[3] = 0; \ 00154 src_off[3] = 0; \ 00155 src_indx[4]++; \ 00156 if (src_indx[4] < src_ext[4]) \ 00157 src_off[4] = src_indx[4] * src_strd[4]; \ 00158 else { \ 00159 src_indx[4] = 0; \ 00160 src_off[4] = 0; \ 00161 src_indx[5]++; \ 00162 if (src_indx[5] < src_ext[5]) \ 00163 src_off[5] = src_indx[5] * src_strd[5]; \ 00164 else { \ 00165 src_indx[5] = 0; \ 00166 src_off[5] = 0; \ 00167 src_indx[6]++; \ 00168 if (src_indx[6] < src_ext[6]) \ 00169 src_off[6] = src_indx[6] * src_strd[6]; \ 00170 } \ 00171 } \ 00172 } \ 00173 } \ 00174 } \ 00175 } 00176 00177 /* 00178 * INCR_RES increments the index for the result array, and calculates 00179 * the offset into the result array. 00180 */ 00181 00182 #define INCR_RES() \ 00183 res_indx[0]++; \ 00184 if (res_indx[0] < res_ext[0]) \ 00185 res_off[0] = res_indx[0] * res_strd[0]; \ 00186 else { \ 00187 res_indx[0] = 0; \ 00188 res_off[0] = 0; \ 00189 res_indx[1]++; \ 00190 if (res_indx[1] < res_ext[1]) \ 00191 res_off[1] = res_indx[1] * res_strd[1]; \ 00192 else { \ 00193 res_indx[1] = 0; \ 00194 res_off[1] = 0; \ 00195 res_indx[2]++; \ 00196 if (res_indx[2] < res_ext[2]) \ 00197 res_off[2] = res_indx[2] * res_strd[2]; \ 00198 else { \ 00199 res_indx[2] = 0; \ 00200 res_off[2] = 0; \ 00201 res_indx[3]++; \ 00202 if (res_indx[3] < res_ext[3]) \ 00203 res_off[3] = res_indx[3] * res_strd[3]; \ 00204 else { \ 00205 res_indx[3] = 0; \ 00206 res_off[3] = 0; \ 00207 res_indx[4]++; \ 00208 if (res_indx[4] < res_ext[4]) \ 00209 res_off[4] = res_indx[4] * res_strd[4]; \ 00210 else { \ 00211 res_indx[4] = 0; \ 00212 res_off[4] = 0; \ 00213 res_indx[5]++; \ 00214 if (res_indx[5] < res_ext[5]) \ 00215 res_off[5] = res_indx[5] * res_strd[5]; \ 00216 else { \ 00217 res_indx[5] = 0; \ 00218 res_off[5] = 0; \ 00219 res_indx[6]++; \ 00220 if (res_indx[6] < res_ext[6]) \ 00221 res_off[6] = res_indx[6] * res_strd[6]; \ 00222 } \ 00223 } \ 00224 } \ 00225 } \ 00226 } \ 00227 } 00228 00229 #if defined _F_INT4 && defined _F_INT8 00230 #define VALUE(size, ptr) \ 00231 ((int) (size == 64 ? (*((_f_int8 *) (ptr))) : (*((_f_int4 *) (ptr))))) 00232 #else 00233 #define VALUE(size, ptr) ((int) (*((_f_int *) (ptr)))) 00234 #endif 00235 00236 #ifdef _UNICOS 00237 #pragma _CRI duplicate _RESHAPE as RESHAPE@ 00238 #endif 00239 void 00240 _RESHAPE ( DopeVectorType *result, 00241 DopeVectorType *source, 00242 DopeVectorType *shape, 00243 DopeVectorType *pad, 00244 DopeVectorType *order) 00245 00246 { 00247 int *sh; /* ptr to shape array */ 00248 char *cs; /* char ptr to source array */ 00249 char *cr; /* char ptr to result array */ 00250 char *cp; /* char ptr to pad array */ 00251 char * restrict cptr1; /* char */ 00252 char * restrict cptr2; /* char */ 00253 char * restrict cptr3; /* char */ 00254 _f_int8 * restrict uptr1; /* 64-bit */ 00255 _f_int8 * restrict uptr2; /* 64-bit */ 00256 _f_int8 * restrict uptr3; /* 64-bit */ 00257 _f_int * restrict fptr1; /* def kind int */ 00258 _f_int * restrict fptr2; /* def kind int */ 00259 _f_int * restrict fptr3; /* def kind int */ 00260 _f_real16 * restrict dptr1; /* 128-bit */ 00261 _f_real16 * restrict dptr2; /* 128-bit */ 00262 _f_real16 * restrict dptr3; /* 128-bit */ 00263 #ifdef _F_COMP16 00264 dblcmplx * restrict xptr1; /* 256-bit */ 00265 dblcmplx * restrict xptr2; /* 256-bit */ 00266 dblcmplx * restrict xptr3; /* 256-bit */ 00267 #endif 00268 _f_int4 * restrict hptr1; /* 32-bit */ 00269 _f_int4 * restrict hptr2; /* 32-bit */ 00270 _f_int4 * restrict hptr3; /* 32-bit */ 00271 void * restrict sptr; /* ptr to src */ 00272 void * restrict rptr; /* ptr to res */ 00273 void * restrict pptr; /* ptr to pad */ 00274 _f_int4 * restrict optr4; /* ptr to odr */ 00275 _f_int8 * restrict optr8; /* ptr to odr */ 00276 _f_int4 * restrict shptr4; /* ptr to odr */ 00277 _f_int8 * restrict shptr8; /* ptr to odr */ 00278 long sindx; /* source index */ 00279 long rindx; /* result index */ 00280 long shindx; /* shape index */ 00281 long pindx; /* pad index */ 00282 long oindx; /* order index */ 00283 _f_int bucketsize; /* size of each data element */ 00284 long nbytes; /* # of bytes in result array */ 00285 _f_int bytealligned; /* byte aligned flag */ 00286 _f_int type; /* data type */ 00287 _f_int subtype; /* sub-type */ 00288 _f_int arithmetic; /* arithmetic data type */ 00289 _f_int rank; /* rank of result matrix */ 00290 long src_ext[MAXDIM]; /* extents for source */ 00291 long src_strd[MAXDIM]; /* stride for source */ 00292 long src_off[MAXDIM]; /* source offset */ 00293 long src_indx[MAXDIM]; /* source index */ 00294 long res_ext[MAXDIM]; /* extents for result */ 00295 long res_strd[MAXDIM]; /* stride for result */ 00296 long res_off[MAXDIM]; /* result offset */ 00297 long res_indx[MAXDIM]; /* result index */ 00298 long pad_ext[MAXDIM]; /* extents for pad */ 00299 long pad_strd[MAXDIM]; /* stride for pad */ 00300 long pad_off[MAXDIM]; /* pad offset */ 00301 long pad_indx[MAXDIM]; /* pad index */ 00302 _f_int src_rank; /* rank of source matrix */ 00303 _f_int res_rank; /* rank of result matrix */ 00304 _f_int pad_rank; /* rank of pad matrix */ 00305 long shp_strd; /* stride for shape */ 00306 long ord_strd; /* stride for order */ 00307 long tot_ext; /* total extent counter */ 00308 long tot_src; 00309 long tot_shp; 00310 long total; 00311 _f_int early_src; 00312 _f_int early_pad; 00313 _f_int early_shp; 00314 _f_int early_ord; 00315 long i, j, k; 00316 long shape_len; 00317 long order_len; 00318 _f_int shbucket; 00319 _f_int obucket; 00320 _f_int order_chk[7]; 00321 long shp_vals[MAXDIM]; 00322 00323 /* Set type and rank variables */ 00324 00325 type = source->type_lens.type; 00326 rank = shape->dimension[0].extent; 00327 shape_len = shape->base_addr.a.el_len; 00328 00329 /* 00330 * If the extent for any index of any array is 0, we can exit now without 00331 * having to spend time doing any work. 00332 * 00333 * If any of the indices passed in the shape array are less than 0, 00334 * call the error routine. 00335 */ 00336 00337 if (shape->dimension[0].extent == 0) { 00338 _lerror (_LELVL_ABORT, FESHPSZZ); 00339 } 00340 /* 00341 * Set up an array of the values of shape. This will be done so that 00342 * they can be done once, and referenced wherever they are needed. 00343 */ 00344 00345 if (shape->type_lens.int_len == 64) { 00346 shptr8 = (_f_int8 *) shape->base_addr.a.ptr; 00347 #ifdef _UNICOS 00348 #pragma _CRI shortloop 00349 #endif 00350 for (i = 0; i < rank; i++) { 00351 shindx = i * (shape->dimension[0].stride_mult / 00352 (shape->type_lens.int_len / BITS_PER_WORD)); 00353 shp_vals[i] = VALUE(shape_len,(shptr8 + shindx)); 00354 } 00355 } else { 00356 shptr4 = (_f_int4 *) shape->base_addr.a.ptr; 00357 shbucket = shape->type_lens.int_len / BITS_PER_WORD; 00358 if (shbucket == 0) 00359 shbucket = 1; 00360 #ifdef _UNICOS 00361 #pragma _CRI shortloop 00362 #endif 00363 for (i = 0; i < rank; i++) { 00364 shindx = i * (shape->dimension[0].stride_mult / shbucket); 00365 shp_vals[i] = VALUE(shape_len,(shptr4 + shindx)); 00366 } 00367 } 00368 00369 #ifdef _UNICOS 00370 #pragma _CRI shortloop 00371 #endif 00372 for (i = 0; i < rank; i++) { 00373 if (shp_vals[i] < 0) 00374 _lerror (_LELVL_ABORT, FERSHNEG); 00375 } 00376 early_shp = 0; 00377 #ifdef _UNICOS 00378 #pragma _CRI shortloop 00379 #endif 00380 for (i = 0; i < rank; i++) { 00381 if (shp_vals[i] == 0) 00382 early_shp = 1; 00383 } 00384 00385 early_src = 0; 00386 #ifdef _UNICOS 00387 #pragma _CRI shortloop 00388 #endif 00389 for (i = 0; i < source->n_dim; i++) { 00390 if (source->dimension[i].extent == 0) { 00391 early_src = 1; 00392 } 00393 } 00394 00395 early_pad = 0; 00396 if (pad) { 00397 #ifdef _UNICOS 00398 #pragma _CRI shortloop 00399 #endif 00400 for (i = 0; i < pad->n_dim; i++) { 00401 if (pad->dimension[i].extent == 0) 00402 early_pad = 1; 00403 } 00404 } 00405 00406 early_ord = 0; 00407 if (order) { 00408 if (order->dimension[0].extent == 0) 00409 early_ord = 1; 00410 } 00411 00412 /* 00413 * Initialize every array element to 0. 00414 */ 00415 00416 #ifdef _UNICOS 00417 #pragma _CRI shortloop 00418 #endif 00419 for (i = 0; i < MAXDIM; i++) { 00420 src_ext[i] = 0; 00421 src_strd[i] = 0; 00422 src_off[i] = 0; 00423 src_indx[i] = 0; 00424 res_ext[i] = 0; 00425 res_strd[i] = 0; 00426 res_off[i] = 0; 00427 res_indx[i] = 0; 00428 pad_ext[i] = 0; 00429 pad_strd[i] = 0; 00430 pad_off[i] = 0; 00431 pad_indx[i] = 0; 00432 } 00433 00434 /* Size calculation is based on variable type */ 00435 00436 switch (type) { 00437 case DVTYPE_ASCII : 00438 bytealligned = 1; 00439 bucketsize = _fcdlen(source->base_addr.charptr); /* bytes */ 00440 subtype = DVSUBTYPE_CHAR; 00441 arithmetic = 0; 00442 break; 00443 case DVTYPE_DERIVEDBYTE : 00444 bytealligned = 1; 00445 bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE; 00446 subtype = DVSUBTYPE_CHAR; 00447 arithmetic = 0; 00448 break; 00449 case DVTYPE_DERIVEDWORD : 00450 bytealligned = 0; 00451 bucketsize = source->base_addr.a.el_len / BITS_PER_WORD; 00452 subtype = DVSUBTYPE_DERIVED; 00453 arithmetic = 0; 00454 break; 00455 default : 00456 bytealligned = 0; 00457 bucketsize = source->type_lens.int_len / BITS_PER_WORD; 00458 if (source->type_lens.int_len == 64) { 00459 subtype = DVSUBTYPE_BIT64; 00460 } else if (source->type_lens.int_len == 32) { 00461 subtype = DVSUBTYPE_BIT32; 00462 bucketsize = 1; 00463 #ifdef _F_COMP16 00464 } else if (source->type_lens.int_len == 256) { 00465 subtype = DVSUBTYPE_BIT256; 00466 #endif 00467 } else { 00468 subtype = DVSUBTYPE_BIT128; 00469 } 00470 arithmetic = 1; 00471 } 00472 00473 /* If not allocated, set up dope vector for result */ 00474 00475 if (!result->assoc) { 00476 result->base_addr.a.ptr = (void *) NULL; 00477 result->n_dim = rank; 00478 result->orig_base = 0; 00479 result->orig_size = 0; 00480 00481 /* Determine size of space to allocate */ 00482 00483 if (!bytealligned) 00484 nbytes = bucketsize * BYTES_PER_WORD; 00485 else 00486 nbytes = bucketsize; 00487 shindx = 0; 00488 if (shape->type_lens.int_len == 64) 00489 shptr8 = (_f_int8 *) shape->base_addr.a.ptr; 00490 else 00491 shptr4 = (_f_int4 *) shape->base_addr.a.ptr; 00492 shbucket = shape->type_lens.int_len / BITS_PER_WORD; 00493 if (shbucket == 0) 00494 shbucket = 1; 00495 shp_strd = shape->dimension[0].stride_mult / shbucket; 00496 tot_ext = 1; 00497 for (i = 0; i < rank; i++) { 00498 result->dimension[i].extent = shp_vals[i]; 00499 result->dimension[i].low_bound = 1; 00500 result->dimension[i].stride_mult = tot_ext * bucketsize; 00501 tot_ext *= shp_vals[i]; 00502 nbytes *= shp_vals[i]; 00503 } 00504 if (nbytes > 0 && early_ord != 1) { 00505 result->base_addr.a.ptr = (void *) malloc (nbytes); 00506 if (result->base_addr.a.ptr == NULL) 00507 _lerror (_LELVL_ABORT, FENOMEMY); 00508 result->assoc = 1; 00509 } else 00510 result->base_addr.a.ptr = (void *) NULL; 00511 00512 result->base_addr.a.el_len = source->base_addr.a.el_len; 00513 if (type == DVTYPE_ASCII) { 00514 cr = (char *) result->base_addr.a.ptr; 00515 result->base_addr.charptr = _cptofcd (cr, bucketsize); 00516 } 00517 result->orig_base = (void *) result->base_addr.a.ptr; 00518 result->orig_size = nbytes * BITS_PER_BYTE; 00519 } 00520 00521 /* Check to see if we can exit early */ 00522 00523 if (early_shp || early_ord) 00524 return; 00525 if (early_src && (!pad || early_pad)) 00526 _lerror (_LELVL_ABORT, FERSHNPD); 00527 00528 /* Set up non-dopevector variables to hold index information */ 00529 00530 for (i = 0, tot_src = 1; i < source->n_dim; i++) { 00531 src_ext[i] = source->dimension[i].extent; 00532 if (bucketsize > 1 && arithmetic) { 00533 src_strd[i] = source->dimension[i].stride_mult / bucketsize; 00534 } else { 00535 src_strd[i] = source->dimension[i].stride_mult; 00536 } 00537 tot_src *= src_ext[i]; 00538 } 00539 if (order == NULL) { 00540 for (i = 0; i < result->n_dim; i++) { 00541 res_ext[i] = result->dimension[i].extent; 00542 if (bucketsize > 1 && arithmetic) { 00543 res_strd[i] = result->dimension[i].stride_mult / bucketsize; 00544 } else { 00545 res_strd[i] = result->dimension[i].stride_mult; 00546 } 00547 } 00548 } else { 00549 oindx = 0; 00550 obucket = order->type_lens.int_len / BITS_PER_WORD; 00551 if (obucket == 0) 00552 obucket = 1; 00553 ord_strd = order->dimension[0].stride_mult / obucket; 00554 if (order->type_lens.int_len == 64) 00555 optr8 = (_f_int8 *) order->base_addr.a.ptr; 00556 else 00557 optr4 = (_f_int4 *) order->base_addr.a.ptr; 00558 order_len = order->base_addr.a.el_len; 00559 for (i = 0; i < result->n_dim; i++) 00560 order_chk[i] = -1; 00561 for (i = 0; i < result->n_dim; i++) { 00562 if (order->type_lens.int_len == 64) 00563 j = VALUE(order_len, (optr8 + oindx)) - 1; 00564 else 00565 j = VALUE(order_len, (optr4 + oindx)) - 1; 00566 if (j < 0 || j >= result->n_dim) 00567 _lerror (_LELVL_ABORT, FEBDORDR); 00568 order_chk[j] = 1; 00569 oindx += ord_strd; 00570 res_ext[i] = result->dimension[j].extent; 00571 if (bucketsize > 1 && arithmetic) { 00572 res_strd[i] = result->dimension[j].stride_mult / bucketsize; 00573 } else { 00574 res_strd[i] = result->dimension[j].stride_mult; 00575 } 00576 } 00577 for (i = 0; i < result->n_dim; i++) { 00578 if (order_chk[i] != 1) { 00579 _lerror (_LELVL_ABORT, FEBDORDR); 00580 } 00581 } 00582 } 00583 if (pad && !early_pad) { 00584 for (i = 0; i < pad->n_dim; i++) { 00585 pad_ext[i] = pad->dimension[i].extent; 00586 if (bucketsize > 1 && arithmetic) { 00587 pad_strd[i] = pad->dimension[i].stride_mult / bucketsize; 00588 } else { 00589 pad_strd[i] = pad->dimension[i].stride_mult; 00590 } 00591 } 00592 } else { 00593 tot_shp = 1; 00594 for (i = 0, tot_shp = 1; i < shape->dimension[0].extent; i++) { 00595 tot_shp *= shp_vals[i]; 00596 } 00597 if (tot_shp > tot_src) { 00598 _lerror (_LELVL_ABORT, FERSHNPD); 00599 } 00600 } 00601 00602 00603 /* Initialize pointers to data areas */ 00604 00605 if (!bytealligned) { 00606 sptr = (void *) source->base_addr.a.ptr; 00607 rptr = (void *) result->base_addr.a.ptr; 00608 if (pad) 00609 pptr = (void *) pad->base_addr.a.ptr; 00610 } else { 00611 if (type == DVTYPE_ASCII) { 00612 cs = _fcdtocp (source->base_addr.charptr); 00613 cr = _fcdtocp (result->base_addr.charptr); 00614 if (pad) 00615 cp = _fcdtocp (pad->base_addr.charptr); 00616 } else { 00617 cs = (char *) source->base_addr.a.ptr; 00618 cr = (char *) result->base_addr.a.ptr; 00619 if (pad) 00620 cp = (char *) pad->base_addr.a.ptr; 00621 } 00622 } 00623 00624 /* Initialize index counter variables */ 00625 00626 for (i = 0; i < MAXDIM; i++) { 00627 res_off[i] = 0; 00628 src_off[i] = 0; 00629 pad_off[i] = 0; 00630 res_indx[i] = 0; 00631 src_indx[i] = 0; 00632 pad_indx[i] = 0; 00633 } 00634 00635 /* Initialize rank scalars */ 00636 00637 src_rank = source->n_dim; 00638 res_rank = result->n_dim; 00639 if (pad) 00640 pad_rank = pad->n_dim; 00641 else 00642 pad_rank = 0; 00643 00644 /* Determine how many elements from source need to be moved */ 00645 00646 if (tot_src < tot_ext) 00647 total = tot_src; 00648 else 00649 total = tot_ext; 00650 00651 /* 00652 * RESHAPE cannot be broken down by rank, because any rank can be put 00653 * into any other rank. Therefore, the only breakdown has been made by 00654 * data type. Since the values of the source array are not used, the 00655 * breakdown is made by element size. 32-bit data is handled as one 00656 * type, 64-bit data is another type, etc. Character and derived byte 00657 * are handled as one type, and derived word is a separate type. 00658 * 00659 * The work is done by first determining how many source elements need 00660 * to be moved. This value is used as the loop counter for the first 00661 * loop. Inside the loop, the index values for the source and result 00662 * elements are calculated, and that value is moved. Then, the new 00663 * values for source and result are calculated. When all of the source 00664 * elements have been moved, determine whether any pad values must be 00665 * used. If so, use a similar loop as the first one, but with the pad 00666 * array rather than the source array. 00667 */ 00668 00669 switch (subtype) { 00670 00671 case DVSUBTYPE_BIT64 : 00672 uptr1 = (_f_int8 *) sptr; 00673 uptr2 = (_f_int8 *) rptr; 00674 for (i = 0; i < total; i++) { /* move source */ 00675 ADD_INDEX(sindx,src_off,src_rank); 00676 ADD_INDEX(rindx,res_off,res_rank); 00677 uptr2[rindx] = uptr1[sindx]; 00678 INCR_SRC(); 00679 INCR_RES(); 00680 } 00681 if (tot_src < tot_ext) { 00682 uptr3 = (_f_int8 *) pptr; 00683 for ( ; i < tot_ext; i++) { /* move pad */ 00684 ADD_INDEX(pindx,pad_off,pad_rank); 00685 ADD_INDEX(rindx,res_off,res_rank); 00686 uptr2[rindx] = uptr3[pindx]; 00687 INCR_PAD(); 00688 INCR_RES(); 00689 } 00690 } 00691 break; 00692 00693 case DVSUBTYPE_BIT32 : 00694 hptr1 = (_f_int4 *) sptr; 00695 hptr2 = (_f_int4 *) rptr; 00696 for (i = 0; i < total; i++) { /* move source */ 00697 ADD_INDEX(sindx,src_off,src_rank); 00698 ADD_INDEX(rindx,res_off,res_rank); 00699 hptr2[rindx] = hptr1[sindx]; 00700 INCR_SRC(); 00701 INCR_RES(); 00702 } 00703 if (tot_src < tot_ext) { 00704 hptr3 = (_f_int4 *) pptr; 00705 for ( ; i < tot_ext; i++) { /* move pad */ 00706 ADD_INDEX(pindx,pad_off,pad_rank); 00707 ADD_INDEX(rindx,res_off,res_rank); 00708 hptr2[rindx] = hptr3[pindx]; 00709 INCR_PAD(); 00710 INCR_RES(); 00711 } 00712 } 00713 break; 00714 00715 case DVSUBTYPE_BIT128 : 00716 dptr1 = (_f_real16 *) sptr; 00717 dptr2 = (_f_real16 *) rptr; 00718 for (i = 0; i < total; i++) { /* move source */ 00719 ADD_INDEX(sindx,src_off,src_rank); 00720 ADD_INDEX(rindx,res_off,res_rank); 00721 dptr2[rindx] = dptr1[sindx]; 00722 INCR_SRC(); 00723 INCR_RES(); 00724 } 00725 if (tot_src < tot_ext) { 00726 dptr3 = (_f_real16 *) pptr; 00727 for ( ; i < tot_ext; i++) { /* move pad */ 00728 ADD_INDEX(pindx,pad_off,pad_rank); 00729 ADD_INDEX(rindx,res_off,res_rank); 00730 dptr2[rindx] = dptr3[pindx]; 00731 INCR_PAD(); 00732 INCR_RES(); 00733 } 00734 } 00735 break; 00736 00737 case DVSUBTYPE_CHAR : 00738 for (i = 0; i < total; i++) { /* move source */ 00739 ADD_INDEX(sindx,src_off,src_rank); 00740 ADD_INDEX(rindx,res_off,res_rank); 00741 cptr1 = (char *) cs + sindx; 00742 cptr2 = (char *) cr + rindx; 00743 (void) memcpy (cptr2, cptr1, bucketsize); 00744 INCR_SRC(); 00745 INCR_RES(); 00746 } 00747 if (tot_src < tot_ext) { 00748 for ( ; i < tot_ext; i++) { 00749 ADD_INDEX(pindx,pad_off,pad_rank); 00750 ADD_INDEX(rindx,res_off,res_rank); 00751 cptr2 = (char *) cr + rindx; 00752 cptr3 = (char *) cp + pindx; 00753 (void) memcpy (cptr2, cptr3, bucketsize); 00754 INCR_PAD(); 00755 INCR_RES(); 00756 } 00757 } 00758 break; 00759 00760 case DVSUBTYPE_DERIVED : 00761 for (i = 0; i < bucketsize; i++) { 00762 fptr1 = (_f_int *) sptr + i; 00763 fptr2 = (_f_int *) rptr + i; 00764 #ifdef _UNICOS 00765 #pragma _CRI shortloop 00766 #endif 00767 for (j = 0; j < MAXDIM; j++) { 00768 src_indx[j] = 0; 00769 src_off[j] = 0; 00770 } 00771 #ifdef _UNICOS 00772 #pragma _CRI shortloop 00773 #endif 00774 for (j = 0; j < MAXDIM; j++) { 00775 res_indx[j] = 0; 00776 res_off[j] = 0; 00777 } 00778 for (j = 0; j < total; j++) { /* move source */ 00779 ADD_INDEX(sindx,src_off,src_rank) 00780 ADD_INDEX(rindx,res_off,res_rank) 00781 fptr2[rindx] = fptr1[sindx]; 00782 INCR_SRC(); 00783 INCR_RES(); 00784 } 00785 if (tot_src < tot_ext) { 00786 fptr3 = (_f_int *) pptr + i; 00787 #ifdef _UNICOS 00788 #pragma _CRI shortloop 00789 #endif 00790 for (k = 0; k < pad_rank; k++) { 00791 pad_indx[k] = 0; 00792 pad_off[k] = 0; 00793 } 00794 for ( ; j < tot_ext; j++) { /* move pad */ 00795 ADD_INDEX(pindx,pad_off,pad_rank); 00796 ADD_INDEX(rindx,res_off,res_rank); 00797 fptr2[rindx] = fptr3[pindx]; 00798 INCR_PAD(); 00799 INCR_RES(); 00800 } 00801 } 00802 } 00803 break; 00804 00805 #ifdef _F_COMP16 00806 case DVSUBTYPE_BIT256 : 00807 xptr1 = (dblcmplx *) sptr; 00808 xptr2 = (dblcmplx *) rptr; 00809 for (i = 0; i < total; i++) { /* move source */ 00810 ADD_INDEX(sindx,src_off,src_rank); 00811 ADD_INDEX(rindx,res_off,res_rank); 00812 xptr2[rindx].re = xptr1[sindx].re; 00813 xptr2[rindx].im = xptr1[sindx].im; 00814 INCR_SRC(); 00815 INCR_RES(); 00816 } 00817 if (tot_src < tot_ext) { 00818 xptr3 = (dblcmplx *) pptr; 00819 for ( ; i < tot_ext; i++) { /* move pad */ 00820 ADD_INDEX(pindx,pad_off,pad_rank); 00821 ADD_INDEX(rindx,res_off,res_rank); 00822 xptr2[rindx].re = xptr3[pindx].re; 00823 xptr2[rindx].im = xptr3[pindx].im; 00824 INCR_PAD(); 00825 INCR_RES(); 00826 } 00827 } 00828 break; 00829 #endif 00830 00831 default : 00832 _lerror (_LELVL_ABORT, FEINTDTY); 00833 } 00834 }