Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/transfer.c 92.1 07/07/99 15:52:02" 00038 00039 #include <liberrno.h> 00040 #include <stddef.h> 00041 #include <cray/dopevec.h> 00042 #include <cray/portdefs.h> 00043 #include "arraydefs.h" 00044 00045 #if defined _F_INT4 && defined _ADDR64 00046 #define BITS_PER_HALFWORD (BITS_PER_WORD / 2) 00047 #define BYTES_PER_HALFWORD (BYTES_PER_WORD / 2) 00048 #endif 00049 00050 #define CALC_SINDX() \ 00051 if (src_rank == 1) \ 00052 sindx = src_dim[0] * src_strd[0]; \ 00053 else if (src_rank == 2) \ 00054 sindx = src_dim[0] * src_strd[0] + \ 00055 src_dim[1] * src_strd[1]; \ 00056 else if (src_rank == 3) \ 00057 sindx = src_dim[0] * src_strd[0] + \ 00058 src_dim[1] * src_strd[1] + \ 00059 src_dim[2] * src_strd[2]; \ 00060 else if (src_rank == 4) \ 00061 sindx = src_dim[0] * src_strd[0] + \ 00062 src_dim[1] * src_strd[1] + \ 00063 src_dim[2] * src_strd[2] + \ 00064 src_dim[3] * src_strd[3]; \ 00065 else if (src_rank == 5) \ 00066 sindx = src_dim[0] * src_strd[0] + \ 00067 src_dim[1] * src_strd[1] + \ 00068 src_dim[2] * src_strd[2] + \ 00069 src_dim[3] * src_strd[3] + \ 00070 src_dim[4] * src_strd[4]; \ 00071 else if (src_rank == 6) \ 00072 sindx = src_dim[0] * src_strd[0] + \ 00073 src_dim[1] * src_strd[1] + \ 00074 src_dim[2] * src_strd[2] + \ 00075 src_dim[3] * src_strd[3] + \ 00076 src_dim[4] * src_strd[4] + \ 00077 src_dim[5] * src_strd[5]; \ 00078 else \ 00079 sindx = src_dim[0] * src_strd[0] + \ 00080 src_dim[1] * src_strd[1] + \ 00081 src_dim[2] * src_strd[2] + \ 00082 src_dim[3] * src_strd[3] + \ 00083 src_dim[4] * src_strd[4] + \ 00084 src_dim[5] * src_strd[5] + \ 00085 src_dim[6] * src_strd[6]; 00086 00087 #define INCR_SRC() \ 00088 src_dim[0]++; \ 00089 if (src_dim[0] == src_ext[0]) { \ 00090 src_dim[0] = 0; \ 00091 src_dim[1]++; \ 00092 if (src_dim[1] == src_ext[1]) { \ 00093 src_dim[1] = 0; \ 00094 src_dim[2]++; \ 00095 if (src_dim[2] == src_ext[2]) { \ 00096 src_dim[2] = 0; \ 00097 src_dim[3]++; \ 00098 if (src_dim[3] == src_ext[3]) { \ 00099 src_dim[3] = 0; \ 00100 src_dim[4]++; \ 00101 if (src_dim[4] == src_ext[4]) { \ 00102 src_dim[4] = 0; \ 00103 src_dim[5]++; \ 00104 if (src_dim[5] == src_ext[5]) { \ 00105 src_dim[5] = 0; \ 00106 src_dim[6]++; \ 00107 } \ 00108 } \ 00109 } \ 00110 } \ 00111 } \ 00112 } 00113 00114 /* 00115 * TRANSFER returns a result with the physical representation identical 00116 * to that of the source but interpreted with the type of the mold 00117 * argument. 00118 */ 00119 00120 #ifdef _UNICOS 00121 #pragma _CRI duplicate _TRANSFER as TRANSFER_@ 00122 #endif 00123 void 00124 _TRANSFER (DopeVectorType * result, 00125 DopeVectorType * source, 00126 DopeVectorType * mold, 00127 int *siz) 00128 { 00129 void __transfer(); 00130 __transfer (result, source, mold, siz, 0); 00131 } 00132 00133 #if defined(_WORD32) || defined(_MIPSEB) 00134 _f_int 00135 _TRANSFER1_0_4 (DopeVectorType *source, 00136 DopeVectorType *mold, 00137 int *siz) 00138 { 00139 void __transfer(); 00140 DopeVectorType result, *res_ptr; 00141 00142 res_ptr = (DopeVectorType *) &result; 00143 res_ptr->assoc = 0; 00144 res_ptr->base_addr.a.el_len = mold->base_addr.a.el_len; 00145 res_ptr->ptr_alloc = 0; 00146 res_ptr->p_or_a = NOT_P_OR_A; 00147 res_ptr->n_dim = 0; 00148 res_ptr->type_lens.type = mold->type_lens.type; 00149 res_ptr->type_lens.dpflag = 0; 00150 res_ptr->type_lens.kind_or_star = DVD_DEFAULT; 00151 res_ptr->type_lens.int_len = mold->type_lens.int_len; 00152 res_ptr->type_lens.dec_len = 0; 00153 res_ptr->orig_base = (_f_int4 *) NULL; 00154 res_ptr->orig_size = 0; 00155 __transfer (res_ptr, source, mold, siz, 1); 00156 return(*(_f_int *) res_ptr->base_addr.a.ptr); 00157 } 00158 #endif 00159 00160 #if defined(_WORD32) || defined(_MIPSEB) 00161 typedef _f_int RETURN_TYPE; 00162 #else 00163 typedef _f_int RETURN_TYPE; 00164 #endif 00165 00166 #ifdef _UNICOS 00167 #pragma _CRI duplicate _TRANSFER1_0 as TRANSFER1_0@ 00168 #endif 00169 RETURN_TYPE 00170 _TRANSFER1_0 (DopeVectorType *source, 00171 DopeVectorType *mold, 00172 int *siz) 00173 { 00174 void __transfer(); 00175 DopeVectorType result, *res_ptr; 00176 00177 res_ptr = (DopeVectorType *) &result; 00178 res_ptr->assoc = 0; 00179 res_ptr->base_addr.a.el_len = mold->base_addr.a.el_len; 00180 res_ptr->ptr_alloc = 0; 00181 res_ptr->p_or_a = NOT_P_OR_A; 00182 res_ptr->n_dim = 0; 00183 res_ptr->type_lens.type = mold->type_lens.type; 00184 res_ptr->type_lens.dpflag = 0; 00185 res_ptr->type_lens.kind_or_star = DVD_DEFAULT; 00186 res_ptr->type_lens.int_len = mold->type_lens.int_len; 00187 res_ptr->type_lens.dec_len = 0; 00188 res_ptr->orig_base = (_f_int8 *) NULL; 00189 res_ptr->orig_size = 0; 00190 __transfer (res_ptr, source, mold, siz, 1); 00191 return(*(RETURN_TYPE *) res_ptr->base_addr.a.ptr); 00192 } 00193 00194 #ifdef _UNICOS 00195 #pragma _CRI duplicate _TRANSFER2_0 as TRANSFER2_0@ 00196 #endif 00197 _f_dble 00198 _TRANSFER2_0 (DopeVectorType *source, 00199 DopeVectorType *mold, 00200 int *siz) 00201 { 00202 void __transfer(); 00203 DopeVectorType result, *res_ptr; 00204 00205 res_ptr = (DopeVectorType *) &result; 00206 res_ptr->assoc = 0; 00207 res_ptr->base_addr.a.el_len = mold->base_addr.a.el_len; 00208 res_ptr->ptr_alloc = 0; 00209 res_ptr->p_or_a = NOT_P_OR_A; 00210 res_ptr->n_dim = 0; 00211 res_ptr->type_lens.type = mold->type_lens.type; 00212 res_ptr->type_lens.dpflag = 0; 00213 res_ptr->type_lens.kind_or_star = DVD_DEFAULT; 00214 res_ptr->type_lens.int_len = mold->type_lens.int_len; 00215 res_ptr->type_lens.dec_len = 0; 00216 res_ptr->orig_base = (_f_dble *) NULL; 00217 res_ptr->orig_size = 0; 00218 __transfer (res_ptr, source, mold, siz, 2); 00219 return(*(_f_dble *) res_ptr->base_addr.a.ptr); 00220 } 00221 00222 #ifdef _F_COMP16 00223 #ifdef _UNICOS 00224 #pragma _CRI duplicate _TRANSFER4_0 as TRANSFER4_0@ 00225 #endif 00226 dblcmplx 00227 _TRANSFER4_0 (DopeVectorType *source, 00228 DopeVectorType *mold, 00229 int *siz) 00230 { 00231 void __transfer(); 00232 DopeVectorType result, *res_ptr; 00233 00234 res_ptr = (DopeVectorType *) &result; 00235 res_ptr->assoc = 0; 00236 res_ptr->base_addr.a.el_len = mold->base_addr.a.el_len; 00237 res_ptr->ptr_alloc = 0; 00238 res_ptr->p_or_a = NOT_P_OR_A; 00239 res_ptr->n_dim = 0; 00240 res_ptr->type_lens.type = mold->type_lens.type; 00241 res_ptr->type_lens.dpflag = 0; 00242 res_ptr->type_lens.kind_or_star = DVD_DEFAULT; 00243 res_ptr->type_lens.int_len = mold->type_lens.int_len; 00244 res_ptr->type_lens.dec_len = 0; 00245 res_ptr->orig_base = (dblcmplx *) NULL; 00246 res_ptr->orig_size = 0; 00247 __transfer (res_ptr, source, mold, siz, 4); 00248 return(*(dblcmplx *) res_ptr->base_addr.a.ptr); 00249 } 00250 #endif 00251 00252 void 00253 __transfer (DopeVectorType * result, 00254 DopeVectorType * source, 00255 DopeVectorType * mold, 00256 int *siz, 00257 int scalar) 00258 { 00259 void * restrict sptr; /* source pointer */ 00260 void * restrict rptr; /* result pointer */ 00261 char * restrict cptr1; /* char */ 00262 char * restrict cptr2; /* char */ 00263 _f_int * restrict uptr1; /* unsigned */ 00264 _f_int * restrict uptr2; /* unsigned */ 00265 #if defined _F_INT4 && defined _ADDR64 00266 _f_int4 * restrict hptr1; /* halfword */ 00267 _f_int4 * restrict hptr2; /* halfword */ 00268 _f_int src_halfword; /* src halfword flag */ 00269 _f_int mld_halfword; /* mld halfword flag */ 00270 _f_int halfword; /* halfword flag */ 00271 _f_int mld_size_half; /* size in halfwords */ 00272 _f_int src_size_half; /* size in halfwords */ 00273 #endif 00274 char *cr; /* char ptr to result array */ 00275 char *cs; /* char ptr to source array */ 00276 _f_int bytealigned; /* byte aligned flag */ 00277 _f_int src_bytealigned; /* source alignment flag */ 00278 _f_int mld_bytealigned; /* mold alignment flag */ 00279 long nbytes; /* bytes to allocate */ 00280 long nwords; /* bytes to allocate */ 00281 long extent; /* extent counter */ 00282 _f_int mld_size; /* size of each data element */ 00283 _f_int mld_size_bytes; /* size of each data element */ 00284 _f_int src_size; /* size of each data element */ 00285 _f_int src_size_bytes; /* size of each data element */ 00286 long sindx; /* source index */ 00287 long rindx; /* result index */ 00288 _f_int scnt; /* source word count */ 00289 _f_int rcnt; /* result word count */ 00290 long res_dim; /* current result indices */ 00291 long res_strd; /* stride for each dimension */ 00292 long src_dim[MAXDIM]; /* current source indices */ 00293 long src_ext[MAXDIM]; /* extent for each dimension */ 00294 long src_strd[MAXDIM]; /* stride for each dimension */ 00295 long src_xtnt; /* extent of src array */ 00296 long mld_xtnt; /* extent of mold array */ 00297 _f_int src_rank; /* rank of source array */ 00298 long tot_src_bytes; /* bytes in source array */ 00299 long leftover; /* leftover bytes */ 00300 _f_int adjust; /* word to byte adjust flag */ 00301 _f_int early_exit; /* flag for early exit */ 00302 long i, j, k; /* index veriables */ 00303 00304 /* Determine whether byte, word or half-word aligned */ 00305 00306 bytealigned = 0; 00307 #if defined _F_INT4 && defined _ADDR64 00308 halfword = 0; 00309 #endif 00310 if (source->type_lens.type == DVTYPE_ASCII || 00311 source->type_lens.type == DVTYPE_DERIVEDBYTE) { 00312 bytealigned = 1; 00313 src_bytealigned = 1; 00314 } else { 00315 src_bytealigned = 0; 00316 #if defined _F_INT4 && defined _ADDR64 00317 if (source->type_lens.int_len == 32) { 00318 src_halfword = 1; 00319 halfword = 1; 00320 } else 00321 src_halfword = 0; 00322 #endif 00323 } 00324 if (mold->type_lens.type == DVTYPE_ASCII || 00325 mold->type_lens.type == DVTYPE_DERIVEDBYTE) { 00326 bytealigned = 1; 00327 mld_bytealigned = 1; 00328 } else { 00329 mld_bytealigned = 0; 00330 #if defined _F_INT4 && defined _ADDR64 00331 if (mold->type_lens.int_len == 32) { 00332 mld_halfword = 1; 00333 halfword = 1; 00334 } else 00335 mld_halfword = 0; 00336 #endif 00337 } 00338 00339 if (result->assoc) { 00340 if (!result->dimension[0].extent) 00341 return; 00342 } 00343 00344 /* 00345 * Initialize every array element to 0. 00346 */ 00347 00348 #ifdef _UNICOS 00349 #pragma _CRI shortloop 00350 #endif 00351 for (i = 0; i < MAXDIM; i++) { 00352 src_dim[i] = 0; 00353 src_ext[i] = 0; 00354 src_strd[i] = 0; 00355 } 00356 00357 /* 00358 * Determine size of each element, in bytes and words (where appropriate) 00359 * Mold and source types must be done separately. 00360 */ 00361 00362 switch (mold->type_lens.type) { 00363 case DVTYPE_ASCII : 00364 mld_size_bytes = _fcdlen (mold->base_addr.charptr); 00365 break; 00366 case DVTYPE_DERIVEDBYTE : 00367 mld_size_bytes = mold->base_addr.a.el_len / BITS_PER_BYTE; 00368 break; 00369 case DVTYPE_DERIVEDWORD : 00370 #if defined _F_INT4 && defined _ADDR64 00371 if (mld_halfword) { 00372 mld_size = mold->base_addr.a.el_len / BITS_PER_HALFWORD; 00373 mld_size_bytes = mld_size * BYTES_PER_HALFWORD; 00374 mld_size_half = mld_size; 00375 } else { 00376 mld_size = mold->base_addr.a.el_len / BITS_PER_WORD; 00377 mld_size_bytes = mld_size * BYTES_PER_WORD; 00378 if (src_halfword) 00379 mld_size_half = mld_size << 1; 00380 } 00381 #else 00382 mld_size = mold->base_addr.a.el_len / BITS_PER_WORD; 00383 mld_size_bytes = mld_size * BYTES_PER_WORD; 00384 #endif 00385 break; 00386 default : 00387 #if defined _F_INT4 && defined _ADDR64 00388 if (mld_halfword) { 00389 mld_size = mold->type_lens.int_len / BITS_PER_HALFWORD; 00390 mld_size_bytes = mld_size * BYTES_PER_HALFWORD; 00391 mld_size_half = mld_size; 00392 } else { 00393 mld_size = mold->type_lens.int_len / BITS_PER_WORD; 00394 mld_size_bytes = mld_size * BYTES_PER_WORD; 00395 if (src_halfword) 00396 mld_size_half = mld_size << 1; 00397 } 00398 #else 00399 mld_size = mold->type_lens.int_len / BITS_PER_WORD; 00400 mld_size_bytes = mld_size * BYTES_PER_WORD; 00401 #endif 00402 } 00403 switch (source->type_lens.type) { 00404 case DVTYPE_ASCII : 00405 src_size_bytes = _fcdlen (source->base_addr.charptr); 00406 break; 00407 case DVTYPE_DERIVEDBYTE : 00408 src_size_bytes = source->base_addr.a.el_len /BITS_PER_BYTE; 00409 break; 00410 case DVTYPE_DERIVEDWORD : 00411 #if defined _F_INT4 && defined _ADDR64 00412 if (src_halfword) { 00413 src_size = source->base_addr.a.el_len / BITS_PER_HALFWORD; 00414 src_size_bytes = src_size * BYTES_PER_HALFWORD; 00415 src_size_half = src_size; 00416 } else { 00417 src_size = source->base_addr.a.el_len / BITS_PER_WORD; 00418 src_size_bytes = src_size * BYTES_PER_WORD; 00419 if (mld_halfword) 00420 src_size_half = src_size << 1; 00421 } 00422 #else 00423 src_size = source->base_addr.a.el_len / BITS_PER_WORD; 00424 src_size_bytes = src_size * BYTES_PER_WORD; 00425 #endif 00426 break; 00427 default : 00428 #if defined _F_INT4 && defined _ADDR64 00429 if (src_halfword) { 00430 src_size = source->type_lens.int_len / BITS_PER_HALFWORD; 00431 src_size_bytes = src_size * BYTES_PER_HALFWORD; 00432 src_size_half = src_size; 00433 } else { 00434 src_size = source->type_lens.int_len / BITS_PER_WORD; 00435 src_size_bytes = src_size * BYTES_PER_WORD; 00436 if (mld_halfword) 00437 src_size_half = src_size << 1; 00438 } 00439 #else 00440 src_size = source->type_lens.int_len / BITS_PER_WORD; 00441 src_size_bytes = src_size * BYTES_PER_WORD; 00442 #endif 00443 } 00444 00445 /* Calculate total number of bytes in source */ 00446 00447 src_xtnt = 1; 00448 src_rank = source->n_dim; 00449 for (i = 0; i < src_rank; i++) 00450 src_xtnt *= source->dimension[i].extent; 00451 if (bytealigned) 00452 tot_src_bytes = src_xtnt * src_size_bytes; 00453 else 00454 tot_src_bytes = (src_xtnt * src_size_bytes); 00455 00456 /* 00457 * If size is specified, total number of bytes to be moved is set to 00458 * size * element size (in bytes). 00459 */ 00460 00461 if (siz) { /* size is specified */ 00462 nbytes = *siz * mld_size_bytes; 00463 00464 /* 00465 * If size is not specified, and mold is an array, calculate the least 00466 * number of bytes which will contain all of the source. If the total 00467 * size of the mold and source arrays is the same, that will be the 00468 * number of bytes moved. If the total size of the source array is a 00469 * multiple of the mold size, the number of bytes will be the total 00470 * source size, otherwise, calculate the size to be the minimum number 00471 * of mold elements needed to completely contain the entire source array. 00472 */ 00473 } else { 00474 if (mold->n_dim > 0) { /* mold is array */ 00475 if (mld_size_bytes == 0) 00476 nbytes = 0; 00477 else { 00478 if ((tot_src_bytes % mld_size_bytes) == 0) { 00479 nbytes = tot_src_bytes; 00480 } else { 00481 nbytes = ((tot_src_bytes / mld_size_bytes) + 1) * 00482 mld_size_bytes; 00483 } 00484 } 00485 00486 /* 00487 * If mold is scalar, result is scalar, and number of bytes is the 00488 * number of bytes in one element. 00489 */ 00490 } else { /* mold is scalar */ 00491 nbytes = mld_size_bytes; 00492 } 00493 } 00494 00495 /* 00496 * Determine if we can exit the routine early. This can be done if 00497 * either the source or the mold is a zero-sized array, the mold is 00498 * a zero-sized scalar, or the siz argument is illegal. The conditions 00499 * for an early exit are as follows: 00500 * 00501 * SIZ argument present 00502 * value of SIZ <= 0 00503 * SOURCE is array-valued, and one or more extents = 0 00504 * SOURCE is scalar, character oriented, and of size 0 00505 * MOLD is array-valued, and one or more extents = 0 00506 * MOLD is scalar, character oriented, and of size 0 00507 * SIZ argument is not present 00508 * SOURCE is array-valued, and one or more extents = 0 00509 * SOURCE is scalar, character oriented, and of size 0 00510 * 00511 * The following conditions are considered to be an error (FEBADMLD): 00512 * 00513 * SIZ is not present, SOURCE is non-0 sized, MOLD is array- 00514 * valued, character oriented, and of size 0 00515 */ 00516 00517 early_exit = 0; 00518 if (siz) { 00519 if (*siz <= 0) { 00520 result->base_addr.a.ptr = (void *) NULL; 00521 result->dimension[0].extent = 0; 00522 result->dimension[0].low_bound = 1; 00523 if (mld_bytealigned) 00524 result->dimension[0].stride_mult = mld_size_bytes; 00525 else 00526 result->dimension[0].stride_mult = mld_size; 00527 return; 00528 } 00529 if (source->n_dim > 0) { 00530 #ifdef _UNICOS 00531 #pragma _CRI shortloop 00532 #endif 00533 for (i = 0; i < source->n_dim; i++) { 00534 if (!source->dimension[i].extent) 00535 early_exit = 1; 00536 } 00537 } else if (src_bytealigned && src_size_bytes == 0) { 00538 early_exit = 1; 00539 } 00540 if (mold->n_dim > 0) { 00541 #ifdef _UNICOS 00542 #pragma _CRI shortloop 00543 #endif 00544 for (i = 0; i < mold->n_dim; i++) { 00545 if (!mold->dimension[i].extent) 00546 early_exit = 1; 00547 } 00548 } else if (mld_bytealigned && mld_size_bytes == 0) 00549 early_exit = 1; 00550 if (early_exit == 1) { 00551 result->dimension[0].extent = *siz; 00552 result->dimension[0].low_bound = 1; 00553 if (mold->type_lens.type == DVTYPE_ASCII) { 00554 result->base_addr.charptr = _cptofcd (NULL, mld_size_bytes); 00555 result->dimension[0].stride_mult = mld_size_bytes; 00556 } else if (mold->type_lens.type == DVTYPE_DERIVEDBYTE) { 00557 result->base_addr.a.ptr = (void *) NULL; 00558 result->dimension[0].stride_mult = mld_size_bytes; 00559 } else { 00560 result->base_addr.a.ptr = (void *) NULL; 00561 result->dimension[0].stride_mult = mld_size; 00562 } 00563 return; 00564 } 00565 } else { 00566 if (source->n_dim > 0) { 00567 #ifdef _UNICOS 00568 #pragma _CRI shortloop 00569 #endif 00570 for (i = 0; i < source->n_dim; i++) { 00571 if (!source->dimension[i].extent) 00572 early_exit = 1; 00573 } 00574 } else if (src_bytealigned && src_size_bytes == 0) { 00575 early_exit = 1; 00576 } 00577 if (early_exit == 1) { 00578 if (mold->type_lens.type == DVTYPE_ASCII) { 00579 result->base_addr.charptr = _cptofcd (NULL, mld_size_bytes); 00580 } else { 00581 result->base_addr.a.ptr = (void *) NULL; 00582 } 00583 if (mold->n_dim > 0) { 00584 if (mld_size_bytes > 0) 00585 result->dimension[0].extent = nbytes / mld_size_bytes; 00586 else 00587 result->dimension[0].extent = 0; 00588 result->dimension[0].low_bound = 1; 00589 if (mld_bytealigned) 00590 result->dimension[0].stride_mult = mld_size_bytes; 00591 else 00592 result->dimension[0].stride_mult = mld_size; 00593 } 00594 return; 00595 } 00596 if (mold->n_dim > 0) { 00597 if (mld_bytealigned && mld_size_bytes == 0) { 00598 _lerror (_LELVL_ABORT, FEBADMLD); 00599 } 00600 } else { 00601 if (mld_bytealigned && mld_size_bytes == 0) { 00602 result->base_addr.charptr = _cptofcd (NULL, 0); 00603 return; 00604 } 00605 } 00606 } 00607 00608 /* 00609 * If result is not associated, allocate space and set up dimension 00610 * information. 00611 */ 00612 00613 if (!result->assoc) { 00614 result->base_addr.a.ptr = (void *) NULL; 00615 result->orig_base = 0; 00616 result->orig_size = 0; 00617 if (siz) { 00618 result->dimension[0].extent = *siz; 00619 result->dimension[0].low_bound = 1; 00620 if (mld_bytealigned) 00621 result->dimension[0].stride_mult = mld_size_bytes; 00622 else 00623 result->dimension[0].stride_mult = mld_size; 00624 } else { 00625 if (mold->n_dim > 0) { 00626 result->dimension[0].extent = nbytes / mld_size_bytes; 00627 result->dimension[0].low_bound = 1; 00628 if (mld_bytealigned) 00629 result->dimension[0].stride_mult = mld_size_bytes; 00630 else 00631 result->dimension[0].stride_mult = mld_size; 00632 } 00633 } 00634 result->base_addr.a.ptr = (void *) malloc (nbytes); 00635 if (result->base_addr.a.ptr == NULL) 00636 _lerror (_LELVL_ABORT, FENOMEMY); 00637 result->assoc = 1; 00638 result->base_addr.a.el_len = mold->base_addr.a.el_len; 00639 if (mold->type_lens.type == DVTYPE_ASCII) { 00640 cr = (char *) result->base_addr.a.ptr; 00641 result->base_addr.charptr = _cptofcd (cr, mld_size_bytes); 00642 } 00643 result->orig_base = result->base_addr.a.ptr; 00644 result->orig_size = nbytes * BITS_PER_BYTE; 00645 } 00646 00647 /* 00648 * Set up source arrays containing dimension information. These 00649 * temp arrays will be byte/word based, depending on what type of 00650 * transfer will be done. They will not be strictly based on the 00651 * type of the source array. 00652 */ 00653 00654 if (src_rank > 0) { 00655 if (src_bytealigned || !bytealigned) { 00656 for (i = 0; i < src_rank; i++) { 00657 src_ext[i] = source->dimension[i].extent; 00658 src_strd[i] = source->dimension[i].stride_mult; 00659 src_dim[i] = 0; 00660 } 00661 } else { 00662 for (i = 0; i < src_rank; i++) { 00663 src_ext[i] = source->dimension[i].extent; 00664 src_strd[i] = 00665 source->dimension[i].stride_mult * BYTES_PER_WORD; 00666 src_dim[i] = 0; 00667 } 00668 } 00669 } 00670 00671 /* 00672 * The actual work will be broken down by word and byte transfers. 00673 * The first section will be word oriented transfers. Inside this 00674 * block, the work will be divided by whether the mold variable is 00675 * a vector, or a scalar. 00676 */ 00677 00678 if (!bytealigned) { 00679 #if defined _F_INT4 && defined _ADDR64 00680 if (!halfword) { 00681 #endif 00682 uptr1 = (_f_int *) source->base_addr.a.ptr; 00683 uptr2 = (_f_int *) result->base_addr.a.ptr; 00684 if (result->n_dim == 0) { /* scalar mold */ 00685 if (mld_size <= src_size) { 00686 for (i = 0; i < mld_size; i++) 00687 uptr2[i] = uptr1[i]; 00688 } else { 00689 if (src_rank == 0) { /* scalar source */ 00690 for (i = 0; i < src_size; i++) 00691 uptr2[i] = uptr1[i]; 00692 } else { /* vector source */ 00693 extent = mld_size / src_size; 00694 leftover = mld_size % src_size; 00695 rindx = 0; 00696 sindx = 0; 00697 for (i = 0; i < extent; i++) { 00698 CALC_SINDX (); 00699 for (j = 0; j < src_size; j++) 00700 uptr2[rindx++] = uptr1[sindx++]; 00701 INCR_SRC(); 00702 } 00703 if (leftover) { 00704 CALC_SINDX (); 00705 for (j = 0; j < leftover; j++) 00706 uptr2[rindx++] = uptr1[sindx++]; 00707 } 00708 } 00709 } 00710 } else { /* vector mold */ 00711 res_strd = result->dimension[0].stride_mult; 00712 res_dim = 0; 00713 sindx = 0; 00714 rindx = 0; 00715 rcnt = 0; 00716 if (nbytes <= tot_src_bytes) 00717 nwords = nbytes / BYTES_PER_WORD; 00718 else 00719 nwords = tot_src_bytes / BYTES_PER_WORD; 00720 if (src_rank == 0) { /* scalar source */ 00721 for (i = 0; i < nwords; i++) { 00722 uptr2[rindx++] = uptr1[sindx++]; 00723 rcnt++; 00724 if (rcnt == mld_size) { 00725 rcnt = 0; 00726 res_dim++; 00727 rindx = res_dim * res_strd; 00728 } 00729 } 00730 } else { /* vector source */ 00731 scnt = 0; 00732 rcnt = 0; 00733 for (i = 0; i < nwords; i++) { 00734 uptr2[rindx++] = uptr1[sindx++]; 00735 rcnt++; 00736 if (rcnt == mld_size) { 00737 rcnt = 0; 00738 res_dim++; 00739 rindx = res_dim * res_strd; 00740 } 00741 scnt++; 00742 if (scnt == src_size) { 00743 scnt = 0; 00744 INCR_SRC(); 00745 CALC_SINDX(); 00746 } 00747 } 00748 } 00749 } 00750 00751 /* If either of the data types is a 32-bit type, and the word size 00752 * is 64 bits, we will have to treat the whole affair as 32-bit. 00753 * This will be done identically to the 64-bit section, only using 00754 * half-word pointers. 00755 */ 00756 00757 #if defined _F_INT4 && defined _ADDR64 00758 } else { 00759 hptr1 = (_f_int4 *) source->base_addr.a.ptr; 00760 hptr2 = (_f_int4 *) result->base_addr.a.ptr; 00761 00762 /* If source is 64 bits, double all strides to refer to halfwords */ 00763 00764 if (!src_halfword) 00765 for (i = 0; i < src_rank; i++) 00766 src_strd[i] <<= 1; 00767 00768 if (result->n_dim == 0) { /* scalar mold */ 00769 if (mld_size_half <= src_size_half) { 00770 for (i = 0; i < mld_size_half; i++) 00771 hptr2[i] = hptr1[i]; 00772 } else { 00773 if (src_rank == 0) { /* scalar source */ 00774 for (i = 0; i < src_size_half; i++) 00775 hptr2[i] = hptr1[i]; 00776 } else { /* vector source */ 00777 extent = mld_size_half / src_size_half; 00778 leftover = mld_size_half % src_size_half; 00779 rindx = 0; 00780 sindx = 0; 00781 for (i = 0; i < extent; i++) { 00782 CALC_SINDX (); 00783 for (j = 0; j < src_size_half; j++) 00784 hptr2[rindx++] = hptr1[sindx++]; 00785 INCR_SRC(); 00786 } 00787 if (leftover) { 00788 CALC_SINDX (); 00789 for (j = 0; j < leftover; j++) 00790 hptr2[rindx++] = hptr1[sindx++]; 00791 } 00792 } 00793 } 00794 } else { /* vector mold */ 00795 if (!mld_halfword) 00796 res_strd = result->dimension[0].stride_mult << 1; 00797 else 00798 res_strd = result->dimension[0].stride_mult; 00799 res_dim = 0; 00800 sindx = 0; 00801 rindx = 0; 00802 rcnt = 0; 00803 if (nbytes <= tot_src_bytes) 00804 nwords = nbytes / BYTES_PER_HALFWORD; 00805 else 00806 nwords = tot_src_bytes / BYTES_PER_HALFWORD; 00807 if (src_rank == 0) { /* scalar source */ 00808 for (i = 0; i < nwords; i++) { 00809 hptr2[rindx++] = hptr1[sindx++]; 00810 rcnt++; 00811 if (rcnt == mld_size_half) { 00812 rcnt = 0; 00813 res_dim++; 00814 rindx = res_dim * res_strd; 00815 } 00816 } 00817 } else { /* vector source */ 00818 scnt = 0; 00819 rcnt = 0; 00820 for (i = 0; i < nwords; i++) { 00821 hptr2[rindx++] = hptr1[sindx++]; 00822 rcnt++; 00823 if (rcnt == mld_size_half) { 00824 rcnt = 0; 00825 res_dim++; 00826 rindx = res_dim * res_strd; 00827 } 00828 scnt++; 00829 if (scnt == src_size_half) { 00830 scnt = 0; 00831 INCR_SRC(); 00832 CALC_SINDX(); 00833 } 00834 } 00835 } 00836 } 00837 } 00838 #endif 00839 /* 00840 * The second block will be for byte transfers. It is also broken 00841 * down by scalar and vector mold. 00842 */ 00843 00844 } else { 00845 00846 /* Initialize character pointers to source and result */ 00847 00848 if (src_bytealigned) { 00849 cs = _fcdtocp (source->base_addr.charptr); 00850 adjust = 1; 00851 } else { 00852 cs = (char *) source->base_addr.a.ptr; 00853 adjust = BYTES_PER_WORD; 00854 } 00855 if (mld_bytealigned) 00856 cr = _fcdtocp (result->base_addr.charptr); 00857 else 00858 cr = (char *) result->base_addr.a.ptr; 00859 00860 /* Initialize stride dimension variables */ 00861 00862 if (src_rank > 0) { 00863 for (i = 0; i < src_rank; i++) { 00864 src_strd[i] = source->dimension[i].stride_mult * adjust; 00865 src_ext[i] = source->dimension[i].extent; 00866 } 00867 } 00868 00869 if (result->n_dim == 0) { /* scalar mold */ 00870 if (src_rank == 0) { /* scalar source */ 00871 if (src_size_bytes >= mld_size_bytes) 00872 extent = mld_size_bytes; 00873 else 00874 extent = src_size_bytes; 00875 (void) memcpy (cr, cs, extent); 00876 } else { /* vector source */ 00877 extent = mld_size_bytes / src_size_bytes; 00878 leftover = mld_size_bytes % src_size_bytes; 00879 rindx = 0; 00880 for (i = 0; i < extent; i++) { 00881 CALC_SINDX (); 00882 cptr1 = (char *) cs + sindx; 00883 cptr2 = (char *) cr + rindx; 00884 (void) memcpy (cptr2, cptr1, src_size_bytes); 00885 INCR_SRC (); 00886 rindx += src_size_bytes; 00887 } 00888 if (leftover > 0) { 00889 CALC_SINDX (); 00890 cptr1 = (char *) cs + sindx; 00891 cptr2 = (char *) cr + rindx; 00892 (void) memcpy (cptr2, cptr1, leftover); 00893 } 00894 } 00895 } else { /* vector mold */ 00896 if (mld_bytealigned) 00897 res_strd = result->dimension[0].stride_mult; 00898 else 00899 res_strd = 00900 result->dimension[0].stride_mult * BYTES_PER_WORD; 00901 00902 if (src_rank == 0) { /* scalar source */ 00903 extent = src_size_bytes / mld_size_bytes; 00904 leftover = src_size_bytes % mld_size_bytes; 00905 rindx = 0; 00906 sindx = 0; 00907 for (i = 0; i < extent; i++) { 00908 cptr1 = (char *) cs + sindx; 00909 cptr2 = (char *) cr + rindx; 00910 (void) memcpy (cptr2, cptr1, mld_size_bytes); 00911 sindx += mld_size_bytes; 00912 rindx += res_strd; 00913 } 00914 if (leftover) { 00915 cptr1 = (char *) cs + sindx; 00916 cptr2 = (char *) cr + (extent * res_strd); 00917 (void) memcpy (cptr2, cptr1, leftover); 00918 } 00919 } else { /* vector source */ 00920 sindx = 0; 00921 rindx = 0; 00922 scnt = 0; 00923 rcnt = 0; 00924 cptr1 = (char *) cs; 00925 cptr2 = (char *) cr; 00926 for (i = 0; i < tot_src_bytes; i++) { 00927 cptr2[rindx+rcnt] = cptr1[sindx+scnt]; 00928 rcnt++; 00929 if (rcnt == mld_size_bytes) { 00930 rcnt = 0; 00931 rindx += res_strd; 00932 } 00933 scnt ++; 00934 if (scnt == src_size_bytes) { 00935 scnt = 0; 00936 INCR_SRC (); 00937 CALC_SINDX (); 00938 } 00939 } 00940 } 00941 } 00942 } 00943 }