Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "f90_intrinsic.h"
00039
00040 void
00041 _SPREAD(
00042 DopeVectorType *result,
00043 DopeVectorType *array,
00044 i4 *dim,
00045 i4 *ncopies)
00046 {
00047 char * result_p, * result_b ;
00048 char * array_p, * array_b ;
00049
00050 size_t src_extent [MAX_NARY_DIMS] ;
00051 size_t src_stride [MAX_NARY_DIMS] ;
00052 size_t src_offset [MAX_NARY_DIMS] ;
00053 size_t counter[MAX_NARY_DIMS] ;
00054
00055 size_t res_stride [MAX_NARY_DIMS] ;
00056 size_t res_extent [MAX_NARY_DIMS] ;
00057 size_t res_offset [MAX_NARY_DIMS] ;
00058
00059 int32_t j,ii;
00060 char *rp, *ap ;
00061 int32_t res_rank ;
00062 int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;
00063
00064 size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00065
00066 size_t a_size,a_stride,r_stride, i,k ;
00067 int8_t zero_szd_source = FALSE;
00068 int8_t byte_aligned = FALSE;
00069
00070 int32_t ddim ;
00071
00072 size_t num_trues ;
00073 int32_t local_alloc ;
00074 size_t tot_ext ;
00075 size_t str_sz ;
00076
00077 int32_t nc ;
00078 size_t src_size ;
00079
00080 size_t res_sz;
00081 size_t xfer_sz;
00082 size_t tot_sz;
00083
00084 ddim = (*dim) - 1 ;
00085
00086 if ((ddim > src_rank + 1) || (ddim < 0))
00087 ERROR(_LELVL_ABORT,FESCIDIM);
00088
00089 nc = * ncopies ;
00090 if (nc < 0) nc = 0 ;
00091
00092 src_extent[0] = 1;
00093 src_stride[0] = 0;
00094 src_offset[0] = 0;
00095
00096 for ( j = 0 ; j <= src_rank ; j ++ ) {
00097 src_extent[j] = GET_EXTENT_FROM_DESC(array,j) ;
00098 src_stride[j] = GET_STRIDE_FROM_DESC(array,j) ;
00099 }
00100
00101 for ( j = 1 ; j <= src_rank ; j ++ ) {
00102 src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1])) ;
00103 }
00104
00105 res_rank = src_rank + 2 ;
00106 if (src_rank < 0 )
00107 src_rank = 0 ;
00108
00109 byte_aligned = GET_BYTEALIGNED_FROM_DESC(result);
00110
00111 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00112
00113 size_t nbytes ;
00114 char *p ;
00115
00116 SET_ADDRESS_IN_DESC(result,NULL);
00117 SET_ORIG_BS_IN_DESC(result,NULL) ;
00118 SET_ORIG_SZ_IN_DESC(result,0) ;
00119
00120 p = NULL ;
00121 tot_ext = 1 ;
00122 nbytes = typ_sz ;
00123
00124 str_sz = MK_STRIDE(byte_aligned,typ_sz);
00125
00126 for ( i = 0 , j = 0 ; i < res_rank ; i ++) {
00127 size_t ex ;
00128 SET_LBOUND_IN_DESC(result,i,1);
00129
00130 if (i != ddim ) {
00131 ex = src_extent[j];
00132 j ++ ;
00133 } else {
00134 ex = nc ;
00135 }
00136 SET_EXTENT_IN_DESC(result,i,ex);
00137 SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
00138 tot_ext *= ex;
00139 nbytes *= ex;
00140 }
00141
00142 if (nbytes > 0) {
00143 p = (void *) malloc (nbytes);
00144 if (p == NULL)
00145 ERROR(_LELVL_ABORT, FENOMEMY);
00146
00147 SET_ADDRESS_IN_DESC(result,p);
00148 }
00149
00150 SET_CONTIG_IN_DESC(result);
00151 SET_ASSOCIATED_IN_DESC(result);
00152 if (GET_DV_ASCII_FROM_DESC(array)) {
00153 SET_CHARPTR_IN_DESC(result,p,typ_sz);
00154 }
00155 SET_ORIG_BS_IN_DESC(result,p) ;
00156 SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ;
00157 }
00158
00159 res_stride[res_rank-1] = GET_STRIDE_FROM_DESC(result,ddim) ;
00160 res_extent[res_rank-1] = GET_EXTENT_FROM_DESC(result,ddim) ;
00161
00162 for ( j = 0 , k = 0; j < res_rank ; j ++ ) {
00163 if (j != ddim ) {
00164 res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ;
00165 res_extent[k] = GET_EXTENT_FROM_DESC(result,j) ;
00166 k ++ ;
00167 }
00168 counter[j] = 0 ;
00169 }
00170 for ( j = 1 ; j < res_rank ; j ++ )
00171 res_offset[j-1] = res_stride[j] - (res_stride [j-1] * (res_extent[j-1])) ;
00172
00173 if (zero_szd_source)
00174 return ;
00175
00176 a_size = src_extent[0] ;
00177 a_stride = src_stride[0] ;
00178 r_stride = res_stride[0] ;
00179 array_p = GET_ADDRESS_FROM_DESC(array);
00180 result_p = GET_ADDRESS_FROM_DESC(result);
00181
00182 if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p)) {
00183
00184 while (counter[src_rank] < src_extent[src_rank] ) {
00185 for ( i = 0 ; i < a_size ; i ++ ) {
00186 char * rp1;
00187 rp1 = result_p ;
00188 for (k = 0 ; k < nc ; k ++ ) {
00189 *(i1 *)rp1 = *(i1 *)array_p ;
00190 rp1 += res_stride[res_rank-1] ;
00191 }
00192 result_p += r_stride ;
00193 array_p += a_stride ;
00194 }
00195
00196 counter[0] = a_size ;
00197 j = 0 ;
00198 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00199 array_p += src_offset[j] ;
00200 result_p += res_offset[j] ;
00201 counter[j+1]++ ;
00202 counter[j] = 0 ;
00203 j ++ ;
00204 }
00205
00206 }
00207 } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) && ALIGNED_i2(result_p) ) {
00208
00209 while (counter[src_rank] < src_extent[src_rank] ) {
00210 for ( i = 0 ; i < a_size ; i ++ ) {
00211 char * rp1;
00212 rp1 = result_p ;
00213 for (k = 0 ; k < nc ; k ++ ) {
00214 *(i2 *)rp1 = *(i2 *)array_p ;
00215 rp1 += res_stride[res_rank-1] ;
00216 }
00217 result_p += r_stride ;
00218 array_p += a_stride ;
00219 }
00220
00221 counter[0] = a_size ;
00222 j = 0 ;
00223 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00224 array_p += src_offset[j] ;
00225 result_p += res_offset[j] ;
00226 counter[j+1]++ ;
00227 counter[j] = 0 ;
00228 j ++ ;
00229 }
00230
00231 }
00232 } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) && ALIGNED_r4(result_p) ) {
00233
00234 while (counter[src_rank] < src_extent[src_rank] ) {
00235 for ( i = 0 ; i < a_size ; i ++ ) {
00236 char * rp1;
00237 rp1 = result_p ;
00238 for (k = 0 ; k < nc ; k ++ ) {
00239 *(r4 *)rp1 = *(r4 *)array_p ;
00240 rp1 += res_stride[res_rank-1] ;
00241 }
00242 result_p += r_stride ;
00243 array_p += a_stride ;
00244 }
00245
00246 counter[0] = a_size ;
00247 j = 0 ;
00248 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00249 array_p += src_offset[j] ;
00250 result_p += res_offset[j] ;
00251 counter[j+1]++ ;
00252 counter[j] = 0 ;
00253 j ++ ;
00254 }
00255
00256 }
00257 } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) && ALIGNED_r8(result_p) ) {
00258
00259 while (counter[src_rank] < src_extent[src_rank] ) {
00260 for ( i = 0 ; i < a_size ; i ++ ) {
00261 char * rp1;
00262 rp1 = result_p ;
00263 for (k = 0 ; k < nc ; k ++ ) {
00264 *(r8 *)rp1 = *(r8 *)array_p ;
00265 rp1 += res_stride[res_rank-1] ;
00266 }
00267 result_p += r_stride ;
00268 array_p += a_stride ;
00269 }
00270
00271 counter[0] = a_size ;
00272 j = 0 ;
00273 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00274 array_p += src_offset[j] ;
00275 result_p += res_offset[j] ;
00276 counter[j+1]++ ;
00277 counter[j] = 0 ;
00278 j ++ ;
00279 }
00280
00281 }
00282 } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) && ALIGNED_r16(result_p) ) {
00283
00284 while (counter[src_rank] < src_extent[src_rank] ) {
00285 for ( i = 0 ; i < a_size ; i ++ ) {
00286 char * rp1;
00287 rp1 = result_p ;
00288 for (k = 0 ; k < nc ; k ++ ) {
00289 *(r16 *)rp1 = *(r16 *)array_p ;
00290 rp1 += res_stride[res_rank-1] ;
00291 }
00292 result_p += r_stride ;
00293 array_p += a_stride ;
00294 }
00295
00296 counter[0] = a_size ;
00297 j = 0 ;
00298 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00299 array_p += src_offset[j] ;
00300 result_p += res_offset[j] ;
00301 counter[j+1]++ ;
00302 counter[j] = 0 ;
00303 j ++ ;
00304 }
00305
00306 }
00307 } else {
00308 while (counter[src_rank] < src_extent[src_rank] ) {
00309 for ( i = 0 ; i < a_size ; i ++ ) {
00310 char * rp1;
00311 rp1 = result_p ;
00312 for (k = 0 ; k < nc ; k ++ ) {
00313 rp = rp1 ;
00314 ap = array_p ;
00315 if (typ_sz > BIGDEFAULTSZ)
00316 (void) memcpy (rp, ap, typ_sz);
00317 else
00318 for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ;
00319 rp1 += res_stride[res_rank-1] ;
00320 }
00321 result_p += r_stride ;
00322 array_p += a_stride ;
00323 }
00324
00325 counter[0] = a_size ;
00326 j = 0 ;
00327 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00328 array_p += src_offset[j] ;
00329 result_p += res_offset[j] ;
00330 counter[j+1]++ ;
00331 counter[j] = 0 ;
00332 j ++ ;
00333 }
00334
00335 }
00336 }
00337 }