Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #pragma ident "@(#) libfi/array/trans@.c 92.1 07/07/99 15:52:02"
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070 #include <stddef.h>
00071 #include <stdlib.h>
00072 #include <cray/dopevec.h>
00073 #include <cray/portdefs.h>
00074 #include "intrin.h"
00075
00076
00077
00078
00079
/* Entry-point name; the function definition below is written as NAME(...). */
#define NAME _TRANS

/*
 * Number of bits in one byte, derived from the word-size constants
 * BITS_PER_WORD and BYTES_PER_WORD (presumably supplied by
 * <cray/portdefs.h> -- confirm against that header).
 */
#define BITS_PER_BYTE   (BITS_PER_WORD / BYTES_PER_WORD)

#ifdef _UNICOS
/* On UNICOS, make the same routine reachable under the name TRANS@ too. */
#pragma _CRI duplicate _TRANS as TRANS@
#endif
00086 void
00087 NAME(DopeVectorType * RESULT, DopeVectorType * MATRIX_A)
00088 {
00089 long *A;
00090 long *B;
00091 char *ca;
00092 char *cb;
00093 long n1a, n2a;
00094 long inc1a, inc2a;
00095
00096 long inc1b, inc2b;
00097
00098 int bucketsize;
00099
00100 long nbytes;
00101 long nbits;
00102 int bytealligned;
00103
00104 long i, j, k;
00105
00106
00107
00108
00109
00110 n1a = MATRIX_A->dimension[0].extent;
00111 n2a = MATRIX_A->dimension[1].extent;
00112 inc1a = MATRIX_A->dimension[0].stride_mult;
00113 inc2a = MATRIX_A->dimension[1].stride_mult;
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124 if (MATRIX_A->type_lens.type == DVTYPE_ASCII) {
00125
00126
00127
00128 bytealligned = 1;
00129 ca = _fcdtocp(MATRIX_A->base_addr.charptr);
00130 bucketsize = _fcdlen(MATRIX_A->base_addr.charptr);
00131 nbits = bucketsize * BITS_PER_BYTE;
00132 } else if (MATRIX_A->type_lens.type == DVTYPE_DERIVEDBYTE ||
00133 MATRIX_A->type_lens.type == DVTYPE_DERIVEDWORD) {
00134 bytealligned =
00135 (MATRIX_A->type_lens.type == DVTYPE_DERIVEDBYTE) ? 1 : 0;
00136
00137
00138 nbits = MATRIX_A->base_addr.a.el_len;
00139 if (bytealligned) {
00140 ca = _fcdtocp(MATRIX_A->base_addr.charptr);
00141 bucketsize = nbits / BITS_PER_BYTE;
00142
00143
00144
00145 } else {
00146 A = (long *) MATRIX_A->base_addr.a.ptr;
00147
00148 bucketsize = nbits / BITS_PER_WORD;
00149
00150
00151
00152 }
00153 } else {
00154
00155
00156
00157 bytealligned = 0;
00158 A = (long *) MATRIX_A->base_addr.a.ptr;
00159 nbits = MATRIX_A->type_lens.int_len;
00160 bucketsize = nbits / BITS_PER_WORD;
00161
00162
00163
00164 }
00165
00166
00167
00168
00169
00170 if (!RESULT->assoc) {
00171
00172
00173
00174 RESULT->base_addr.a.ptr = (void *) NULL;
00175 RESULT->orig_base = 0;
00176 RESULT->orig_size = 0;
00177
00178
00179
00180 RESULT->dimension[0].low_bound = 1;
00181 RESULT->dimension[0].extent = MATRIX_A->dimension[1].extent;
00182 RESULT->dimension[0].stride_mult = bucketsize;
00183 RESULT->dimension[1].low_bound = 1;
00184 RESULT->dimension[1].extent = MATRIX_A->dimension[0].extent;
00185 RESULT->dimension[1].stride_mult = bucketsize * n2a;
00186
00187
00188
00189 nbits = nbits *
00190 RESULT->dimension[0].extent * RESULT->dimension[1].extent;
00191 nbytes = nbits / BITS_PER_BYTE;
00192
00193
00194 if (nbits != 0) {
00195 B = (void *) MALLOC(nbytes);
00196 if (B == NULL) {
00197 ERROR(FENOMEMY);
00198 return;
00199 }
00200 }
00201 RESULT->assoc = 1;
00202 if ( MATRIX_A->type_lens.type == DVTYPE_ASCII) {
00203 RESULT->base_addr.charptr = _cptofcd( (char *) B, bucketsize);
00204 } else
00205 RESULT->base_addr.a.ptr = (void *) B;
00206 RESULT->orig_base = (void *) B;
00207 RESULT->orig_size = nbits;
00208 }
00209
00210
00211
00212
00213 if (bytealligned)
00214 cb = _fcdtocp(RESULT->base_addr.charptr);
00215 else
00216 B = (long *) RESULT->base_addr.a.ptr;
00217 inc1b = RESULT->dimension[0].stride_mult;
00218 inc2b = RESULT->dimension[1].stride_mult;
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230 if (!bytealligned) {
00231
00232
00233
00234 for (k = 1; k <= bucketsize; k++) {
00235 for (j = 0; j < n2a; j++)
00236 for (i = 0; i < n1a; i++)
00237 B[j * inc1b + i * inc2b] = A[i * inc1a + j * inc2a];
00238
00239 A++;
00240 B++;
00241 }
00242 } else {
00243
00244
00245
00246 for (k = 1; k <= bucketsize; k++) {
00247 for (j = 0; j < n2a; j++)
00248 for (i = 0; i < n1a; i++)
00249 cb[j * inc1b + i * inc2b] = ca[i * inc1a + j * inc2a];
00250
00251 ca++;
00252 cb++;
00253 }
00254 }
00255 return;
00256 }