Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2.1 of the GNU Lesser General Public License 00007 as published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU Lesser General Public 00021 License along with this program; if not, write the Free Software 00022 Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, 00023 USA. 00024 00025 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00026 Mountain View, CA 94043, or: 00027 00028 http://www.sgi.com 00029 00030 For further information regarding this notice, see: 00031 00032 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00033 00034 */ 00035 00036 00037 #pragma ident "@(#) libfi/array/[email protected] 92.1 07/07/99 15:52:02" 00038 00039 00040 /*********************************************************************** 00041 * Name: TRANS@ * 00042 * * 00043 * Description: * 00044 * * 00045 * MATRIX TRANSPOSE INTRINSIC FUNCTION * 00046 * * 00047 * Set B = TRANSPOSE(A), * 00048 * * 00049 * where: * 00050 * * 00051 * B = the matrix described by dope vector RESULT, and * 00052 * * 00053 * A = the matrix described by dope vector MATRIX_A. * 00054 * * 00055 * Arguments: * 00056 * * 00057 * RESULT Dope vector for RESULT matrix * 00058 * * 00059 * MATRIX_A Dope vector for matrix operand * 00060 * * 00061 * Author: * 00062 * Math Software Group * 00063 * Cray Research, Inc. * 00064 * * 00065 ************************************************************************/ 00066 00067 /* 00068 * header files 00069 */ 00070 #include <stddef.h> 00071 #include <stdlib.h> 00072 #include <cray/dopevec.h> /* description of Fortran 90 Dope Vector */ 00073 #include <cray/portdefs.h> /* portable definitions header file */ 00074 #include "intrin.h" /* general header file for intrinsics */ 00075 00076 /* 00077 * Name of this entry point 00078 */ 00079 00080 #define NAME _TRANS 00081 #define BITS_PER_BYTE (BITS_PER_WORD / BYTES_PER_WORD) 00082 00083 #ifdef _UNICOS 00084 #pragma _CRI duplicate _TRANS as TRANS@ 00085 #endif 00086 void 00087 NAME(DopeVectorType * RESULT, DopeVectorType * MATRIX_A) 00088 { 00089 long *A; /* Matrix A address */ 00090 long *B; /* Matrix B address (RESULT) */ 00091 char *ca; /* character pointer to A */ 00092 char *cb; /* character pointer to B (RESULT) */ 00093 long n1a, n2a; /* dimensions of MATRIX_A */ 00094 long inc1a, inc2a; /* column and row increments, 00095 * matrix A */ 00096 long inc1b, inc2b; /* column and row increments, 00097 * matrix B */ 00098 int bucketsize; /* number of words per array element (or 00099 * number of bytes, if a byte array) */ 00100 long nbytes; /* number of bytes total in RESULT array */ 00101 long nbits; /* number of bits of RESULT array */ 00102 int bytealligned; /* true if byte alligned data 00103 * type, else false */ 00104 long i, j, k; /* subscripts */ 00105 00106 /* 00107 * Parse the Dope Vector for MATRIX_A 00108 */ 00109 00110 n1a = MATRIX_A->dimension[0].extent; 00111 n2a = MATRIX_A->dimension[1].extent; 00112 inc1a = MATRIX_A->dimension[0].stride_mult; 00113 inc2a = MATRIX_A->dimension[1].stride_mult; 00114 00115 /* 00116 * Set "bytealligned = true" iff and only if matrix is byte alligned. 00117 * 00118 * Matrix is byte alligned if and only if the type code is "ASCII" or 00119 * "DERIVEDBYTE". 00120 * 00121 * The bucketsize is the number of words (or bytes) per array element. 00122 */ 00123 00124 if (MATRIX_A->type_lens.type == DVTYPE_ASCII) { 00125 /* 00126 * Byte alligned 00127 */ 00128 bytealligned = 1; 00129 ca = _fcdtocp(MATRIX_A->base_addr.charptr); 00130 bucketsize = _fcdlen(MATRIX_A->base_addr.charptr); /* in bytes */ 00131 nbits = bucketsize * BITS_PER_BYTE; 00132 } else if (MATRIX_A->type_lens.type == DVTYPE_DERIVEDBYTE || 00133 MATRIX_A->type_lens.type == DVTYPE_DERIVEDWORD) { 00134 bytealligned = 00135 (MATRIX_A->type_lens.type == DVTYPE_DERIVEDBYTE) ? 1 : 0; 00136 00137 /* retrieve the number of bits in an element */ 00138 nbits = MATRIX_A->base_addr.a.el_len; 00139 if (bytealligned) { 00140 ca = _fcdtocp(MATRIX_A->base_addr.charptr); 00141 bucketsize = nbits / BITS_PER_BYTE; 00142 /* bucketsize in bytes = 00143 * bits/bits_per_byte 00144 */ 00145 } else { 00146 A = (long *) MATRIX_A->base_addr.a.ptr; 00147 /* base address */ 00148 bucketsize = nbits / BITS_PER_WORD; 00149 /* bucketsize in words = 00150 * bits/bits_per_word 00151 */ 00152 } 00153 } else { 00154 /* 00155 * Word alligned, not byte aligned. 00156 */ 00157 bytealligned = 0; 00158 A = (long *) MATRIX_A->base_addr.a.ptr; /* base address */ 00159 nbits = MATRIX_A->type_lens.int_len; 00160 bucketsize = nbits / BITS_PER_WORD; /* bucketsize in words = 00161 * bucketsize_in_bits 00162 * divided by bits_per_word 00163 */ 00164 } 00165 00166 /* 00167 * Allocate RESULT array, if necessary. 00168 */ 00169 00170 if (!RESULT->assoc) { 00171 /* 00172 * Copy RESULT dope vector fields from MATRIX_A dope vector. 00173 */ 00174 RESULT->base_addr.a.ptr = (void *) NULL; /* no address yet */ 00175 RESULT->orig_base = 0; 00176 RESULT->orig_size = 0; 00177 /* 00178 * Set dimension-specific information for RESULT 00179 */ 00180 RESULT->dimension[0].low_bound = 1; 00181 RESULT->dimension[0].extent = MATRIX_A->dimension[1].extent; 00182 RESULT->dimension[0].stride_mult = bucketsize; 00183 RESULT->dimension[1].low_bound = 1; 00184 RESULT->dimension[1].extent = MATRIX_A->dimension[0].extent; 00185 RESULT->dimension[1].stride_mult = bucketsize * n2a; 00186 /* 00187 * Allocate the space for RESULT 00188 */ 00189 nbits = nbits * 00190 RESULT->dimension[0].extent * RESULT->dimension[1].extent; 00191 nbytes = nbits / BITS_PER_BYTE; /* byte length = bit length 00192 * divided by bits_per_byte 00193 */ 00194 if (nbits != 0) { 00195 B = (void *) MALLOC(nbytes); 00196 if (B == NULL) { 00197 ERROR(FENOMEMY); 00198 return; 00199 } 00200 } 00201 RESULT->assoc = 1; 00202 if ( MATRIX_A->type_lens.type == DVTYPE_ASCII) { 00203 RESULT->base_addr.charptr = _cptofcd( (char *) B, bucketsize); 00204 } else 00205 RESULT->base_addr.a.ptr = (void *) B; 00206 RESULT->orig_base = (void *) B; 00207 RESULT->orig_size = nbits; 00208 } 00209 /* 00210 * RESULT now exists (whether or not we just created it). 00211 * Assign values to pointer and increments. 00212 */ 00213 if (bytealligned) 00214 cb = _fcdtocp(RESULT->base_addr.charptr); 00215 else 00216 B = (long *) RESULT->base_addr.a.ptr; 00217 inc1b = RESULT->dimension[0].stride_mult; 00218 inc2b = RESULT->dimension[1].stride_mult; 00219 /* 00220 * At this point, we have all the data set up, and we just have to do 00221 * the transpose. 00222 * 00223 * There are two cases: 00224 * 00225 * (1) word alligned data, and 00226 * 00227 * (2) byte alligned data. 00228 */ 00229 00230 if (!bytealligned) { 00231 /* 00232 * Word alligned data 00233 */ 00234 for (k = 1; k <= bucketsize; k++) { 00235 for (j = 0; j < n2a; j++) 00236 for (i = 0; i < n1a; i++) 00237 B[j * inc1b + i * inc2b] = A[i * inc1a + j * inc2a]; 00238 00239 A++; /* address next word */ 00240 B++; /* address next word */ 00241 } 00242 } else { 00243 /* 00244 * Byte alligned data 00245 */ 00246 for (k = 1; k <= bucketsize; k++) { 00247 for (j = 0; j < n2a; j++) 00248 for (i = 0; i < n1a; i++) 00249 cb[j * inc1b + i * inc2b] = ca[i * inc1a + j * inc2a]; 00250 00251 ca++; /* address next byte */ 00252 cb++; /* address next byte */ 00253 } 00254 } 00255 return; 00256 }