Actual source code: mattransposematmult.c
1: /*
2: Defines matrix-matrix product routines for
3: C = A^T * B and C = A * B^t
4: with A SeqAIJ and B SeqDense
5: */
7: #include <../src/mat/impls/aij/seq/aij.h>
8: #include <../src/mat/impls/dense/seq/dense.h>
10: static PetscErrorCode MatDestroy_SeqDense_MatTransMatMult(void *data)
11: {
12: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)data;
14: PetscFunctionBegin;
15: PetscCall(MatDestroy(&atb->mA));
16: PetscCall(VecDestroy(&atb->bt));
17: PetscCall(VecDestroy(&atb->ct));
18: PetscCall(PetscFree(atb));
19: PetscFunctionReturn(PETSC_SUCCESS);
20: }
22: static PetscErrorCode MatTMatTMultNumeric_SeqAIJ_SeqDense(Mat, Mat, Mat);
24: PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
25: {
26: Mat_MatTransMatMult *atb;
27: PetscBool cisdense;
28: PetscInt dofm;
30: PetscFunctionBegin;
31: MatCheckProduct(C, 4);
32: PetscCheck(!C->product->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Extra product struct not empty");
33: PetscCheck(C->product->type == MATPRODUCT_ABt || C->product->type == MATPRODUCT_AtB, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[C->product->type]);
35: /* create output dense matrix C */
36: if (C->product->type == MATPRODUCT_AtB) {
37: PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->N, A->cmap->n, B->cmap->N));
38: dofm = B->cmap->n;
39: } else {
40: PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->N, A->rmap->n, B->rmap->N));
41: dofm = B->rmap->n;
42: }
43: PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
44: if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)B)->type_name));
45: PetscCall(MatSetUp(C));
47: /* create additional data structure for the product */
48: PetscCall(PetscNew(&atb));
49: PetscCall(MatCreateMAIJ(A, dofm, &atb->mA));
50: PetscCall(MatCreateVecs(atb->mA, &atb->ct, &atb->bt));
51: C->product->data = atb;
52: C->product->destroy = MatDestroy_SeqDense_MatTransMatMult;
54: if (C->product->type == MATPRODUCT_AtB) {
55: C->ops->transposematmultnumeric = MatTMatTMultNumeric_SeqAIJ_SeqDense;
56: } else {
57: C->ops->mattransposemultnumeric = MatTMatTMultNumeric_SeqAIJ_SeqDense;
58: }
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: static PetscErrorCode MatTMatTMultNumeric_SeqAIJ_SeqDense(Mat A, Mat B, Mat C)
63: {
64: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, blda, clda;
65: PetscInt mdof = C->cmap->N;
66: const PetscScalar *Barray;
67: PetscScalar *Carray;
68: Mat_MatTransMatMult *atb;
69: Vec bt, ct;
71: PetscFunctionBegin;
72: MatCheckProduct(C, 3);
73: PetscCheck(C->product->type == MATPRODUCT_ABt || C->product->type == MATPRODUCT_AtB, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[C->product->type]);
74: atb = (Mat_MatTransMatMult *)C->product->data;
75: PetscCheck(atb, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct");
76: bt = atb->bt;
77: ct = atb->ct;
79: PetscCall(MatDenseGetArrayRead(B, &Barray));
80: PetscCall(MatDenseGetLDA(B, &blda));
81: PetscCall(MatDenseGetArrayWrite(C, &Carray));
82: PetscCall(MatDenseGetLDA(C, &clda));
83: if (C->product->type == MATPRODUCT_AtB) { /* transpose local array of B, then copy it to vector bt */
84: const PetscScalar *ctarray;
85: PetscScalar *btarray;
87: PetscCall(VecGetArrayWrite(bt, &btarray));
88: for (j = 0; j < mdof; j++) {
89: for (i = 0; i < m; i++) btarray[i * mdof + j] = Barray[j * blda + i];
90: }
91: PetscCall(VecRestoreArrayWrite(bt, &btarray));
93: /* compute ct = mA^T * cb */
94: PetscCall(MatMultTranspose(atb->mA, bt, ct));
96: /* transpose local array of ct to matrix C */
97: PetscCall(VecGetArrayRead(ct, &ctarray));
98: for (j = 0; j < mdof; j++) {
99: for (i = 0; i < n; i++) Carray[j * clda + i] = ctarray[i * mdof + j];
100: }
101: PetscCall(VecRestoreArrayRead(ct, &ctarray));
102: } else {
103: const PetscScalar *btarray;
104: PetscScalar *ctarray;
106: if (blda == B->rmap->n) {
107: PetscCall(VecPlaceArray(ct, Barray));
108: } else {
109: PetscInt bn = B->cmap->n;
110: PetscInt bm = B->rmap->n;
112: PetscCall(VecGetArrayWrite(ct, &ctarray));
113: for (j = 0; j < bn; j++) {
114: for (i = 0; i < bm; i++) ctarray[j * bm + i] = Barray[j * blda + i];
115: }
116: PetscCall(VecRestoreArrayWrite(ct, &ctarray));
117: }
119: PetscCall(MatMult(atb->mA, ct, bt));
120: if (blda == B->rmap->n) PetscCall(VecResetArray(ct));
121: PetscCall(VecGetArrayRead(bt, &btarray));
122: for (j = 0; j < mdof; j++) {
123: for (i = 0; i < m; i++) Carray[j * clda + i] = btarray[i * mdof + j];
124: }
125: PetscCall(VecRestoreArrayRead(bt, &btarray));
126: }
127: PetscCall(MatDenseRestoreArrayRead(B, &Barray));
128: PetscCall(MatDenseRestoreArray(C, &Carray));
129: PetscFunctionReturn(PETSC_SUCCESS);
130: }