Actual source code: matpapt.c
/*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
/*
  Defines matrix-matrix product routines for pairs of SeqAIJ matrices
          C = P * A * P^T
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/utils/freespace.h"
/*
  Log-event handles for the three routines in this file.  Each starts at 0,
  which the routines treat as "not yet registered": the first call registers
  the event through PetscLogEventRegister() and stores the handle here.
*/
static int logkey_matapplypapt          = 0;
static int logkey_matapplypapt_symbolic = 0;
static int logkey_matapplypapt_numeric  = 0;
14: /*
15: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
16: C = P * A * P^T;
18: Note: C is assumed to be uncreated.
19: If this is not the case, Destroy C before calling this routine.
20: */
23: int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
24: /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
25: /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */
26: int ierr;
27: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
28: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
29: int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
30: int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
31: int an=A->N,am=A->M,pn=P->N,pm=P->M;
32: int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
33: MatScalar *ca;
37: /* some error checking which could be moved into interface layer */
38: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
39: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
41: /* Set up timers */
42: if (!logkey_matapplypapt_symbolic) {
43: PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
44: }
45: PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);
47: /* Create ij structure of P^T */
48: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
50: /* Allocate ci array, arrays for fill computation and */
51: /* free space for accumulating nonzero column info */
52: PetscMalloc(((pm+1)*1)*sizeof(int),&ci);
53: ci[0] = 0;
55: PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);
56: PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));
57: pasparserow = padenserow + an;
58: denserow = pasparserow + an;
59: sparserow = denserow + pm;
61: /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
62: /* This should be reasonable if sparsity of PAPt is similar to that of A. */
63: GetMoreSpace((ai[am]/pn)*pm,&free_space);
64: current_space = free_space;
66: /* Determine fill for each row of C: */
67: for (i=0;i<pm;i++) {
68: pnzi = pi[i+1] - pi[i];
69: panzi = 0;
70: /* Get symbolic sparse row of PA: */
71: for (j=0;j<pnzi;j++) {
72: arow = *pj++;
73: anzj = ai[arow+1] - ai[arow];
74: ajj = aj + ai[arow];
75: for (k=0;k<anzj;k++) {
76: if (!padenserow[ajj[k]]) {
77: padenserow[ajj[k]] = -1;
78: pasparserow[panzi++] = ajj[k];
79: }
80: }
81: }
82: /* Using symbolic row of PA, determine symbolic row of C: */
83: paj = pasparserow;
84: cnzi = 0;
85: for (j=0;j<panzi;j++) {
86: ptrow = *paj++;
87: ptnzj = pti[ptrow+1] - pti[ptrow];
88: ptjj = ptj + pti[ptrow];
89: for (k=0;k<ptnzj;k++) {
90: if (!denserow[ptjj[k]]) {
91: denserow[ptjj[k]] = -1;
92: sparserow[cnzi++] = ptjj[k];
93: }
94: }
95: }
97: /* sort sparse representation */
98: PetscSortInt(cnzi,sparserow);
100: /* If free space is not available, make more free space */
101: /* Double the amount of total space in the list */
102: if (current_space->local_remaining<cnzi) {
103: GetMoreSpace(current_space->total_array_size,¤t_space);
104: }
106: /* Copy data into free space, and zero out dense row */
107: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
108: current_space->array += cnzi;
109: current_space->local_used += cnzi;
110: current_space->local_remaining -= cnzi;
112: for (j=0;j<panzi;j++) {
113: padenserow[pasparserow[j]] = 0;
114: }
115: for (j=0;j<cnzi;j++) {
116: denserow[sparserow[j]] = 0;
117: }
118: ci[i+1] = ci[i] + cnzi;
119: }
120: /* column indices are in the list of free space */
121: /* Allocate space for cj, initialize cj, and */
122: /* destroy list of free space and other temporary array(s) */
123: PetscMalloc((ci[pm]+1)*sizeof(int),&cj);
124: MakeSpaceContiguous(&free_space,cj);
125: PetscFree(padenserow);
126:
127: /* Allocate space for ca */
128: PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
129: PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
130:
131: /* put together the new matrix */
132: MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);
134: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
135: /* Since these are PETSc arrays, change flags to free them as necessary. */
136: c = (Mat_SeqAIJ *)((*C)->data);
137: c->freedata = PETSC_TRUE;
138: c->nonew = 0;
140: /* Clean up. */
141: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
143: PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
144: return(0);
145: }
147: /*
148: MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
149: C = P * A * P^T;
150: Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
151: */
154: int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
155: int ierr,flops=0;
156: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
157: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
158: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
159: int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
160: int *ci=c->i,*cj=c->j;
161: int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
162: int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
163: MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
167: /* This error checking should be unnecessary if the symbolic was performed */
168: if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
169: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
170: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
171: if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
173: /* Set up timers */
174: if (!logkey_matapplypapt_numeric) {
175: PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
176: }
177: PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);
179: PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);
180: PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));
181: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
183: paj = (int *)(paa + an);
184: pajdense = paj + an;
186: for (i=0;i<pm;i++) {
187: /* Form sparse row of P*A */
188: pnzi = pi[i+1] - pi[i];
189: panzj = 0;
190: for (j=0;j<pnzi;j++) {
191: arow = *pj++;
192: anzj = ai[arow+1] - ai[arow];
193: ajj = aj + ai[arow];
194: aaj = aa + ai[arow];
195: for (k=0;k<anzj;k++) {
196: if (!pajdense[ajj[k]]) {
197: pajdense[ajj[k]] = -1;
198: paj[panzj++] = ajj[k];
199: }
200: paa[ajj[k]] += (*pa)*aaj[k];
201: }
202: flops += 2*anzj;
203: pa++;
204: }
206: /* Sort the j index array for quick sparse axpy. */
207: PetscSortInt(panzj,paj);
209: /* Compute P*A*P^T using sparse inner products. */
210: /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
211: cnzi = ci[i+1] - ci[i];
212: for (j=0;j<cnzi;j++) {
213: /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
214: ptcol = *cj++;
215: ptnzj = pi[ptcol+1] - pi[ptcol];
216: ptj = pjj + pi[ptcol];
217: ptaj = pta + pi[ptcol];
218: sum = 0.;
219: k1 = 0;
220: k2 = 0;
221: while ((k1<panzj) && (k2<ptnzj)) {
222: if (paj[k1]==ptj[k2]) {
223: sum += paa[paj[k1++]]*ptaj[k2++];
224: } else if (paj[k1] < ptj[k2]) {
225: k1++;
226: } else /* if (paj[k1] > ptj[k2]) */ {
227: k2++;
228: }
229: }
230: *ca++ = sum;
231: }
233: /* Zero the current row info for P*A */
234: for (j=0;j<panzj;j++) {
235: paa[paj[j]] = 0.;
236: pajdense[paj[j]] = 0;
237: }
238: }
240: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
241: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
242: PetscLogFlops(flops);
243: PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
244: return(0);
245: }
246:
249: int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
253: if (!logkey_matapplypapt) {
254: PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
255: }
256: PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
257: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
258: MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
259: PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
260: return(0);
261: }