Actual source code: matptap.c
1: /*
2: Defines projective product routines where A is a SeqAIJ matrix
3: C = P^T * A * P
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/utils/freespace.h
9: EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*);
10: EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11: EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
12: EXTERN int RegisterMatMatMultRoutines_Private(Mat);
14: static int MATSeqAIJ_PtAP = 0;
15: static int MATSeqAIJ_PtAPSymbolic = 0;
16: static int MATSeqAIJ_PtAPNumeric = 0;
18: /*
19: MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C,
20: of SeqAIJ matrix A and matrix P:
21: C = P^T * A * P;
23: Note: C is assumed to be uncreated.
24: If this is not the case, Destroy C before calling this routine.
25: */
28: int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
30: char funct[80];
33: PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);
35: MatSeqAIJPtAPSymbolic(A,P,C);
37: /* Avoid additional error checking included in */
38: /* MatSeqAIJApplyPtAPNumeric(A,P,*C); */
40: /* Query A for ApplyPtAPNumeric implementation based on types of P */
41: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
42: PetscStrcat(funct,P->type_name);
43: PetscUseMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));
45: PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);
46: return(0);
47: }
49: /*
50: MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C,
51: of SeqAIJ matrix A and matrix P, according to:
52: C = P^T * A * P;
54: Note: C is assumed to be uncreated.
55: If this is not the case, Destroy C before calling this routine.
56: */
59: int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
61: char funct[80];
67: MatPreallocated(A);
68: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
69: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
73: MatPreallocated(P);
74: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
75: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
79: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
80: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
82: /* Query A for ApplyPtAP implementation based on types of P */
83: PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");
84: PetscStrcat(funct,P->type_name);
85: PetscUseMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));
87: return(0);
88: }
90: EXTERN_C_BEGIN
93: int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
94: int ierr;
95: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
96: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
97: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
98: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
99: int an=A->N,am=A->M,pn=P->N,pm=P->M;
100: int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
101: MatScalar *ca;
105: /* Start timer */
106: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
108: /* Get ij structure of P^T */
109: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
110: ptJ=ptj;
112: /* Allocate ci array, arrays for fill computation and */
113: /* free space for accumulating nonzero column info */
114: PetscMalloc((pn+1)*sizeof(int),&ci);
115: ci[0] = 0;
117: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
118: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
119: ptasparserow = ptadenserow + an;
120: denserow = ptasparserow + an;
121: sparserow = denserow + pn;
123: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
124: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
125: GetMoreSpace((ai[am]/pm)*pn,&free_space);
126: current_space = free_space;
128: /* Determine symbolic info for each row of C: */
129: for (i=0;i<pn;i++) {
130: ptnzi = pti[i+1] - pti[i];
131: ptanzi = 0;
132: /* Determine symbolic row of PtA: */
133: for (j=0;j<ptnzi;j++) {
134: arow = *ptJ++;
135: anzj = ai[arow+1] - ai[arow];
136: ajj = aj + ai[arow];
137: for (k=0;k<anzj;k++) {
138: if (!ptadenserow[ajj[k]]) {
139: ptadenserow[ajj[k]] = -1;
140: ptasparserow[ptanzi++] = ajj[k];
141: }
142: }
143: }
144: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
145: ptaj = ptasparserow;
146: cnzi = 0;
147: for (j=0;j<ptanzi;j++) {
148: prow = *ptaj++;
149: pnzj = pi[prow+1] - pi[prow];
150: pjj = pj + pi[prow];
151: for (k=0;k<pnzj;k++) {
152: if (!denserow[pjj[k]]) {
153: denserow[pjj[k]] = -1;
154: sparserow[cnzi++] = pjj[k];
155: }
156: }
157: }
159: /* sort sparserow */
160: PetscSortInt(cnzi,sparserow);
161:
162: /* If free space is not available, make more free space */
163: /* Double the amount of total space in the list */
164: if (current_space->local_remaining<cnzi) {
165: GetMoreSpace(current_space->total_array_size,¤t_space);
166: }
168: /* Copy data into free space, and zero out denserows */
169: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
170: current_space->array += cnzi;
171: current_space->local_used += cnzi;
172: current_space->local_remaining -= cnzi;
173:
174: for (j=0;j<ptanzi;j++) {
175: ptadenserow[ptasparserow[j]] = 0;
176: }
177: for (j=0;j<cnzi;j++) {
178: denserow[sparserow[j]] = 0;
179: }
180: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
181: /* For now, we will recompute what is needed. */
182: ci[i+1] = ci[i] + cnzi;
183: }
184: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
185: /* Allocate space for cj, initialize cj, and */
186: /* destroy list of free space and other temporary array(s) */
187: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
188: MakeSpaceContiguous(&free_space,cj);
189: PetscFree(ptadenserow);
190:
191: /* Allocate space for ca */
192: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
193: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
194:
195: /* put together the new matrix */
196: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
198: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
199: /* Since these are PETSc arrays, change flags to free them as necessary. */
200: c = (Mat_SeqAIJ *)((*C)->data);
201: c->freedata = PETSC_TRUE;
202: c->nonew = 0;
204: /* Clean up. */
205: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
207: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
208: return(0);
209: }
210: EXTERN_C_END
212: #include src/mat/impls/maij/maij.h
213: EXTERN_C_BEGIN
216: int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
217: /* This routine requires testing -- I don't think it works. */
218: int ierr;
219: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
220: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
221: Mat P=pp->AIJ;
222: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
223: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
224: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
225: int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
226: int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
227: MatScalar *ca;
230: /* Start timer */
231: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
233: /* Get ij structure of P^T */
234: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
236: /* Allocate ci array, arrays for fill computation and */
237: /* free space for accumulating nonzero column info */
238: PetscMalloc((pn+1)*sizeof(int),&ci);
239: ci[0] = 0;
241: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
242: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
243: ptasparserow = ptadenserow + an;
244: denserow = ptasparserow + an;
245: sparserow = denserow + pn;
247: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
248: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
249: GetMoreSpace((ai[am]/pm)*pn,&free_space);
250: current_space = free_space;
252: /* Determine symbolic info for each row of C: */
253: for (i=0;i<pn/ppdof;i++) {
254: ptnzi = pti[i+1] - pti[i];
255: ptanzi = 0;
256: ptJ = ptj + pti[i];
257: for (dof=0;dof<ppdof;dof++) {
258: /* Determine symbolic row of PtA: */
259: for (j=0;j<ptnzi;j++) {
260: arow = ptJ[j] + dof;
261: anzj = ai[arow+1] - ai[arow];
262: ajj = aj + ai[arow];
263: for (k=0;k<anzj;k++) {
264: if (!ptadenserow[ajj[k]]) {
265: ptadenserow[ajj[k]] = -1;
266: ptasparserow[ptanzi++] = ajj[k];
267: }
268: }
269: }
270: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
271: ptaj = ptasparserow;
272: cnzi = 0;
273: for (j=0;j<ptanzi;j++) {
274: pdof = *ptaj%dof;
275: prow = (*ptaj++)/dof;
276: pnzj = pi[prow+1] - pi[prow];
277: pjj = pj + pi[prow];
278: for (k=0;k<pnzj;k++) {
279: if (!denserow[pjj[k]+pdof]) {
280: denserow[pjj[k]+pdof] = -1;
281: sparserow[cnzi++] = pjj[k]+pdof;
282: }
283: }
284: }
286: /* sort sparserow */
287: PetscSortInt(cnzi,sparserow);
288:
289: /* If free space is not available, make more free space */
290: /* Double the amount of total space in the list */
291: if (current_space->local_remaining<cnzi) {
292: GetMoreSpace(current_space->total_array_size,¤t_space);
293: }
295: /* Copy data into free space, and zero out denserows */
296: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
297: current_space->array += cnzi;
298: current_space->local_used += cnzi;
299: current_space->local_remaining -= cnzi;
301: for (j=0;j<ptanzi;j++) {
302: ptadenserow[ptasparserow[j]] = 0;
303: }
304: for (j=0;j<cnzi;j++) {
305: denserow[sparserow[j]] = 0;
306: }
307: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
308: /* For now, we will recompute what is needed. */
309: ci[i+1+dof] = ci[i+dof] + cnzi;
310: }
311: }
312: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
313: /* Allocate space for cj, initialize cj, and */
314: /* destroy list of free space and other temporary array(s) */
315: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
316: MakeSpaceContiguous(&free_space,cj);
317: PetscFree(ptadenserow);
318:
319: /* Allocate space for ca */
320: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
321: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
322:
323: /* put together the new matrix */
324: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
326: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
327: /* Since these are PETSc arrays, change flags to free them as necessary. */
328: c = (Mat_SeqAIJ *)((*C)->data);
329: c->freedata = PETSC_TRUE;
330: c->nonew = 0;
332: /* Clean up. */
333: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
335: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
336: return(0);
337: }
338: EXTERN_C_END
340: /*
341: MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C,
342: of SeqAIJ matrix A and matrix P, according to:
343: C = P^T * A * P
344: Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic.
345: */
348: int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
350: char funct[80];
356: MatPreallocated(A);
357: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
358: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
362: MatPreallocated(P);
363: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
364: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
368: MatPreallocated(C);
369: if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
370: if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
372: if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
373: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
374: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
375: if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
377: /* Query A for ApplyPtAP implementation based on types of P */
378: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
379: PetscStrcat(funct,P->type_name);
380: PetscUseMethod(A,funct,(Mat,Mat,Mat),(A,P,C));
382: return(0);
383: }
385: EXTERN_C_BEGIN
388: int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
389: int ierr,flops=0;
390: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
391: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
392: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
393: int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
394: int *ci=c->i,*cj=c->j,*cjj;
395: int am=A->M,cn=C->N,cm=C->M;
396: int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
397: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
400: PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);
402: /* Allocate temporary array for storage of one row of A*P */
403: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);
404: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));
406: apj = (int *)(apa + cn);
407: apjdense = apj + cn;
409: /* Clear old values in C */
410: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
412: for (i=0;i<am;i++) {
413: /* Form sparse row of A*P */
414: anzi = ai[i+1] - ai[i];
415: apnzj = 0;
416: for (j=0;j<anzi;j++) {
417: prow = *aj++;
418: pnzj = pi[prow+1] - pi[prow];
419: pjj = pj + pi[prow];
420: paj = pa + pi[prow];
421: for (k=0;k<pnzj;k++) {
422: if (!apjdense[pjj[k]]) {
423: apjdense[pjj[k]] = -1;
424: apj[apnzj++] = pjj[k];
425: }
426: apa[pjj[k]] += (*aa)*paj[k];
427: }
428: flops += 2*pnzj;
429: aa++;
430: }
432: /* Sort the j index array for quick sparse axpy. */
433: PetscSortInt(apnzj,apj);
435: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
436: pnzi = pi[i+1] - pi[i];
437: for (j=0;j<pnzi;j++) {
438: nextap = 0;
439: crow = *pJ++;
440: cjj = cj + ci[crow];
441: caj = ca + ci[crow];
442: /* Perform sparse axpy operation. Note cjj includes apj. */
443: for (k=0;nextap<apnzj;k++) {
444: if (cjj[k]==apj[nextap]) {
445: caj[k] += (*pA)*apa[apj[nextap++]];
446: }
447: }
448: flops += 2*apnzj;
449: pA++;
450: }
452: /* Zero the current row info for A*P */
453: for (j=0;j<apnzj;j++) {
454: apa[apj[j]] = 0.;
455: apjdense[apj[j]] = 0;
456: }
457: }
459: /* Assemble the final matrix and clean up */
460: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
461: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
462: PetscFree(apa);
463: PetscLogFlops(flops);
464: PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);
466: return(0);
467: }
468: EXTERN_C_END
472: int RegisterApplyPtAPRoutines_Private(Mat A) {
477: if (!MATSeqAIJ_PtAP) {
478: PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);
479: }
481: if (!MATSeqAIJ_PtAPSymbolic) {
482: PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);
483: }
484: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
485: "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
486: MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);
488: if (!MATSeqAIJ_PtAPNumeric) {
489: PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);
490: }
491: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
492: "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
493: MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);
494: RegisterMatMatMultRoutines_Private(A);
495: return(0);
496: }