Actual source code: matpapt.c

  1: /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = P * A * P^T
  5: */

 7:  #include src/mat/impls/aij/seq/aij.h
 8:  #include src/mat/utils/freespace.h

 10: static int logkey_matapplypapt          = 0;
 11: static int logkey_matapplypapt_symbolic = 0;
 12: static int logkey_matapplypapt_numeric  = 0;

 14: /*
 15:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 16:            C = P * A * P^T;

 18:      Note: C is assumed to be uncreated.
 19:            If this is not the case, Destroy C before calling this routine.
 20: */
 23: int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
 24:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
 25:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
 26:   int            ierr;
 27:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
 28:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
 29:   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
 30:   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
 31:   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
 32:   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
 33:   MatScalar      *ca;


 37:   /* some error checking which could be moved into interface layer */
 38:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
 39:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);

 41:   /* Set up timers */
 42:   if (!logkey_matapplypapt_symbolic) {
 43:     PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
 44:   }
 45:   PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);

 47:   /* Create ij structure of P^T */
 48:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

 50:   /* Allocate ci array, arrays for fill computation and */
 51:   /* free space for accumulating nonzero column info */
 52:   PetscMalloc(((pm+1)*1)*sizeof(int),&ci);
 53:   ci[0] = 0;

 55:   PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);
 56:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));
 57:   pasparserow  = padenserow  + an;
 58:   denserow     = pasparserow + an;
 59:   sparserow    = denserow    + pm;

 61:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
 62:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
 63:   GetMoreSpace((ai[am]/pn)*pm,&free_space);
 64:   current_space = free_space;

 66:   /* Determine fill for each row of C: */
 67:   for (i=0;i<pm;i++) {
 68:     pnzi  = pi[i+1] - pi[i];
 69:     panzi = 0;
 70:     /* Get symbolic sparse row of PA: */
 71:     for (j=0;j<pnzi;j++) {
 72:       arow = *pj++;
 73:       anzj = ai[arow+1] - ai[arow];
 74:       ajj  = aj + ai[arow];
 75:       for (k=0;k<anzj;k++) {
 76:         if (!padenserow[ajj[k]]) {
 77:           padenserow[ajj[k]]   = -1;
 78:           pasparserow[panzi++] = ajj[k];
 79:         }
 80:       }
 81:     }
 82:     /* Using symbolic row of PA, determine symbolic row of C: */
 83:     paj    = pasparserow;
 84:     cnzi   = 0;
 85:     for (j=0;j<panzi;j++) {
 86:       ptrow = *paj++;
 87:       ptnzj = pti[ptrow+1] - pti[ptrow];
 88:       ptjj  = ptj + pti[ptrow];
 89:       for (k=0;k<ptnzj;k++) {
 90:         if (!denserow[ptjj[k]]) {
 91:           denserow[ptjj[k]] = -1;
 92:           sparserow[cnzi++] = ptjj[k];
 93:         }
 94:       }
 95:     }

 97:     /* sort sparse representation */
 98:     PetscSortInt(cnzi,sparserow);

100:     /* If free space is not available, make more free space */
101:     /* Double the amount of total space in the list */
102:     if (current_space->local_remaining<cnzi) {
103:       GetMoreSpace(current_space->total_array_size,&current_space);
104:     }

106:     /* Copy data into free space, and zero out dense row */
107:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
108:     current_space->array           += cnzi;
109:     current_space->local_used      += cnzi;
110:     current_space->local_remaining -= cnzi;

112:     for (j=0;j<panzi;j++) {
113:       padenserow[pasparserow[j]] = 0;
114:     }
115:     for (j=0;j<cnzi;j++) {
116:       denserow[sparserow[j]] = 0;
117:     }
118:     ci[i+1] = ci[i] + cnzi;
119:   }
120:   /* column indices are in the list of free space */
121:   /* Allocate space for cj, initialize cj, and */
122:   /* destroy list of free space and other temporary array(s) */
123:   PetscMalloc((ci[pm]+1)*sizeof(int),&cj);
124:   MakeSpaceContiguous(&free_space,cj);
125:   PetscFree(padenserow);
126: 
127:   /* Allocate space for ca */
128:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
129:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
130: 
131:   /* put together the new matrix */
132:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);

134:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
135:   /* Since these are PETSc arrays, change flags to free them as necessary. */
136:   c = (Mat_SeqAIJ *)((*C)->data);
137:   c->freedata = PETSC_TRUE;
138:   c->nonew    = 0;

140:   /* Clean up. */
141:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

143:   PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
144:   return(0);
145: }

147: /*
148:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
149:            C = P * A * P^T;
150:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
151: */
154: int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
155:   int        ierr,flops=0;
156:   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
157:   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
158:   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
159:   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
160:   int        *ci=c->i,*cj=c->j;
161:   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
162:   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
163:   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;


167:   /* This error checking should be unnecessary if the symbolic was performed */
168:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
169:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
170:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
171:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);

173:   /* Set up timers */
174:   if (!logkey_matapplypapt_numeric) {
175:     PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
176:   }
177:   PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);

179:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);
180:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));
181:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

183:   paj      = (int *)(paa + an);
184:   pajdense = paj + an;

186:   for (i=0;i<pm;i++) {
187:     /* Form sparse row of P*A */
188:     pnzi  = pi[i+1] - pi[i];
189:     panzj = 0;
190:     for (j=0;j<pnzi;j++) {
191:       arow = *pj++;
192:       anzj = ai[arow+1] - ai[arow];
193:       ajj  = aj + ai[arow];
194:       aaj  = aa + ai[arow];
195:       for (k=0;k<anzj;k++) {
196:         if (!pajdense[ajj[k]]) {
197:           pajdense[ajj[k]] = -1;
198:           paj[panzj++]     = ajj[k];
199:         }
200:         paa[ajj[k]] += (*pa)*aaj[k];
201:       }
202:       flops += 2*anzj;
203:       pa++;
204:     }

206:     /* Sort the j index array for quick sparse axpy. */
207:     PetscSortInt(panzj,paj);

209:     /* Compute P*A*P^T using sparse inner products. */
210:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
211:     cnzi = ci[i+1] - ci[i];
212:     for (j=0;j<cnzi;j++) {
213:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
214:       ptcol = *cj++;
215:       ptnzj = pi[ptcol+1] - pi[ptcol];
216:       ptj   = pjj + pi[ptcol];
217:       ptaj  = pta + pi[ptcol];
218:       sum   = 0.;
219:       k1    = 0;
220:       k2    = 0;
221:       while ((k1<panzj) && (k2<ptnzj)) {
222:         if (paj[k1]==ptj[k2]) {
223:           sum += paa[paj[k1++]]*ptaj[k2++];
224:         } else if (paj[k1] < ptj[k2]) {
225:           k1++;
226:         } else /* if (paj[k1] > ptj[k2]) */ {
227:           k2++;
228:         }
229:       }
230:       *ca++ = sum;
231:     }

233:     /* Zero the current row info for P*A */
234:     for (j=0;j<panzj;j++) {
235:       paa[paj[j]]      = 0.;
236:       pajdense[paj[j]] = 0;
237:     }
238:   }

240:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
241:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
242:   PetscLogFlops(flops);
243:   PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
244:   return(0);
245: }
246: 
249: int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {

253:   if (!logkey_matapplypapt) {
254:     PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
255:   }
256:   PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
257:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
258:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
259:   PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
260:   return(0);
261: }