Actual source code: mpiaij.c

  1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

 3:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 4:  #include "src/inline/spops.h"

  6: /* 
  7:   Local utility routine that creates a mapping from the global column 
  8: number to the local number in the off-diagonal part of the local 
  9: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
 10: a slightly higher hash table cost; without it, it is not scalable (each processor
 11: has an order N integer array) but it is fast to access.
 12: */
 15: int CreateColmap_MPIAIJ_Private(Mat mat)
 16: {
 17:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
 18:   int        n = aij->B->n,i,ierr;

 21: #if defined (PETSC_USE_CTABLE)
 22:   PetscTableCreate(n,&aij->colmap);
 23:   for (i=0; i<n; i++){
 24:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 25:   }
 26: #else
 27:   PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
 28:   PetscLogObjectMemory(mat,mat->N*sizeof(int));
 29:   PetscMemzero(aij->colmap,mat->N*sizeof(int));
 30:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 31: #endif
 32:   return(0);
 33: }
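
/*
   Illustrative sketch (not part of the original source): how the colmap built
   above is consulted to translate a global column number gcol into a local
   column index of the off-diagonal block, mirroring the lookups that appear
   later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().  A result of -1
   means the column does not occur in the off-diagonal part on this processor.
*/
static int FindLocalColumn_Sketch(Mat_MPIAIJ *aij,int gcol,int *lcol)
{
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,lcol);   /* table stores local index + 1; missing keys give 0 */
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;             /* array likewise stores local index + 1 */
#endif
  return 0;
}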

 35: #define CHUNKSIZE   15
 36: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 37: { \
 38:  \
 39:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 40:     rmax = aimax[row]; nrow = ailen[row];  \
 41:     col1 = col - shift; \
 42:      \
 43:     low = 0; high = nrow; \
 44:     while (high-low > 5) { \
 45:       t = (low+high)/2; \
 46:       if (rp[t] > col) high = t; \
 47:       else             low  = t; \
 48:     } \
 49:       for (_i=low; _i<high; _i++) { \
 50:         if (rp[_i] > col1) break; \
 51:         if (rp[_i] == col1) { \
 52:           if (addv == ADD_VALUES) ap[_i] += value;   \
 53:           else                  ap[_i] = value; \
 54:           goto a_noinsert; \
 55:         } \
 56:       }  \
 57:       if (nonew == 1) goto a_noinsert; \
 58:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
 59:       if (nrow >= rmax) { \
 60:         /* there is no extra room in row, therefore enlarge */ \
 61:         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 62:         PetscScalar *new_a; \
 63:  \
 64:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 65:  \
 66:         /* malloc new storage space */ \
 67:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
 68:         PetscMalloc(len,&new_a); \
 69:         new_j   = (int*)(new_a + new_nz); \
 70:         new_i   = new_j + new_nz; \
 71:  \
 72:         /* copy over old data into new slots */ \
 73:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 74:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 75:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
 76:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 77:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 78:                                                            len*sizeof(int)); \
 79:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
 80:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 81:                                                            len*sizeof(PetscScalar));  \
 82:         /* free up old matrix storage */ \
 83:  \
 84:         PetscFree(a->a);  \
 85:         if (!a->singlemalloc) { \
 86:            PetscFree(a->i); \
 87:            PetscFree(a->j); \
 88:         } \
 89:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
 90:         a->singlemalloc = PETSC_TRUE; \
 91:  \
 92:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 93:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
 94:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
 95:         a->maxnz += CHUNKSIZE; \
 96:         a->reallocs++; \
 97:       } \
 98:       N = nrow++ - 1; a->nz++; \
 99:       /* shift up all the later entries in this row */ \
100:       for (ii=N; ii>=_i; ii--) { \
101:         rp[ii+1] = rp[ii]; \
102:         ap[ii+1] = ap[ii]; \
103:       } \
104:       rp[_i] = col1;  \
105:       ap[_i] = value;  \
106:       a_noinsert: ; \
107:       ailen[row] = nrow; \
108: } 

110: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111: { \
112:  \
113:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114:     rmax = bimax[row]; nrow = bilen[row];  \
115:     col1 = col - shift; \
116:      \
117:     low = 0; high = nrow; \
118:     while (high-low > 5) { \
119:       t = (low+high)/2; \
120:       if (rp[t] > col) high = t; \
121:       else             low  = t; \
122:     } \
123:        for (_i=low; _i<high; _i++) { \
124:         if (rp[_i] > col1) break; \
125:         if (rp[_i] == col1) { \
126:           if (addv == ADD_VALUES) ap[_i] += value;   \
127:           else                  ap[_i] = value; \
128:           goto b_noinsert; \
129:         } \
130:       }  \
131:       if (nonew == 1) goto b_noinsert; \
132:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133:       if (nrow >= rmax) { \
134:         /* there is no extra room in row, therefore enlarge */ \
135:         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136:         PetscScalar *new_a; \
137:  \
138:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139:  \
140:         /* malloc new storage space */ \
141:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142:         PetscMalloc(len,&new_a); \
143:         new_j   = (int*)(new_a + new_nz); \
144:         new_i   = new_j + new_nz; \
145:  \
146:         /* copy over old data into new slots */ \
147:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
150:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152:                                                            len*sizeof(int)); \
153:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
154:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155:                                                            len*sizeof(PetscScalar));  \
156:         /* free up old matrix storage */ \
157:  \
158:         PetscFree(b->a);  \
159:         if (!b->singlemalloc) { \
160:           PetscFree(b->i); \
161:           PetscFree(b->j); \
162:         } \
163:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164:         b->singlemalloc = PETSC_TRUE; \
165:  \
166:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169:         b->maxnz += CHUNKSIZE; \
170:         b->reallocs++; \
171:       } \
172:       N = nrow++ - 1; b->nz++; \
173:       /* shift up all the later entries in this row */ \
174:       for (ii=N; ii>=_i; ii--) { \
175:         rp[ii+1] = rp[ii]; \
176:         ap[ii+1] = ap[ii]; \
177:       } \
178:       rp[_i] = col1;  \
179:       ap[_i] = value;  \
180:       b_noinsert: ; \
181:       bilen[row] = nrow; \
182: }
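
/*
   Illustrative sketch (not part of the original source): the core of the two
   macros above written as a plain function on a single sorted CSR row.  It
   assumes there is still room in the row; the macros additionally grow the
   storage by CHUNKSIZE when the row is full.
*/
static int InsertIntoSortedRow_Sketch(int *cols,PetscScalar *vals,int *nrow,int col,PetscScalar value,InsertMode addv)
{
  int i,t,N,low = 0,high = *nrow;

  while (high - low > 5) {                 /* binary search narrows the window */
    t = (low + high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {               /* linear scan inside the window */
    if (cols[i] > col) break;
    if (cols[i] == col) {                  /* entry already exists: combine */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return 0;
    }
  }
  for (N=*nrow-1; N>=i; N--) {             /* shift the later entries up one slot */
    cols[N+1] = cols[N];
    vals[N+1] = vals[N];
  }
  cols[i] = col; vals[i] = value;          /* insert the new nonzero */
  (*nrow)++;
  return 0;
}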

186: int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187: {
188:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189:   PetscScalar  value;
190:   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
191:   int          cstart = aij->cstart,cend = aij->cend,row,col;
192:   PetscTruth   roworiented = aij->roworiented;

194:   /* Some variables required by the macros */
195:   Mat          A = aij->A;
196:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
197:   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
198:   PetscScalar  *aa = a->a;
199:   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
200:   Mat          B = aij->B;
201:   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
202:   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
203:   PetscScalar  *ba = b->a;

205:   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
206:   int          nonew = a->nonew,shift=0;
207:   PetscScalar  *ap;

210:   for (i=0; i<m; i++) {
211:     if (im[i] < 0) continue;
212: #if defined(PETSC_USE_BOPT_g)
213:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
214: #endif
215:     if (im[i] >= rstart && im[i] < rend) {
216:       row = im[i] - rstart;
217:       for (j=0; j<n; j++) {
218:         if (in[j] >= cstart && in[j] < cend){
219:           col = in[j] - cstart;
220:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
221:           if (ignorezeroentries && value == 0.0) continue;
222:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
223:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
224:         } else if (in[j] < 0) continue;
225: #if defined(PETSC_USE_BOPT_g)
226:         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
227: #endif
228:         else {
229:           if (mat->was_assembled) {
230:             if (!aij->colmap) {
231:               CreateColmap_MPIAIJ_Private(mat);
232:             }
233: #if defined (PETSC_USE_CTABLE)
234:             PetscTableFind(aij->colmap,in[j]+1,&col);
235:             col--;
236: #else
237:             col = aij->colmap[in[j]] - 1;
238: #endif
239:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240:               DisAssemble_MPIAIJ(mat);
241:               col =  in[j];
242:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243:               B = aij->B;
244:               b = (Mat_SeqAIJ*)B->data;
245:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246:               ba = b->a;
247:             }
248:           } else col = in[j];
249:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250:           if (ignorezeroentries && value == 0.0) continue;
251:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
253:         }
254:       }
255:     } else {
256:       if (!aij->donotstash) {
257:         if (roworiented) {
258:           if (ignorezeroentries && v[i*n] == 0.0) continue;
259:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
260:         } else {
261:           if (ignorezeroentries && v[i] == 0.0) continue;
262:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
263:         }
264:       }
265:     }
266:   }
267:   return(0);
268: }

272: int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
273: {
274:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
275:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
276:   int        cstart = aij->cstart,cend = aij->cend,row,col;

279:   for (i=0; i<m; i++) {
280:     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
281:     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
282:     if (idxm[i] >= rstart && idxm[i] < rend) {
283:       row = idxm[i] - rstart;
284:       for (j=0; j<n; j++) {
285:         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
286:         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
287:         if (idxn[j] >= cstart && idxn[j] < cend){
288:           col = idxn[j] - cstart;
289:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
290:         } else {
291:           if (!aij->colmap) {
292:             CreateColmap_MPIAIJ_Private(mat);
293:           }
294: #if defined (PETSC_USE_CTABLE)
295:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
296:           col --;
297: #else
298:           col = aij->colmap[idxn[j]] - 1;
299: #endif
300:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301:           else {
302:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
303:           }
304:         }
305:       }
306:     } else {
307:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
308:     }
309:   }
310:   return(0);
311: }

315: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316: {
317:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
318:   int         ierr,nstash,reallocs;
319:   InsertMode  addv;

322:   if (aij->donotstash) {
323:     return(0);
324:   }

326:   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
327:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
328:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330:   }
331:   mat->insertmode = addv; /* in case this processor had no cache */

333:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
334:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
335:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336:   return(0);
337: }
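
/*
   Illustrative sketch (not part of the original source) of why the MPI_BOR
   reduction in MatAssemblyBegin_MPIAIJ() detects mixed modes: treating the
   InsertMode values as bit patterns, the bitwise OR over all processors equals
   ADD_VALUES|INSERT_VALUES exactly when some processors inserted while others
   added.  The same test written for a local array of modes:
*/
static PetscTruth MixedInsertModes_Sketch(const InsertMode modes[],int n)
{
  int i,combined = 0;

  for (i=0; i<n; i++) combined |= (int)modes[i];  /* stands in for MPI_Allreduce(...,MPI_BOR,...) */
  return (combined == (ADD_VALUES|INSERT_VALUES)) ? PETSC_TRUE : PETSC_FALSE;
}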


342: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343: {
344:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
345:   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
346:   int         i,j,rstart,ncols,n,ierr,flg;
347:   int         *row,*col,other_disassembled;
348:   PetscScalar *val;
349:   InsertMode  addv = mat->insertmode;

352:   if (!aij->donotstash) {
353:     while (1) {
354:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
355:       if (!flg) break;

357:       for (i=0; i<n;) {
358:         /* Now identify the consecutive vals belonging to the same row */
359:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
360:         if (j < n) ncols = j-i;
361:         else       ncols = n-i;
362:         /* Now assemble all these values with a single function call */
363:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
364:         i = j;
365:       }
366:     }
367:     MatStashScatterEnd_Private(&mat->stash);
368:   }
369: 
370:   MatAssemblyBegin(aij->A,mode);
371:   MatAssemblyEnd(aij->A,mode);

373:   /* determine if any processor has disassembled; if so we must 
374:      also disassemble ourselves, in order that we may reassemble. */
375:   /*
376:      if nonzero structure of submatrix B cannot change then we know that
377:      no processor disassembled thus we can skip this stuff
378:   */
379:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
380:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
381:     if (mat->was_assembled && !other_disassembled) {
382:       DisAssemble_MPIAIJ(mat);
383:     }
384:   }

386:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
387:     MatSetUpMultiply_MPIAIJ(mat);
388:   }
389:   MatAssemblyBegin(aij->B,mode);
390:   MatAssemblyEnd(aij->B,mode);

392:   if (aij->rowvalues) {
393:     PetscFree(aij->rowvalues);
394:     aij->rowvalues = 0;
395:   }

397:   /* used by MatAXPY() */
398:   a->xtoy = 0; b->xtoy = 0;
399:   a->XtoY = 0; b->XtoY = 0;

401:   return(0);
402: }

406: int MatZeroEntries_MPIAIJ(Mat A)
407: {
408:   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
409:   int        ierr;

412:   MatZeroEntries(l->A);
413:   MatZeroEntries(l->B);
414:   return(0);
415: }

419: int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
420: {
421:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
422:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
423:   int            *nprocs,j,idx,nsends,row;
424:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426:   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427:   MPI_Comm       comm = A->comm;
428:   MPI_Request    *send_waits,*recv_waits;
429:   MPI_Status     recv_status,*send_status;
430:   IS             istmp;
431:   PetscTruth     found;

434:   ISGetLocalSize(is,&N);
435:   ISGetIndices(is,&rows);

437:   /*  first count number of contributors to each processor */
438:   PetscMalloc(2*size*sizeof(int),&nprocs);
439:   PetscMemzero(nprocs,2*size*sizeof(int));
440:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
441:   for (i=0; i<N; i++) {
442:     idx = rows[i];
443:     found = PETSC_FALSE;
444:     for (j=0; j<size; j++) {
445:       if (idx >= owners[j] && idx < owners[j+1]) {
446:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
447:       }
448:     }
449:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
450:   }
451:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
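  /* nprocs[] is used in pairs: nprocs[2*j] counts the rows in "is" owned by processor j,
     and nprocs[2*j+1] is a 0/1 flag marking that a message must go to processor j, so
     nsends is the number of messages this processor sends.  PetscMaxSum() below yields
     nmax, a bound on the length of any incoming message, and nrecvs, the number of
     messages this processor will receive. */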

453:   /* inform other processors of number of messages and max length*/
454:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

456:   /* post receives:   */
457:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
458:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
459:   for (i=0; i<nrecvs; i++) {
460:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
461:   }

463:   /* do sends:
464:       1) starts[i] gives the starting index in svalues for stuff going to 
465:          the ith processor
466:   */
467:   PetscMalloc((N+1)*sizeof(int),&svalues);
468:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
469:   PetscMalloc((size+1)*sizeof(int),&starts);
470:   starts[0] = 0;
471:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
472:   for (i=0; i<N; i++) {
473:     svalues[starts[owner[i]]++] = rows[i];
474:   }
475:   ISRestoreIndices(is,&rows);

477:   starts[0] = 0;
478:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479:   count = 0;
480:   for (i=0; i<size; i++) {
481:     if (nprocs[2*i+1]) {
482:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
483:     }
484:   }
485:   PetscFree(starts);

487:   base = owners[rank];

489:   /*  wait on receives */
490:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
491:   source = lens + nrecvs;
492:   count  = nrecvs; slen = 0;
493:   while (count) {
494:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
495:     /* unpack receives into our local space */
496:     MPI_Get_count(&recv_status,MPI_INT,&n);
497:     source[imdex]  = recv_status.MPI_SOURCE;
498:     lens[imdex]    = n;
499:     slen          += n;
500:     count--;
501:   }
502:   PetscFree(recv_waits);
503: 
504:   /* move the data into the send scatter */
505:   PetscMalloc((slen+1)*sizeof(int),&lrows);
506:   count = 0;
507:   for (i=0; i<nrecvs; i++) {
508:     values = rvalues + i*nmax;
509:     for (j=0; j<lens[i]; j++) {
510:       lrows[count++] = values[j] - base;
511:     }
512:   }
513:   PetscFree(rvalues);
514:   PetscFree(lens);
515:   PetscFree(owner);
516:   PetscFree(nprocs);
517: 
518:   /* actually zap the local rows */
519:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
520:   PetscLogObjectParent(A,istmp);

522:   /*
523:         Zero the required rows. If the "diagonal block" of the matrix
524:      is square and the user wishes to set the diagonal we use separate
525:      code so that MatSetValues() is not called for each diagonal entry,
526:      which would allocate new memory and cause lots of mallocs, slowing things down.

528:        Contributed by: Mathew Knepley
529:   */
530:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
531:   MatZeroRows(l->B,istmp,0);
532:   if (diag && (l->A->M == l->A->N)) {
533:     MatZeroRows(l->A,istmp,diag);
534:   } else if (diag) {
535:     MatZeroRows(l->A,istmp,0);
536:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
537:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
538: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
539:     }
540:     for (i = 0; i < slen; i++) {
541:       row  = lrows[i] + rstart;
542:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
543:     }
544:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
545:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
546:   } else {
547:     MatZeroRows(l->A,istmp,0);
548:   }
549:   ISDestroy(istmp);
550:   PetscFree(lrows);

552:   /* wait on sends */
553:   if (nsends) {
554:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
555:     MPI_Waitall(nsends,send_waits,send_status);
556:     PetscFree(send_status);
557:   }
558:   PetscFree(send_waits);
559:   PetscFree(svalues);

561:   return(0);
562: }

566: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
567: {
568:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
569:   int        ierr,nt;

572:   VecGetLocalSize(xx,&nt);
573:   if (nt != A->n) {
574:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
575:   }
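  /* Gather the ghost values of xx needed by the off-diagonal block a->B into a->lvec;
     the scatter is overlapped with the multiply by the diagonal block a->A, so that
     yy = a->A*xx + a->B*lvec. */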
576:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
577:   (*a->A->ops->mult)(a->A,xx,yy);
578:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
579:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
580:   return(0);
581: }

585: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586: {
587:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588:   int        ierr;

591:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
592:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
593:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
594:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
595:   return(0);
596: }

600: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
601: {
602:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603:   int        ierr;

606:   /* do nondiagonal part */
607:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
608:   /* send it on its way */
609:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
610:   /* do local part */
611:   (*a->A->ops->multtranspose)(a->A,xx,yy);
612:   /* receive remote parts: note this assumes the values are not actually */
613:   /* inserted in yy until the next line, which is true for my implementation */
614:   /* but perhaps not always true. */
615:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
616:   return(0);
617: }

619: EXTERN_C_BEGIN
622: int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
623: {
624:   MPI_Comm comm;
625:   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626:   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627:   IS         Me,Notme;
628:   int        M,N,first,last,*notme,ntids,i, ierr;


632:   /* Easy test: symmetric diagonal block */
633:   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634:   MatIsTranspose(Adia,Bdia,f);
635:   if (!*f) return(0);
636:   PetscObjectGetComm((PetscObject)Amat,&comm);
637:   MPI_Comm_size(comm,&ntids);
638:   if (ntids==1) return(0);

640:   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
641:   MatGetSize(Amat,&M,&N);
642:   MatGetOwnershipRange(Amat,&first,&last);
643:   PetscMalloc((N-last+first)*sizeof(int),&notme);
644:   for (i=0; i<first; i++) notme[i] = i;
645:   for (i=last; i<M; i++) notme[i-last+first] = i;
646:   ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
647:   ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
648:   MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
649:   Aoff = Aoffs[0];
650:   MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
651:   Boff = Boffs[0];
652:   MatIsTranspose(Aoff,Boff,f);
653:   MatDestroyMatrices(1,&Aoffs);
654:   MatDestroyMatrices(1,&Boffs);
655:   ISDestroy(Me);
656:   ISDestroy(Notme);

658:   return(0);
659: }
660: EXTERN_C_END

664: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
665: {
666:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
667:   int        ierr;

670:   /* do nondiagonal part */
671:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
672:   /* send it on its way */
673:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
674:   /* do local part */
675:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
676:   /* receive remote parts: note this assumes the values are not actually */
677:   /* inserted in zz until the next line, which is true for my implementation */
678:   /* but perhaps not always true. */
679:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
680:   return(0);
681: }

683: /*
684:   This only works correctly for square matrices where the subblock A->A is the 
685:    diagonal block
686: */
689: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
690: {
691:   int        ierr;
692:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

695:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
696:   if (a->rstart != a->cstart || a->rend != a->cend) {
697:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
698:   }
699:   MatGetDiagonal(a->A,v);
700:   return(0);
701: }

705: int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
706: {
707:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
708:   int        ierr;

711:   MatScale(aa,a->A);
712:   MatScale(aa,a->B);
713:   return(0);
714: }

718: int MatDestroy_MPIAIJ(Mat mat)
719: {
720:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721:   int        ierr;

724: #if defined(PETSC_USE_LOG)
725:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
726: #endif
727:   MatStashDestroy_Private(&mat->stash);
728:   PetscFree(aij->rowners);
729:   MatDestroy(aij->A);
730:   MatDestroy(aij->B);
731: #if defined (PETSC_USE_CTABLE)
732:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
733: #else
734:   if (aij->colmap) {PetscFree(aij->colmap);}
735: #endif
736:   if (aij->garray) {PetscFree(aij->garray);}
737:   if (aij->lvec)   {VecDestroy(aij->lvec);}
738:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
739:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
740:   PetscFree(aij);
741:   return(0);
742: }

746: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747: {
748:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
749:   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
750:   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
751:   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
752:   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
753:   PetscScalar       *column_values;

756:   MPI_Comm_rank(mat->comm,&rank);
757:   MPI_Comm_size(mat->comm,&size);
758:   nz   = A->nz + B->nz;
759:   if (rank == 0) {
760:     header[0] = MAT_FILE_COOKIE;
761:     header[1] = mat->M;
762:     header[2] = mat->N;
763:     MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
764:     PetscViewerBinaryGetDescriptor(viewer,&fd);
765:     PetscBinaryWrite(fd,header,4,PETSC_INT,1);
766:     /* get largest number of rows any processor has */
767:     rlen = mat->m;
768:     PetscMapGetGlobalRange(mat->rmap,&range);
769:     for (i=1; i<size; i++) {
770:       rlen = PetscMax(rlen,range[i+1] - range[i]);
771:     }
772:   } else {
773:     MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
774:     rlen = mat->m;
775:   }

777:   /* load up the local row counts */
778:   PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
779:   for (i=0; i<mat->m; i++) {
780:     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781:   }

783:   /* store the row lengths to the file */
784:   if (rank == 0) {
785:     MPI_Status status;
786:     PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
787:     for (i=1; i<size; i++) {
788:       rlen = range[i+1] - range[i];
789:       MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
790:       PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
791:     }
792:   } else {
793:     MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
794:   }
795:   PetscFree(row_lengths);

797:   /* load up the local column indices */
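  /* For each local row the global column indices are emitted in increasing order
     (assuming sorted rows) by interleaving the two blocks: first the off-diagonal (B)
     columns lying to the left of this processor's diagonal block, then the diagonal
     block (A) columns shifted by cstart, then the remaining off-diagonal columns. */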
798:   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
799:   MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
800:   PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
801:   cnt  = 0;
802:   for (i=0; i<mat->m; i++) {
803:     for (j=B->i[i]; j<B->i[i+1]; j++) {
804:       if ( (col = garray[B->j[j]]) > cstart) break;
805:       column_indices[cnt++] = col;
806:     }
807:     for (k=A->i[i]; k<A->i[i+1]; k++) {
808:       column_indices[cnt++] = A->j[k] + cstart;
809:     }
810:     for (; j<B->i[i+1]; j++) {
811:       column_indices[cnt++] = garray[B->j[j]];
812:     }
813:   }
814:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

816:   /* store the column indices to the file */
817:   if (rank == 0) {
818:     MPI_Status status;
819:     PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
820:     for (i=1; i<size; i++) {
821:       MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
822:         if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
823:       MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
824:       PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
825:     }
826:   } else {
827:     MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
828:     MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
829:   }
830:   PetscFree(column_indices);

832:   /* load up the local column values */
833:   PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
834:   cnt  = 0;
835:   for (i=0; i<mat->m; i++) {
836:     for (j=B->i[i]; j<B->i[i+1]; j++) {
837:       if ( garray[B->j[j]] > cstart) break;
838:       column_values[cnt++] = B->a[j];
839:     }
840:     for (k=A->i[i]; k<A->i[i+1]; k++) {
841:       column_values[cnt++] = A->a[k];
842:     }
843:     for (; j<B->i[i+1]; j++) {
844:       column_values[cnt++] = B->a[j];
845:     }
846:   }
847:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

849:   /* store the column values to the file */
850:   if (rank == 0) {
851:     MPI_Status status;
852:     PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
853:     for (i=1; i<size; i++) {
854:       MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
855:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
856:       MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
857:       PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
858:     }
859:   } else {
860:     MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
861:     MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
862:   }
863:   PetscFree(column_values);
864:   return(0);
865: }

869: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870: {
871:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872:   int               ierr,rank = aij->rank,size = aij->size;
873:   PetscTruth        isdraw,isascii,flg,isbinary;
874:   PetscViewer       sviewer;
875:   PetscViewerFormat format;

878:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
880:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881:   if (isascii) {
882:     PetscViewerGetFormat(viewer,&format);
883:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884:       MatInfo info;
885:       MPI_Comm_rank(mat->comm,&rank);
886:       MatGetInfo(mat,MAT_LOCAL,&info);
887:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
888:       if (flg) {
889:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
890:                                               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
891:       } else {
892:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
893:                     rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
894:       }
895:       MatGetInfo(aij->A,MAT_LOCAL,&info);
896:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
897:       MatGetInfo(aij->B,MAT_LOCAL,&info);
898:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
899:       PetscViewerFlush(viewer);
900:       VecScatterView(aij->Mvctx,viewer);
901:       return(0);
902:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
903:       return(0);
904:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
905:       return(0);
906:     }
907:   } else if (isbinary) {
908:     if (size == 1) {
909:       PetscObjectSetName((PetscObject)aij->A,mat->name);
910:       MatView(aij->A,viewer);
911:     } else {
912:       MatView_MPIAIJ_Binary(mat,viewer);
913:     }
914:     return(0);
915:   } else if (isdraw) {
916:     PetscDraw  draw;
917:     PetscTruth isnull;
918:     PetscViewerDrawGetDraw(viewer,0,&draw);
919:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
920:   }

922:   if (size == 1) {
923:     PetscObjectSetName((PetscObject)aij->A,mat->name);
924:     MatView(aij->A,viewer);
925:   } else {
926:     /* assemble the entire matrix onto first processor. */
927:     Mat         A;
928:     Mat_SeqAIJ *Aloc;
929:     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
930:     PetscScalar *a;

932:     if (!rank) {
933:       MatCreate(mat->comm,M,N,M,N,&A);
934:     } else {
935:       MatCreate(mat->comm,0,0,M,N,&A);
936:     }
937:     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
938:     MatSetType(A,MATMPIAIJ);
939:     MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
940:     PetscLogObjectParent(mat,A);

942:     /* copy over the A part */
943:     Aloc = (Mat_SeqAIJ*)aij->A->data;
944:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
945:     row = aij->rstart;
946:     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
947:     for (i=0; i<m; i++) {
948:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
949:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
950:     }
951:     aj = Aloc->j;
952:     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

954:     /* copy over the B part */
955:     Aloc = (Mat_SeqAIJ*)aij->B->data;
956:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
957:     row  = aij->rstart;
958:     PetscMalloc((ai[m]+1)*sizeof(int),&cols);
959:     ct   = cols;
960:     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
961:     for (i=0; i<m; i++) {
962:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
963:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
964:     }
965:     PetscFree(ct);
966:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
967:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
968:     /* 
969:        Everyone has to participate in drawing the matrix since the graphics waits are
970:        synchronized across all processors that share the PetscDraw object
971:     */
972:     PetscViewerGetSingleton(viewer,&sviewer);
973:     if (!rank) {
974:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
975:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
976:     }
977:     PetscViewerRestoreSingleton(viewer,&sviewer);
978:     MatDestroy(A);
979:   }
980:   return(0);
981: }

985: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
986: {
987:   int        ierr;
988:   PetscTruth isascii,isdraw,issocket,isbinary;
989: 
991:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
992:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
993:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
994:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
995:   if (isascii || isdraw || isbinary || issocket) {
996:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
997:   } else {
998:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
999:   }
1000:   return(0);
1001: }



1007: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1008: {
1009:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1010:   int          ierr;
1011:   Vec          bb1;
1012:   PetscScalar  mone=-1.0;

1015:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

1017:   VecDuplicate(bb,&bb1);
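  /* Each local sweep below relaxes only with the diagonal block mat->A; the coupling to
     other processors is folded into the right-hand side first: the ghost values of xx
     are gathered into mat->lvec, negated with VecScale, and bb1 = bb - B*x_ghost is then
     formed with the multadd on mat->B. */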

1019:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1020:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1021:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1022:       its--;
1023:     }
1024: 
1025:     while (its--) {
1026:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1027:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1029:       /* update rhs: bb1 = bb - B*x */
1030:       VecScale(&mone,mat->lvec);
1031:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1033:       /* local sweep */
1034:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1035: 
1036:     }
1037:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1038:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1039:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1040:       its--;
1041:     }
1042:     while (its--) {
1043:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1044:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1046:       /* update rhs: bb1 = bb - B*x */
1047:       VecScale(&mone,mat->lvec);
1048:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1050:       /* local sweep */
1051:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1052: 
1053:     }
1054:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1055:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1056:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1057:       its--;
1058:     }
1059:     while (its--) {
1060:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1061:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1063:       /* update rhs: bb1 = bb - B*x */
1064:       VecScale(&mone,mat->lvec);
1065:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1067:       /* local sweep */
1068:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1069: 
1070:     }
1071:   } else {
1072:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1073:   }

1075:   VecDestroy(bb1);
1076:   return(0);
1077: }

1081: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082: {
1083:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1084:   Mat        A = mat->A,B = mat->B;
1085:   int        ierr;
1086:   PetscReal  isend[5],irecv[5];

1089:   info->block_size     = 1.0;
1090:   MatGetInfo(A,MAT_LOCAL,info);
1091:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092:   isend[3] = info->memory;  isend[4] = info->mallocs;
1093:   MatGetInfo(B,MAT_LOCAL,info);
1094:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095:   isend[3] += info->memory;  isend[4] += info->mallocs;
1096:   if (flag == MAT_LOCAL) {
1097:     info->nz_used      = isend[0];
1098:     info->nz_allocated = isend[1];
1099:     info->nz_unneeded  = isend[2];
1100:     info->memory       = isend[3];
1101:     info->mallocs      = isend[4];
1102:   } else if (flag == MAT_GLOBAL_MAX) {
1103:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1104:     info->nz_used      = irecv[0];
1105:     info->nz_allocated = irecv[1];
1106:     info->nz_unneeded  = irecv[2];
1107:     info->memory       = irecv[3];
1108:     info->mallocs      = irecv[4];
1109:   } else if (flag == MAT_GLOBAL_SUM) {
1110:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1111:     info->nz_used      = irecv[0];
1112:     info->nz_allocated = irecv[1];
1113:     info->nz_unneeded  = irecv[2];
1114:     info->memory       = irecv[3];
1115:     info->mallocs      = irecv[4];
1116:   }
1117:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1118:   info->fill_ratio_needed = 0;
1119:   info->factor_mallocs    = 0;
1120:   info->rows_global       = (double)matin->M;
1121:   info->columns_global    = (double)matin->N;
1122:   info->rows_local        = (double)matin->m;
1123:   info->columns_local     = (double)matin->N;

1125:   return(0);
1126: }

1130: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131: {
1132:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1133:   int        ierr;

1136:   switch (op) {
1137:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1138:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1139:   case MAT_COLUMNS_UNSORTED:
1140:   case MAT_COLUMNS_SORTED:
1141:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1142:   case MAT_KEEP_ZEROED_ROWS:
1143:   case MAT_NEW_NONZERO_LOCATION_ERR:
1144:   case MAT_USE_INODES:
1145:   case MAT_DO_NOT_USE_INODES:
1146:   case MAT_IGNORE_ZERO_ENTRIES:
1147:     MatSetOption(a->A,op);
1148:     MatSetOption(a->B,op);
1149:     break;
1150:   case MAT_ROW_ORIENTED:
1151:     a->roworiented = PETSC_TRUE;
1152:     MatSetOption(a->A,op);
1153:     MatSetOption(a->B,op);
1154:     break;
1155:   case MAT_ROWS_SORTED:
1156:   case MAT_ROWS_UNSORTED:
1157:   case MAT_YES_NEW_DIAGONALS:
1158:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1159:     break;
1160:   case MAT_COLUMN_ORIENTED:
1161:     a->roworiented = PETSC_FALSE;
1162:     MatSetOption(a->A,op);
1163:     MatSetOption(a->B,op);
1164:     break;
1165:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1166:     a->donotstash = PETSC_TRUE;
1167:     break;
1168:   case MAT_NO_NEW_DIAGONALS:
1169:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1170:   case MAT_SYMMETRIC:
1171:   case MAT_STRUCTURALLY_SYMMETRIC:
1172:   case MAT_NOT_SYMMETRIC:
1173:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1174:   case MAT_HERMITIAN:
1175:   case MAT_NOT_HERMITIAN:
1176:   case MAT_SYMMETRY_ETERNAL:
1177:   case MAT_NOT_SYMMETRY_ETERNAL:
1178:     break;
1179:   default:
1180:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181:   }
1182:   return(0);
1183: }

1187: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1188: {
1189:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1190:   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1191:   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1192:   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1193:   int          *cmap,*idx_p;

1196:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1197:   mat->getrowactive = PETSC_TRUE;

1199:   if (!mat->rowvalues && (idx || v)) {
1200:     /*
1201:         allocate enough space to hold information from the longest row.
1202:     */
1203:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1204:     int     max = 1,tmp;
1205:     for (i=0; i<matin->m; i++) {
1206:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1207:       if (max < tmp) { max = tmp; }
1208:     }
1209:     PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1210:     mat->rowindices = (int*)(mat->rowvalues + max);
1211:   }

1213:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1214:   lrow = row - rstart;

1216:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1217:   if (!v)   {pvA = 0; pvB = 0;}
1218:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1219:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1220:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1221:   nztot = nzA + nzB;

1223:   cmap  = mat->garray;
1224:   if (v  || idx) {
1225:     if (nztot) {
1226:       /* Sort by increasing column numbers, assuming A and B already sorted */
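      /* imark counts how many of B's entries have global column below cstart; those
         come first, then all of A's entries shifted by cstart, then the rest of B's */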
1227:       int imark = -1;
1228:       if (v) {
1229:         *v = v_p = mat->rowvalues;
1230:         for (i=0; i<nzB; i++) {
1231:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1232:           else break;
1233:         }
1234:         imark = i;
1235:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1236:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1237:       }
1238:       if (idx) {
1239:         *idx = idx_p = mat->rowindices;
1240:         if (imark > -1) {
1241:           for (i=0; i<imark; i++) {
1242:             idx_p[i] = cmap[cworkB[i]];
1243:           }
1244:         } else {
1245:           for (i=0; i<nzB; i++) {
1246:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1247:             else break;
1248:           }
1249:           imark = i;
1250:         }
1251:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1252:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1253:       }
1254:     } else {
1255:       if (idx) *idx = 0;
1256:       if (v)   *v   = 0;
1257:     }
1258:   }
1259:   *nz = nztot;
1260:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1261:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1262:   return(0);
1263: }

1267: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1268: {
1269:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1272:   if (aij->getrowactive == PETSC_FALSE) {
1273:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1274:   }
1275:   aij->getrowactive = PETSC_FALSE;
1276:   return(0);
1277: }

1281: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1282: {
1283:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1284:   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1285:   int          ierr,i,j,cstart = aij->cstart;
1286:   PetscReal    sum = 0.0;
1287:   PetscScalar  *v;

1290:   if (aij->size == 1) {
1291:      MatNorm(aij->A,type,norm);
1292:   } else {
1293:     if (type == NORM_FROBENIUS) {
1294:       v = amat->a;
1295:       for (i=0; i<amat->nz; i++) {
1296: #if defined(PETSC_USE_COMPLEX)
1297:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298: #else
1299:         sum += (*v)*(*v); v++;
1300: #endif
1301:       }
1302:       v = bmat->a;
1303:       for (i=0; i<bmat->nz; i++) {
1304: #if defined(PETSC_USE_COMPLEX)
1305:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1306: #else
1307:         sum += (*v)*(*v); v++;
1308: #endif
1309:       }
1310:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1311:       *norm = sqrt(*norm);
1312:     } else if (type == NORM_1) { /* max column norm */
1313:       PetscReal *tmp,*tmp2;
1314:       int    *jj,*garray = aij->garray;
1315:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1316:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1317:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1318:       *norm = 0.0;
1319:       v = amat->a; jj = amat->j;
1320:       for (j=0; j<amat->nz; j++) {
1321:         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1322:       }
1323:       v = bmat->a; jj = bmat->j;
1324:       for (j=0; j<bmat->nz; j++) {
1325:         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1326:       }
1327:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1328:       for (j=0; j<mat->N; j++) {
1329:         if (tmp2[j] > *norm) *norm = tmp2[j];
1330:       }
1331:       PetscFree(tmp);
1332:       PetscFree(tmp2);
1333:     } else if (type == NORM_INFINITY) { /* max row norm */
1334:       PetscReal ntemp = 0.0;
1335:       for (j=0; j<aij->A->m; j++) {
1336:         v = amat->a + amat->i[j];
1337:         sum = 0.0;
1338:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1339:           sum += PetscAbsScalar(*v); v++;
1340:         }
1341:         v = bmat->a + bmat->i[j];
1342:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1343:           sum += PetscAbsScalar(*v); v++;
1344:         }
1345:         if (sum > ntemp) ntemp = sum;
1346:       }
1347:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1348:     } else {
1349:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1350:     }
1351:   }
1352:   return(0);
1353: }

1357: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1358: {
1359:   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1360:   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1361:   int          ierr;
1362:   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1363:   Mat          B;
1364:   PetscScalar  *array;

1367:   if (!matout && M != N) {
1368:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1369:   }

1371:   MatCreate(A->comm,A->n,A->m,N,M,&B);
1372:   MatSetType(B,A->type_name);
1373:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

1375:   /* copy over the A part */
1376:   Aloc = (Mat_SeqAIJ*)a->A->data;
1377:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1378:   row = a->rstart;
1379:   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1380:   for (i=0; i<m; i++) {
1381:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1382:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383:   }
1384:   aj = Aloc->j;
1385:   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

1387:   /* copy over the B part */
1388:   Aloc = (Mat_SeqAIJ*)a->B->data;
1389:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1390:   row  = a->rstart;
1391:   PetscMalloc((1+ai[m])*sizeof(int),&cols);
1392:   ct   = cols;
1393:   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1394:   for (i=0; i<m; i++) {
1395:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1396:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1397:   }
1398:   PetscFree(ct);
1399:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1400:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1401:   if (matout) {
1402:     *matout = B;
1403:   } else {
1404:     MatHeaderCopy(A,B);
1405:   }
1406:   return(0);
1407: }

1411: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1412: {
1413:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1414:   Mat        a = aij->A,b = aij->B;
1415:   int        ierr,s1,s2,s3;

1418:   MatGetLocalSize(mat,&s2,&s3);
1419:   if (rr) {
1420:     VecGetLocalSize(rr,&s1);
1421:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1422:     /* Overlap communication with computation. */
1423:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1424:   }
1425:   if (ll) {
1426:     VecGetLocalSize(ll,&s1);
1427:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1428:     (*b->ops->diagonalscale)(b,ll,0);
1429:   }
1430:   /* scale  the diagonal block */
1431:   (*a->ops->diagonalscale)(a,ll,rr);

1433:   if (rr) {
1434:     /* Do a scatter end and then right scale the off-diagonal block */
1435:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1436:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1437:   }
1438: 
1439:   return(0);
1440: }


1445: int MatPrintHelp_MPIAIJ(Mat A)
1446: {
1447:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1448:   int        ierr;

1451:   if (!a->rank) {
1452:     MatPrintHelp_SeqAIJ(a->A);
1453:   }
1454:   return(0);
1455: }

1459: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1460: {
1462:   *bs = 1;
1463:   return(0);
1464: }
1467: int MatSetUnfactored_MPIAIJ(Mat A)
1468: {
1469:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1470:   int        ierr;

1473:   MatSetUnfactored(a->A);
1474:   return(0);
1475: }

1479: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1480: {
1481:   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1482:   Mat        a,b,c,d;
1483:   PetscTruth flg;
1484:   int        ierr;

1487:   a = matA->A; b = matA->B;
1488:   c = matB->A; d = matB->B;

1490:   MatEqual(a,c,&flg);
1491:   if (flg == PETSC_TRUE) {
1492:     MatEqual(b,d,&flg);
1493:   }
1494:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1495:   return(0);
1496: }

1500: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1501: {
1502:   int        ierr;
1503:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1504:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

1507:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1508:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1509:     /* because of the column compression in the off-processor part of the matrix a->B,
1510:        the number of columns in a->B and b->B may be different, hence we cannot call
1511:        the MatCopy() directly on the two parts. If need be, we can provide a more 
1512:        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1513:        then copying the submatrices */
1514:     MatCopy_Basic(A,B,str);
1515:   } else {
1516:     MatCopy(a->A,b->A,str);
1517:     MatCopy(a->B,b->B,str);
1518:   }
1519:   return(0);
1520: }

1524: int MatSetUpPreallocation_MPIAIJ(Mat A)
1525: {
1526:   int        ierr;

1529:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1530:   return(0);
1531: }

1533:  #include "petscblaslapack.h"
1536: int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1537: {
1538:   int        ierr,one=1,i;
1539:   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1540:   Mat_SeqAIJ *x,*y;

1543:   if (str == SAME_NONZERO_PATTERN) {
1544:     x = (Mat_SeqAIJ *)xx->A->data;
1545:     y = (Mat_SeqAIJ *)yy->A->data;
1546:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1547:     x = (Mat_SeqAIJ *)xx->B->data;
1548:     y = (Mat_SeqAIJ *)yy->B->data;
1549:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1550:   } else if (str == SUBSET_NONZERO_PATTERN) {
1551:     MatAXPY_SeqAIJ(a,xx->A,yy->A,str);

1553:     x = (Mat_SeqAIJ *)xx->B->data;
1554:     y = (Mat_SeqAIJ *)yy->B->data;
1555:     if (y->xtoy && y->XtoY != xx->B) {
1556:       PetscFree(y->xtoy);
1557:       MatDestroy(y->XtoY);
1558:     }
1559:     if (!y->xtoy) { /* get xtoy */
1560:       MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1561:       y->XtoY = xx->B;
1562:     }
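    /* y->xtoy[i] is the position in y's value array of the i-th nonzero of x, so the
       subset-pattern case reduces to a scatter-add over the nonzeros of x */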
1563:     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1564:   } else {
1565:     MatAXPY_Basic(a,X,Y,str);
1566:   }
1567:   return(0);
1568: }

1570: /* -------------------------------------------------------------------*/
1571: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1572:        MatGetRow_MPIAIJ,
1573:        MatRestoreRow_MPIAIJ,
1574:        MatMult_MPIAIJ,
1575: /* 4*/ MatMultAdd_MPIAIJ,
1576:        MatMultTranspose_MPIAIJ,
1577:        MatMultTransposeAdd_MPIAIJ,
1578:        0,
1579:        0,
1580:        0,
1581: /*10*/ 0,
1582:        0,
1583:        0,
1584:        MatRelax_MPIAIJ,
1585:        MatTranspose_MPIAIJ,
1586: /*15*/ MatGetInfo_MPIAIJ,
1587:        MatEqual_MPIAIJ,
1588:        MatGetDiagonal_MPIAIJ,
1589:        MatDiagonalScale_MPIAIJ,
1590:        MatNorm_MPIAIJ,
1591: /*20*/ MatAssemblyBegin_MPIAIJ,
1592:        MatAssemblyEnd_MPIAIJ,
1593:        0,
1594:        MatSetOption_MPIAIJ,
1595:        MatZeroEntries_MPIAIJ,
1596: /*25*/ MatZeroRows_MPIAIJ,
1597: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1598:        MatLUFactorSymbolic_MPIAIJ_TFS,
1599: #else
1600:        0,
1601: #endif
1602:        0,
1603:        0,
1604:        0,
1605: /*30*/ MatSetUpPreallocation_MPIAIJ,
1606:        0,
1607:        0,
1608:        0,
1609:        0,
1610: /*35*/ MatDuplicate_MPIAIJ,
1611:        0,
1612:        0,
1613:        0,
1614:        0,
1615: /*40*/ MatAXPY_MPIAIJ,
1616:        MatGetSubMatrices_MPIAIJ,
1617:        MatIncreaseOverlap_MPIAIJ,
1618:        MatGetValues_MPIAIJ,
1619:        MatCopy_MPIAIJ,
1620: /*45*/ MatPrintHelp_MPIAIJ,
1621:        MatScale_MPIAIJ,
1622:        0,
1623:        0,
1624:        0,
1625: /*50*/ MatGetBlockSize_MPIAIJ,
1626:        0,
1627:        0,
1628:        0,
1629:        0,
1630: /*55*/ MatFDColoringCreate_MPIAIJ,
1631:        0,
1632:        MatSetUnfactored_MPIAIJ,
1633:        0,
1634:        0,
1635: /*60*/ MatGetSubMatrix_MPIAIJ,
1636:        MatDestroy_MPIAIJ,
1637:        MatView_MPIAIJ,
1638:        MatGetPetscMaps_Petsc,
1639:        0,
1640: /*65*/ 0,
1641:        0,
1642:        0,
1643:        0,
1644:        0,
1645: /*70*/ 0,
1646:        0,
1647:        MatSetColoring_MPIAIJ,
1648:        MatSetValuesAdic_MPIAIJ,
1649:        MatSetValuesAdifor_MPIAIJ,
1650: /*75*/ 0,
1651:        0,
1652:        0,
1653:        0,
1654:        0,
1655: /*80*/ 0,
1656:        0,
1657:        0,
1658:        0,
1659: /*85*/ MatLoad_MPIAIJ};

1661: /* ----------------------------------------------------------------------------------------*/

1663: EXTERN_C_BEGIN
1666: int MatStoreValues_MPIAIJ(Mat mat)
1667: {
1668:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1669:   int        ierr;

1672:   MatStoreValues(aij->A);
1673:   MatStoreValues(aij->B);
1674:   return(0);
1675: }
1676: EXTERN_C_END

1678: EXTERN_C_BEGIN
1681: int MatRetrieveValues_MPIAIJ(Mat mat)
1682: {
1683:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1684:   int        ierr;

1687:   MatRetrieveValues(aij->A);
1688:   MatRetrieveValues(aij->B);
1689:   return(0);
1690: }
1691: EXTERN_C_END
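/*
   Illustrative sketch (not part of the original source): the store/retrieve cycle these
   two wrappers support.  The values are saved and restored block-wise on aij->A and
   aij->B; the sketch assumes the nonzero pattern of mat is kept fixed between the calls.

     ... assemble mat once with its final nonzero pattern ...
     MatStoreValues(mat);        save the current numerical values
     ... later, after the values have been overwritten ...
     MatRetrieveValues(mat);     restore the saved values
*/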

1693:  #include petscpc.h
1694: EXTERN_C_BEGIN
1697: int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1698: {
1699:   Mat_MPIAIJ   *b;
1700:   int          ierr,i;

1703:   B->preallocated = PETSC_TRUE;
1704:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1705:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1706:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1707:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1708:   if (d_nnz) {
1709:     for (i=0; i<B->m; i++) {
1710:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1711:     }
1712:   }
1713:   if (o_nnz) {
1714:     for (i=0; i<B->m; i++) {
1715:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1716:     }
1717:   }
1718:   b = (Mat_MPIAIJ*)B->data;
1719:   MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
1720:   MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);

1722:   return(0);
1723: }
1724: EXTERN_C_END

1726: /*MC
1727:    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

1729:    Options Database Keys:
1730: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

1732:   Level: beginner

1734: .seealso: MatCreateMPIAIJ
1735: M*/
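/*
   Illustrative sketch (not part of the original source): creating an MPIAIJ matrix
   explicitly and preallocating it.  The sizes m,n,M,N and the counts d_nz,o_nz are
   placeholders; alternatively the type can be chosen at run time with
   MatSetFromOptions() and -mat_type mpiaij.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/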

1737: EXTERN_C_BEGIN
1740: int MatCreate_MPIAIJ(Mat B)
1741: {
1742:   Mat_MPIAIJ *b;
1743:   int        ierr,i,size;

1746:   MPI_Comm_size(B->comm,&size);

1748:   PetscNew(Mat_MPIAIJ,&b);
1749:   B->data         = (void*)b;
1750:   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1751:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1752:   B->factor       = 0;
1753:   B->assembled    = PETSC_FALSE;
1754:   B->mapping      = 0;

1756:   B->insertmode      = NOT_SET_VALUES;
1757:   b->size            = size;
1758:   MPI_Comm_rank(B->comm,&b->rank);

1760:   PetscSplitOwnership(B->comm,&B->m,&B->M);
1761:   PetscSplitOwnership(B->comm,&B->n,&B->N);

1763:   /* the information in the maps duplicates the information computed below, eventually 
1764:      we should remove the duplicate information that is not contained in the maps */
1765:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1766:   PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1768:   /* build local table of row and column ownerships */
1769:   PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1770:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771:   b->cowners = b->rowners + b->size + 2;
1772:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1773:   b->rowners[0] = 0;
1774:   for (i=2; i<=b->size; i++) {
1775:     b->rowners[i] += b->rowners[i-1];
1776:   }
1777:   b->rstart = b->rowners[b->rank];
1778:   b->rend   = b->rowners[b->rank+1];
1779:   MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1780:   b->cowners[0] = 0;
1781:   for (i=2; i<=b->size; i++) {
1782:     b->cowners[i] += b->cowners[i-1];
1783:   }
1784:   b->cstart = b->cowners[b->rank];
1785:   b->cend   = b->cowners[b->rank+1];

1787:   /* build cache (stash) for off-processor entries generated during assembly */
1788:   MatStashCreate_Private(B->comm,1,&B->stash);
1789:   b->donotstash  = PETSC_FALSE;
1790:   b->colmap      = 0;
1791:   b->garray      = 0;
1792:   b->roworiented = PETSC_TRUE;

1794:   /* stuff used for matrix vector multiply */
1795:   b->lvec      = PETSC_NULL;
1796:   b->Mvctx     = PETSC_NULL;

1798:   /* stuff for MatGetRow() */
1799:   b->rowindices   = 0;
1800:   b->rowvalues    = 0;
1801:   b->getrowactive = PETSC_FALSE;

1803:   /* Explicitly create 2 MATSEQAIJ matrices. */
1804:   MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
1805:   MatSetType(b->A,MATSEQAIJ);
1806:   PetscLogObjectParent(B,b->A);
1807:   MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);
1808:   MatSetType(b->B,MATSEQAIJ);
1809:   PetscLogObjectParent(B,b->B);

1811:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1812:                                      "MatStoreValues_MPIAIJ",
1813:                                      MatStoreValues_MPIAIJ);
1814:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1815:                                      "MatRetrieveValues_MPIAIJ",
1816:                                      MatRetrieveValues_MPIAIJ);
1817:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1818:                                      "MatGetDiagonalBlock_MPIAIJ",
1819:                                      MatGetDiagonalBlock_MPIAIJ);
1820:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1821:                                      "MatIsTranspose_MPIAIJ",
1822:                                      MatIsTranspose_MPIAIJ);
1823:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1824:                                      "MatMPIAIJSetPreallocation_MPIAIJ",
1825:                                      MatMPIAIJSetPreallocation_MPIAIJ);
1826:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1827:                                      "MatDiagonalScaleLocal_MPIAIJ",
1828:                                      MatDiagonalScaleLocal_MPIAIJ);
1829:   return(0);
1830: }
1831: EXTERN_C_END

1833: /*MC
1834:    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

1836:    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1837:    and MATMPIAIJ otherwise.

1839:    Options Database Keys:
1840: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

1842:   Level: beginner

1844: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1845: M*/

1847: EXTERN_C_BEGIN
1850: int MatCreate_AIJ(Mat A) {
1851:   int ierr,size;

1854:   PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1855:   MPI_Comm_size(A->comm,&size);
1856:   if (size == 1) {
1857:     MatSetType(A,MATSEQAIJ);
1858:   } else {
1859:     MatSetType(A,MATMPIAIJ);
1860:   }
1861:   return(0);
1862: }
1863: EXTERN_C_END
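/*
   Illustrative sketch (not part of the original source): MatCreate_AIJ() above only
   dispatches on the communicator size, so the same application code selects the
   sequential or the parallel implementation depending on how it is launched.  The sizes
   m,n,M,N are placeholders.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);
     MatSetFromOptions(A);            run with -mat_type aij
        on one process    A becomes MATSEQAIJ
        on >1 processes   A becomes MATMPIAIJ
*/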

1867: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1868: {
1869:   Mat        mat;
1870:   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1871:   int        ierr;

1874:   *newmat       = 0;
1875:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1876:   MatSetType(mat,matin->type_name);
1877:   a    = (Mat_MPIAIJ*)mat->data;
1878:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1879:   mat->factor       = matin->factor;
1880:   mat->assembled    = PETSC_TRUE;
1881:   mat->insertmode   = NOT_SET_VALUES;
1882:   mat->preallocated = PETSC_TRUE;

1884:   a->rstart       = oldmat->rstart;
1885:   a->rend         = oldmat->rend;
1886:   a->cstart       = oldmat->cstart;
1887:   a->cend         = oldmat->cend;
1888:   a->size         = oldmat->size;
1889:   a->rank         = oldmat->rank;
1890:   a->donotstash   = oldmat->donotstash;
1891:   a->roworiented  = oldmat->roworiented;
1892:   a->rowindices   = 0;
1893:   a->rowvalues    = 0;
1894:   a->getrowactive = PETSC_FALSE;

1896:   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1897:   MatStashCreate_Private(matin->comm,1,&mat->stash);
1898:   if (oldmat->colmap) {
1899: #if defined (PETSC_USE_CTABLE)
1900:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1901: #else
1902:     PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1903:     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1904:     PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1905: #endif
1906:   } else a->colmap = 0;
1907:   if (oldmat->garray) {
1908:     int len;
1909:     len  = oldmat->B->n;
1910:     PetscMalloc((len+1)*sizeof(int),&a->garray);
1911:     PetscLogObjectMemory(mat,len*sizeof(int));
1912:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1913:   } else a->garray = 0;
1914: 
1915:    VecDuplicate(oldmat->lvec,&a->lvec);
1916:   PetscLogObjectParent(mat,a->lvec);
1917:    VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1918:   PetscLogObjectParent(mat,a->Mvctx);
1919:    MatDestroy(a->A);
1920:    MatDuplicate(oldmat->A,cpvalues,&a->A);
1921:   PetscLogObjectParent(mat,a->A);
1922:    MatDestroy(a->B);
1923:    MatDuplicate(oldmat->B,cpvalues,&a->B);
1924:   PetscLogObjectParent(mat,a->B);
1925:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1926:   *newmat = mat;
1927:   return(0);
1928: }

1930:  #include petscsys.h

1934: int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1935: {
1936:   Mat          A;
1937:   PetscScalar  *vals,*svals;
1938:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1939:   MPI_Status   status;
1940:   int          i,nz,ierr,j,rstart,rend,fd;
1941:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1942:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1943:   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

1946:   MPI_Comm_size(comm,&size);
1947:   MPI_Comm_rank(comm,&rank);
1948:   if (!rank) {
1949:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1950:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1951:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1952:     if (header[3] < 0) {
1953:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1954:     }
1955:   }

1957:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1958:   M = header[1]; N = header[2];
1959:   /* determine ownership of all rows */
1960:   m = M/size + ((M % size) > rank);
1961:   PetscMalloc((size+2)*sizeof(int),&rowners);
1962:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1963:   rowners[0] = 0;
1964:   for (i=2; i<=size; i++) {
1965:     rowners[i] += rowners[i-1];
1966:   }
1967:   rstart = rowners[rank];
1968:   rend   = rowners[rank+1];

1970:   /* distribute row lengths to all processors */
1971:   PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1972:   offlens = ourlens + (rend-rstart);
1973:   if (!rank) {
1974:     PetscMalloc(M*sizeof(int),&rowlengths);
1975:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1976:     PetscMalloc(size*sizeof(int),&sndcounts);
1977:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1978:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1979:     PetscFree(sndcounts);
1980:   } else {
1981:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1982:   }

1984:   if (!rank) {
1985:     /* calculate the number of nonzeros on each processor */
1986:     PetscMalloc(size*sizeof(int),&procsnz);
1987:     PetscMemzero(procsnz,size*sizeof(int));
1988:     for (i=0; i<size; i++) {
1989:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1990:         procsnz[i] += rowlengths[j];
1991:       }
1992:     }
1993:     PetscFree(rowlengths);

1995:     /* determine max buffer needed and allocate it */
1996:     maxnz = 0;
1997:     for (i=0; i<size; i++) {
1998:       maxnz = PetscMax(maxnz,procsnz[i]);
1999:     }
2000:     PetscMalloc(maxnz*sizeof(int),&cols);

2002:     /* read in my part of the matrix column indices  */
2003:     nz   = procsnz[0];
2004:     PetscMalloc(nz*sizeof(int),&mycols);
2005:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

2007:     /* read in everyone else's column indices and ship them off */
2008:     for (i=1; i<size; i++) {
2009:       nz   = procsnz[i];
2010:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
2011:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
2012:     }
2013:     PetscFree(cols);
2014:   } else {
2015:     /* determine buffer space needed for message */
2016:     nz = 0;
2017:     for (i=0; i<m; i++) {
2018:       nz += ourlens[i];
2019:     }
2020:     PetscMalloc((nz+1)*sizeof(int),&mycols);

2022:     /* receive message of column indices */
2023:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2024:     MPI_Get_count(&status,MPI_INT,&maxnz);
2025:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2026:   }

2028:   /* determine column ownership if matrix is not square */
2029:   if (N != M) {
2030:     n      = N/size + ((N % size) > rank);
2031:     MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
2032:     cstart = cend - n;
2033:   } else {
2034:     cstart = rstart;
2035:     cend   = rend;
2036:     n      = cend - cstart;
2037:   }

2039:   /* loop over local rows, determining number of off diagonal entries */
2040:   PetscMemzero(offlens,m*sizeof(int));
2041:   jj = 0;
2042:   for (i=0; i<m; i++) {
2043:     for (j=0; j<ourlens[i]; j++) {
2044:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2045:       jj++;
2046:     }
2047:   }

2049:   /* create our matrix */
2050:   for (i=0; i<m; i++) {
2051:     ourlens[i] -= offlens[i];
2052:   }
2053:   MatCreate(comm,m,n,M,N,&A);
2054:   MatSetType(A,type);
2055:   MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);

2057:   MatSetOption(A,MAT_COLUMNS_SORTED);
2058:   for (i=0; i<m; i++) {
2059:     ourlens[i] += offlens[i];
2060:   }

2062:   if (!rank) {
2063:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

2065:     /* read in my part of the matrix numerical values  */
2066:     nz   = procsnz[0];
2067:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2068: 
2069:     /* insert into matrix */
2070:     jj      = rstart;
2071:     smycols = mycols;
2072:     svals   = vals;
2073:     for (i=0; i<m; i++) {
2074:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2075:       smycols += ourlens[i];
2076:       svals   += ourlens[i];
2077:       jj++;
2078:     }

2080:     /* read in other processors and ship out */
2081:     for (i=1; i<size; i++) {
2082:       nz   = procsnz[i];
2083:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2084:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2085:     }
2086:     PetscFree(procsnz);
2087:   } else {
2088:     /* receive numeric values */
2089:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

2091:     /* receive message of values */
2092:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2093:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2094:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2096:     /* insert into matrix */
2097:     jj      = rstart;
2098:     smycols = mycols;
2099:     svals   = vals;
2100:     for (i=0; i<m; i++) {
2101:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2102:       smycols += ourlens[i];
2103:       svals   += ourlens[i];
2104:       jj++;
2105:     }
2106:   }
2107:   PetscFree(ourlens);
2108:   PetscFree(vals);
2109:   PetscFree(mycols);
2110:   PetscFree(rowners);

2112:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2113:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2114:   *newmat = A;
2115:   return(0);
2116: }
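/*
   Illustrative sketch (not part of the original source): loading a matrix stored in the
   PETSc binary format through the routine above.  The file mode PETSC_FILE_RDONLY is an
   assumption about this PETSc version's naming (the code in this file only shows
   PETSC_FILE_CREATE, used for writing); "matrix.dat" is a placeholder file name.

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);
     MatLoad(viewer,MATMPIAIJ,&A);
     PetscViewerDestroy(viewer);
*/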

2120: /*
2121:     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2122:   copy, and then the final result obtained by concatenating the local matrices.
2123:   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2124: */
2125: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2126: {
2127:   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2128:   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2129:   Mat          *local,M,Mreuse;
2130:   PetscScalar  *vwork,*aa;
2131:   MPI_Comm     comm = mat->comm;
2132:   Mat_SeqAIJ   *aij;


2136:   MPI_Comm_rank(comm,&rank);
2137:   MPI_Comm_size(comm,&size);

2139:   if (call ==  MAT_REUSE_MATRIX) {
2140:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2141:     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2142:     local = &Mreuse;
2143:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2144:   } else {
2145:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2146:     Mreuse = *local;
2147:     PetscFree(local);
2148:   }

2150:   /* 
2151:       m - number of local rows
2152:       n - number of columns (same on all processors)
2153:       rstart - first row in new global matrix generated
2154:   */
2155:   MatGetSize(Mreuse,&m,&n);
2156:   if (call == MAT_INITIAL_MATRIX) {
2157:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2158:     ii  = aij->i;
2159:     jj  = aij->j;

2161:     /*
2162:         Determine the number of non-zeros in the diagonal and off-diagonal 
2163:         portions of the matrix in order to do correct preallocation
2164:     */

2166:     /* first get start and end of "diagonal" columns */
2167:     if (csize == PETSC_DECIDE) {
2168:       ISGetSize(isrow,&mglobal);
2169:       if (mglobal == n) { /* square matrix */
2170:         nlocal = m;
2171:       } else {
2172:         nlocal = n/size + ((n % size) > rank);
2173:       }
2174:     } else {
2175:       nlocal = csize;
2176:     }
2177:     MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2178:     rstart = rend - nlocal;
2179:     if (rank == size - 1 && rend != n) {
2180:       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2181:     }

2183:     /* next, compute all the lengths */
2184:     PetscMalloc((2*m+1)*sizeof(int),&dlens);
2185:     olens = dlens + m;
2186:     for (i=0; i<m; i++) {
2187:       jend = ii[i+1] - ii[i];
2188:       olen = 0;
2189:       dlen = 0;
2190:       for (j=0; j<jend; j++) {
2191:         if (*jj < rstart || *jj >= rend) olen++;
2192:         else dlen++;
2193:         jj++;
2194:       }
2195:       olens[i] = olen;
2196:       dlens[i] = dlen;
2197:     }
2198:     MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);
2199:     MatSetType(M,mat->type_name);
2200:     MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2201:     PetscFree(dlens);
2202:   } else {
2203:     int ml,nl;

2205:     M = *newmat;
2206:     MatGetLocalSize(M,&ml,&nl);
2207:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2208:     MatZeroEntries(M);
2209:     /*
2210:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2211:        rather than the slower MatSetValues().
2212:     */
2213:     M->was_assembled = PETSC_TRUE;
2214:     M->assembled     = PETSC_FALSE;
2215:   }
2216:   MatGetOwnershipRange(M,&rstart,&rend);
2217:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2218:   ii  = aij->i;
2219:   jj  = aij->j;
2220:   aa  = aij->a;
2221:   for (i=0; i<m; i++) {
2222:     row   = rstart + i;
2223:     nz    = ii[i+1] - ii[i];
2224:     cwork = jj;     jj += nz;
2225:     vwork = aa;     aa += nz;
2226:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2227:   }

2229:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2230:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2231:   *newmat = M;

2233:   /* save submatrix used in this processor for the next request */
2234:   if (call ==  MAT_INITIAL_MATRIX) {
2235:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2236:     PetscObjectDereference((PetscObject)Mreuse);
2237:   }

2239:   return(0);
2240: }
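/*
   Illustrative sketch (not part of the original source): extracting a parallel submatrix
   with the routine above.  The index sets isrow and iscol are assumed to have been
   created already (each process lists the global rows and columns it wants locally).

     Mat sub;
     MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
     ... change the numerical values of mat, keep the same index sets ...
     MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);
     MatDestroy(sub);
*/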

2244: /*@C
2245:    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2246:    (the default parallel PETSc format).  For good matrix assembly performance
2247:    the user should preallocate the matrix storage by setting the parameters 
2248:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2249:    performance can be increased by more than a factor of 50.

2251:    Collective on MPI_Comm

2253:    Input Parameters:
2254: +  A - the matrix 
2255: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2256:            (same value is used for all local rows)
2257: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2258:            DIAGONAL portion of the local submatrix (possibly different for each row)
2259:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2260:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2261:            You must leave room for the diagonal entry even if it is zero.
2262: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2263:            submatrix (same value is used for all local rows).
2264: -  o_nnz - array containing the number of nonzeros in the various rows of the
2265:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2266:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2267:            structure. The size of this array is equal to the number 
2268:            of local rows, i.e. 'm'. 

2270:    The AIJ format (also called the Yale sparse matrix format or
2271:    compressed row storage), is fully compatible with standard Fortran 77
2272:    storage.  That is, the stored row and column indices can begin at
2273:    either one (as in Fortran) or zero.  See the users manual for details.

2275:    The user MUST specify either the local or global matrix dimensions
2276:    (possibly both).

2278:    The parallel matrix is partitioned such that the first m0 rows belong to 
2279:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2280:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

2282:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2283:    as the submatrix which is obtained by extracting the part corresponding 
2284:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2285:    first row that belongs to the processor, and r2 is the last row belonging 
2286:    to this processor. This is a square mxm matrix. The remaining portion 
2287:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2289:    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

2291:    By default, this format uses inodes (identical nodes) when possible.
2292:    We search for consecutive rows with the same nonzero structure, thereby
2293:    reusing matrix information to achieve increased efficiency.

2295:    Options Database Keys:
2296: +  -mat_aij_no_inode  - Do not use inodes
2297: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2298: -  -mat_aij_oneindex - Internally use indexing starting at 1
2299:         rather than 0.  Note that when calling MatSetValues(),
2300:         the user still MUST index entries starting at 0!

2302:    Example usage:
2303:   
2304:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2305:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2306:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2307:    as follows:

2309: .vb
2310:             1  2  0  |  0  3  0  |  0  4
2311:     Proc0   0  5  6  |  7  0  0  |  8  0
2312:             9  0 10  | 11  0  0  | 12  0
2313:     -------------------------------------
2314:            13  0 14  | 15 16 17  |  0  0
2315:     Proc1   0 18  0  | 19 20 21  |  0  0 
2316:             0  0  0  | 22 23  0  | 24  0
2317:     -------------------------------------
2318:     Proc2  25 26 27  |  0  0 28  | 29  0
2319:            30  0  0  | 31 32 33  |  0 34
2320: .ve

2322:    This can be represented as a collection of submatrices as:

2324: .vb
2325:       A B C
2326:       D E F
2327:       G H I
2328: .ve

2330:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2331:    owned by proc1, G,H,I are owned by proc2.

2333:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2334:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2337:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2338:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2339:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2340:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2341:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2342:    matrix and [DF] as another SeqAIJ matrix.

2344:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2345:    allocated for every row of the local diagonal submatrix, and o_nz
2346:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2347:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 
2348:    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 
2349:    In this case, the values of d_nz,o_nz are:
2350: .vb
2351:      proc0 : d_nz = 2, o_nz = 2
2352:      proc1 : d_nz = 3, o_nz = 2
2353:      proc2 : d_nz = 1, o_nz = 4
2354: .ve
2355:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2356:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2357:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2358:    34 values.

2360:    When d_nnz, o_nnz parameters are specified, the storage is specified
2361:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2362:    In the above case the values for d_nnz,o_nnz are:
2363: .vb
2364:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2365:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2366:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2367: .ve
2368:    Here the space allocated is the sum of all the above values, i.e. 34, and
2369:    hence the preallocation is perfect.

2371:    Level: intermediate

2373: .keywords: matrix, aij, compressed row, sparse, parallel

2375: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2376: @*/
2377: int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2378: {
2379:   int ierr,(*f)(Mat,int,const int[],int,const int[]);

2382:   PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2383:   if (f) {
2384:     (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2385:   }
2386:   return(0);
2387: }
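/*
   Illustrative sketch (not part of the original source): the preallocation for proc0 of
   the 8x8 example in the manual page above, using the per-row arrays so that d_nz and
   o_nz are ignored.  proc1 and proc2 would pass their own d_nnz/o_nnz arrays.

     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};     proc0 owns rows 0,1,2
     MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
*/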

2391: /*@C
2392:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2393:    (the default parallel PETSc format).  For good matrix assembly performance
2394:    the user should preallocate the matrix storage by setting the parameters 
2395:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2396:    performance can be increased by more than a factor of 50.

2398:    Collective on MPI_Comm

2400:    Input Parameters:
2401: +  comm - MPI communicator
2402: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2403:            This value should be the same as the local size used in creating the 
2404:            y vector for the matrix-vector product y = Ax.
2405: .  n - This value should be the same as the local size used in creating the 
2406:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2407:        calculated if N is given) For square matrices n is almost always m.
2408: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2409: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2410: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2411:            (same value is used for all local rows)
2412: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2413:            DIAGONAL portion of the local submatrix (possibly different for each row)
2414:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2415:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2416:            You must leave room for the diagonal entry even if it is zero.
2417: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2418:            submatrix (same value is used for all local rows).
2419: -  o_nnz - array containing the number of nonzeros in the various rows of the
2420:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2421:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2422:            structure. The size of this array is equal to the number 
2423:            of local rows, i.e. 'm'. 

2425:    Output Parameter:
2426: .  A - the matrix 

2428:    Notes:
2429:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2430:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2431:    storage requirements for this matrix.

2433:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2434:    processor then it must be used on all processors that share the object for 
2435:    that argument.

2437:    The AIJ format (also called the Yale sparse matrix format or
2438:    compressed row storage), is fully compatible with standard Fortran 77
2439:    storage.  That is, the stored row and column indices can begin at
2440:    either one (as in Fortran) or zero.  See the users manual for details.

2442:    The user MUST specify either the local or global matrix dimensions
2443:    (possibly both).

2445:    The parallel matrix is partitioned such that the first m0 rows belong to 
2446:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2447:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

2449:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2450:    as the submatrix which is obtained by extracting the part corresponding 
2451:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2452:    first row that belongs to the processor, and r2 is the last row belonging 
2453:    to this processor. This is a square mxm matrix. The remaining portion 
2454:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2456:    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

2458:    When calling this routine with a single process communicator, a matrix of
2459:    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2460:    type of communicator, use the construction mechanism:
2461:      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

2463:    By default, this format uses inodes (identical nodes) when possible.
2464:    We search for consecutive rows with the same nonzero structure, thereby
2465:    reusing matrix information to achieve increased efficiency.

2467:    Options Database Keys:
2468: +  -mat_aij_no_inode  - Do not use inodes
2469: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2470: -  -mat_aij_oneindex - Internally use indexing starting at 1
2471:         rather than 0.  Note that when calling MatSetValues(),
2472:         the user still MUST index entries starting at 0!


2475:    Example usage:
2476:   
2477:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2478:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2479:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2480:    as follows:

2482: .vb
2483:             1  2  0  |  0  3  0  |  0  4
2484:     Proc0   0  5  6  |  7  0  0  |  8  0
2485:             9  0 10  | 11  0  0  | 12  0
2486:     -------------------------------------
2487:            13  0 14  | 15 16 17  |  0  0
2488:     Proc1   0 18  0  | 19 20 21  |  0  0 
2489:             0  0  0  | 22 23  0  | 24  0
2490:     -------------------------------------
2491:     Proc2  25 26 27  |  0  0 28  | 29  0
2492:            30  0  0  | 31 32 33  |  0 34
2493: .ve

2495:    This can be represented as a collection of submatrices as:

2497: .vb
2498:       A B C
2499:       D E F
2500:       G H I
2501: .ve

2503:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2504:    owned by proc1, G,H,I are owned by proc2.

2506:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2508:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2510:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2511:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2512:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2513:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2514:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2515:    matrix and [DF] as another SeqAIJ matrix.

2517:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2518:    allocated for every row of the local diagonal submatrix, and o_nz
2519:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2520:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 
2521:    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 
2522:    In this case, the values of d_nz,o_nz are:
2523: .vb
2524:      proc0 : d_nz = 2, o_nz = 2
2525:      proc1 : d_nz = 3, o_nz = 2
2526:      proc2 : d_nz = 1, o_nz = 4
2527: .ve
2528:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2529:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2530:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2531:    34 values.

2533:    When d_nnz, o_nnz parameters are specified, the storage is specified
2534:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2535:    In the above case the values for d_nnz,o_nnz are:
2536: .vb
2537:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2538:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2539:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2540: .ve
2541:    Here the space allocated is the sum of all the above values, i.e. 34, and
2542:    hence the preallocation is perfect.

2544:    Level: intermediate

2546: .keywords: matrix, aij, compressed row, sparse, parallel

2548: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2549: @*/
2550: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2551: {
2552:   int ierr,size;

2555:   MatCreate(comm,m,n,M,N,A);
2556:   MPI_Comm_size(comm,&size);
2557:   if (size > 1) {
2558:     MatSetType(*A,MATMPIAIJ);
2559:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2560:   } else {
2561:     MatSetType(*A,MATSEQAIJ);
2562:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2563:   }
2564:   return(0);
2565: }
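/*
   Illustrative sketch (not part of the original source): the one-call form documented
   above, with a constant 5 nonzeros per row preallocated in the diagonal block and 2 in
   the off-diagonal block (placeholder values); M and N are the global sizes.

     Mat A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
*/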

2569: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2570: {
2571:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573:   *Ad     = a->A;
2574:   *Ao     = a->B;
2575:   *colmap = a->garray;
2576:   return(0);
2577: }
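/*
   Illustrative sketch (not part of the original source): peeking at the two SeqAIJ
   blocks and the local-to-global column map of an assembled MPIAIJ matrix.

     Mat Ad,Ao;
     int *garray;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
        Ad is the diagonal block, Ao the (column-compressed) off-diagonal block,
        and garray[j] is the global column of local column j of Ao
*/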

2581: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2582: {
2583:   int        ierr,i;
2584:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

2587:   if (coloring->ctype == IS_COLORING_LOCAL) {
2588:     ISColoringValue *allcolors,*colors;
2589:     ISColoring      ocoloring;

2591:     /* set coloring for diagonal portion */
2592:     MatSetColoring_SeqAIJ(a->A,coloring);

2594:     /* set coloring for off-diagonal portion */
2595:     ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2596:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2597:     for (i=0; i<a->B->n; i++) {
2598:       colors[i] = allcolors[a->garray[i]];
2599:     }
2600:     PetscFree(allcolors);
2601:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2602:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2603:     ISColoringDestroy(ocoloring);
2604:   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2605:     ISColoringValue *colors;
2606:     int             *larray;
2607:     ISColoring      ocoloring;

2609:     /* set coloring for diagonal portion */
2610:     PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2611:     for (i=0; i<a->A->n; i++) {
2612:       larray[i] = i + a->cstart;
2613:     }
2614:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2615:     PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2616:     for (i=0; i<a->A->n; i++) {
2617:       colors[i] = coloring->colors[larray[i]];
2618:     }
2619:     PetscFree(larray);
2620:     ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2621:     MatSetColoring_SeqAIJ(a->A,ocoloring);
2622:     ISColoringDestroy(ocoloring);

2624:     /* set coloring for off-diagonal portion */
2625:     PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2626:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2627:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2628:     for (i=0; i<a->B->n; i++) {
2629:       colors[i] = coloring->colors[larray[i]];
2630:     }
2631:     PetscFree(larray);
2632:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2633:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2634:     ISColoringDestroy(ocoloring);
2635:   } else {
2636:     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2637:   }

2639:   return(0);
2640: }

2644: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2645: {
2646:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2647:   int        ierr;

2650:   MatSetValuesAdic_SeqAIJ(a->A,advalues);
2651:   MatSetValuesAdic_SeqAIJ(a->B,advalues);
2652:   return(0);
2653: }

2657: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2658: {
2659:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2660:   int        ierr;

2663:   MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2664:   MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2665:   return(0);
2666: }

2670: /*@C
2671:       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2672:                  matrices from each processor

2674:     Collective on MPI_Comm

2676:    Input Parameters:
2677: +    comm - the communicator the parallel matrix will live on
2678: -    inmat - the input sequential matrices

2680:    Output Parameter:
2681: .    outmat - the parallel matrix generated

2683:     Level: advanced

2685:    Notes: The number of columns in EACH of the separate input matrices
2686:       MUST be the same.

2688: @*/
2689: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2690: {
2691:   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2692:   PetscScalar *values;
2693:   PetscMap    columnmap,rowmap;

2696: 
2697:   MatGetSize(inmat,&m,&n);

2699:   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2700:   PetscMapCreate(comm,&columnmap);
2701:   PetscMapSetSize(columnmap,n);
2702:   PetscMapSetType(columnmap,MAP_MPI);
2703:   PetscMapGetLocalSize(columnmap,&n);
2704:   PetscMapDestroy(columnmap);

2706:   PetscMapCreate(comm,&rowmap);
2707:   PetscMapSetLocalSize(rowmap,m);
2708:   PetscMapSetType(rowmap,MAP_MPI);
2709:   PetscMapGetLocalRange(rowmap,&rstart,0);
2710:   PetscMapDestroy(rowmap);

2712:   MatPreallocateInitialize(comm,m,n,dnz,onz);
2713:   for (i=0;i<m;i++) {
2714:     MatGetRow(inmat,i,&nnz,&indx,&values);
2715:     MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2716:     MatRestoreRow(inmat,i,&nnz,&indx,&values);
2717:   }
2718:   /* This routine will ONLY return an MPIAIJ type matrix */
2719:   MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);
2720:   MatSetType(*outmat,MATMPIAIJ);
2721:   MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
2722:   MatPreallocateFinalize(dnz,onz);

2724:   for (i=0;i<m;i++) {
2725:     MatGetRow(inmat,i,&nnz,&indx,&values);
2726:     I    = i + rstart;
2727:     MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2728:     MatRestoreRow(inmat,i,&nnz,&indx,&values);
2729:   }
2730:   MatDestroy(inmat);
2731:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2732:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);

2734:   return(0);
2735: }
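/*
   Illustrative sketch (not part of the original source): gluing one sequential matrix
   per process into a single parallel MPIAIJ matrix.  Note that MatMerge() destroys the
   input matrix (see the MatDestroy(inmat) call above).

     Mat seq,par;
     ... each process assembles its own SeqAIJ matrix seq, all with the same number of columns ...
     MatMerge(PETSC_COMM_WORLD,seq,&par);
*/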

2739: int MatFileSplit(Mat A,char *outfile)
2740: {
2741:   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2742:   PetscViewer out;
2743:   char        *name;
2744:   Mat         B;
2745:   PetscScalar *values;

2748: 
2749:   MatGetLocalSize(A,&m,0);
2750:   MatGetSize(A,0,&N);
2751:   /* Should this be the type of the diagonal block of A? */
2752:   MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);
2753:   MatSetType(B,MATSEQAIJ);
2754:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
2755:   MatGetOwnershipRange(A,&rstart,0);
2756:   for (i=0;i<m;i++) {
2757:     MatGetRow(A,i+rstart,&nnz,&indx,&values);
2758:     MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2759:     MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2760:   }
2761:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2762:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2764:   MPI_Comm_rank(A->comm,&rank);
2765:   PetscStrlen(outfile,&len);
2766:   PetscMalloc((len+5)*sizeof(char),&name);
2767:   sprintf(name,"%s.%d",outfile,rank);
2768:   PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);
2769:   PetscFree(name);
2770:   MatView(B,out);
2771:   PetscViewerDestroy(out);
2772:   MatDestroy(B);
2773:   return(0);
2774: }
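/*
   Illustrative sketch (not part of the original source): writing the local rows of a
   parallel matrix to one binary file per process with the routine above; with the call
   below, rank r writes the file "mysplit.r" ("mysplit" is a placeholder name).

     MatFileSplit(A,"mysplit");
*/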