Actual source code: mpibdiag.c

  1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
  2: /*
  3:    The basic matrix operations for the block diagonal parallel
  4:   matrices.
  5: */
 6:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"

 10: int MatSetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
 11: {
 12:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 13:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 14:   PetscTruth   roworiented = mbd->roworiented;

 17:   for (i=0; i<m; i++) {
 18:     if (idxm[i] < 0) continue;
 19:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 20:     if (idxm[i] >= rstart && idxm[i] < rend) {
 21:       row = idxm[i] - rstart;
 22:       for (j=0; j<n; j++) {
 23:         if (idxn[j] < 0) continue;
 24:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 25:         if (roworiented) {
 26:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 27:         } else {
 28:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 29:         }
 30:       }
 31:     } else {
 32:       if (!mbd->donotstash) {
 33:         if (roworiented) {
 34:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 35:         } else {
 36:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 37:         }
 38:       }
 39:     }
 40:   }
 41:   return(0);
 42: }

 46: int MatGetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
 47: {
 48:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 49:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 52:   for (i=0; i<m; i++) {
 53:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 54:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 55:     if (idxm[i] >= rstart && idxm[i] < rend) {
 56:       row = idxm[i] - rstart;
 57:       for (j=0; j<n; j++) {
 58:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 59:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 60:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 61:       }
 62:     } else {
 63:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 64:     }
 65:   }
 66:   return(0);
 67: }

 71: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 72: {
 73:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 74:   MPI_Comm     comm = mat->comm;
 75:   int          ierr,nstash,reallocs;
 76:   InsertMode   addv;

 79:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 80:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 81:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 82:   }
 83:   mat->insertmode = addv; /* in case this processor had no cache */
 84:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 85:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 86:   PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
 87:   return(0);
 88: }

 92: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 93: {
 94:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 95:   Mat_SeqBDiag *mlocal;
 96:   int          i,n,*row,*col;
 97:   int          *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
 98:   PetscScalar  *val;
 99:   InsertMode   addv = mat->insertmode;


103:   while (1) {
104:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
105:     if (!flg) break;
106: 
107:     for (i=0; i<n;) {
108:       /* Now identify the consecutive vals belonging to the same row */
109:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
110:       if (j < n) ncols = j-i;
111:       else       ncols = n-i;
112:       /* Now assemble all these values with a single function call */
113:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
114:       i = j;
115:     }
116:   }
117:   MatStashScatterEnd_Private(&mat->stash);

119:   MatAssemblyBegin(mbd->A,mode);
120:   MatAssemblyEnd(mbd->A,mode);

122:   /* Fix main diagonal location and determine global diagonals */
123:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
124:   Mblock         = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
125:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
126:   PetscMalloc(2*len*sizeof(int),&tmp1);
127:   tmp2           = tmp1 + len;
128:   PetscMemzero(tmp1,2*len*sizeof(int));
129:   mlocal->mainbd = -1;
130:   for (i=0; i<mlocal->nd; i++) {
131:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
132:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
133:   }
134:   MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
135:   ict  = 0;
136:   for (i=0; i<len; i++) {
137:     if (tmp2[i]) {
138:       mbd->gdiag[ict] = i - Mblock;
139:       ict++;
140:     }
141:   }
142:   mbd->gnd = ict;
143:   PetscFree(tmp1);

145:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
146:     MatSetUpMultiply_MPIBDiag(mat);
147:   }
148:   return(0);
149: }

153: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
154: {
155:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
156:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;

159:   *bs = dmat->bs;
160:   return(0);
161: }

165: int MatZeroEntries_MPIBDiag(Mat A)
166: {
167:   Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
168:   int          ierr;

171:   MatZeroEntries(l->A);
172:   return(0);
173: }

 175: /* Again this uses the same basic strategy as in the assembly and
 176:    scatter create routines; we should try to do it systematically
 177:    if we can figure out the proper level of generality. */

 179: /* the code does not handle the diagonal entries correctly unless the
 180:    matrix is square and the column and row ownerships are identical.
 181:    This is a BUG. The only way to fix it seems to be to access
 182:    aij->A and aij->B directly and not through the MatZeroRows()
 183:    routine.
 184: */

188: int MatZeroRows_MPIBDiag(Mat A,IS is,const PetscScalar *diag)
189: {
190:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
191:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
192:   int            *nprocs,j,idx,nsends;
193:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
194:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
195:   int            *lens,imdex,*lrows,*values;
196:   MPI_Comm       comm = A->comm;
197:   MPI_Request    *send_waits,*recv_waits;
198:   MPI_Status     recv_status,*send_status;
199:   IS             istmp;
200:   PetscTruth     found;

203:   ISGetLocalSize(is,&N);
204:   ISGetIndices(is,&rows);

206:   /*  first count number of contributors to each processor */
207:   PetscMalloc(2*size*sizeof(int),&nprocs);
208:   PetscMemzero(nprocs,2*size*sizeof(int));
209:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
210:   for (i=0; i<N; i++) {
211:     idx = rows[i];
212:     found = PETSC_FALSE;
213:     for (j=0; j<size; j++) {
214:       if (idx >= owners[j] && idx < owners[j+1]) {
215:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
216:       }
217:     }
218:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
219:   }
220:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

222:   /* inform other processors of number of messages and max length*/
223:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

225:   /* post receives:   */
226:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
227:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
228:   for (i=0; i<nrecvs; i++) {
229:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
230:   }

232:   /* do sends:
233:       1) starts[i] gives the starting index in svalues for stuff going to 
234:          the ith processor
235:   */
236:   PetscMalloc((N+1)*sizeof(int),&svalues);
237:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
238:   PetscMalloc((size+1)*sizeof(int),&starts);
239:   starts[0] = 0;
240:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
241:   for (i=0; i<N; i++) {
242:     svalues[starts[owner[i]]++] = rows[i];
243:   }
244:   ISRestoreIndices(is,&rows);

246:   starts[0] = 0;
247:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
248:   count = 0;
249:   for (i=0; i<size; i++) {
250:     if (nprocs[2*i+1]) {
251:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
252:     }
253:   }
254:   PetscFree(starts);

256:   base = owners[rank];

258:   /*  wait on receives */
259:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
260:   source = lens + nrecvs;
261:   count  = nrecvs;
262:   slen   = 0;
263:   while (count) {
264:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
265:     /* unpack receives into our local space */
266:     MPI_Get_count(&recv_status,MPI_INT,&n);
267:     source[imdex]  = recv_status.MPI_SOURCE;
268:     lens[imdex]  = n;
269:     slen += n;
270:     count--;
271:   }
272:   PetscFree(recv_waits);
273: 
274:   /* move the data into the send scatter */
275:   PetscMalloc((slen+1)*sizeof(int),&lrows);
276:   count = 0;
277:   for (i=0; i<nrecvs; i++) {
278:     values = rvalues + i*nmax;
279:     for (j=0; j<lens[i]; j++) {
280:       lrows[count++] = values[j] - base;
281:     }
282:   }
283:   PetscFree(rvalues);
284:   PetscFree(lens);
285:   PetscFree(owner);
286:   PetscFree(nprocs);
287: 
288:   /* actually zap the local rows */
289:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
290:   PetscLogObjectParent(A,istmp);
291:   PetscFree(lrows);
292:   MatZeroRows(l->A,istmp,diag);
293:   ISDestroy(istmp);

295:   /* wait on sends */
296:   if (nsends) {
297:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
298:     MPI_Waitall(nsends,send_waits,send_status);
299:     PetscFree(send_status);
300:   }
301:   PetscFree(send_waits);
302:   PetscFree(svalues);

304:   return(0);
305: }

309: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
310: {
311:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
312:   int          ierr;

315:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
316:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
318:   return(0);
319: }

323: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
324: {
325:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
326:   int          ierr;

329:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
330:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
331:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
332:   return(0);
333: }

337: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
338: {
339:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
340:   int          ierr;
341:   PetscScalar  zero = 0.0;

344:   VecSet(&zero,yy);
345:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
346:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
347:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348:   return(0);
349: }

353: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
354: {
355:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
356:   int          ierr;

359:   VecCopy(yy,zz);
360:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
361:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
362:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
363:   return(0);
364: }

368: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
369: {
370:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
371:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
372:   int          ierr;
373:   PetscReal    isend[5],irecv[5];

376:   info->block_size     = (PetscReal)dmat->bs;
377:   MatGetInfo(mat->A,MAT_LOCAL,info);
378:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
379:   isend[3] = info->memory;  isend[4] = info->mallocs;
380:   if (flag == MAT_LOCAL) {
381:     info->nz_used      = isend[0];
382:     info->nz_allocated = isend[1];
383:     info->nz_unneeded  = isend[2];
384:     info->memory       = isend[3];
385:     info->mallocs      = isend[4];
386:   } else if (flag == MAT_GLOBAL_MAX) {
387:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
388:     info->nz_used      = irecv[0];
389:     info->nz_allocated = irecv[1];
390:     info->nz_unneeded  = irecv[2];
391:     info->memory       = irecv[3];
392:     info->mallocs      = irecv[4];
393:   } else if (flag == MAT_GLOBAL_SUM) {
394:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
395:     info->nz_used      = irecv[0];
396:     info->nz_allocated = irecv[1];
397:     info->nz_unneeded  = irecv[2];
398:     info->memory       = irecv[3];
399:     info->mallocs      = irecv[4];
400:   }
401:   info->rows_global    = (double)matin->M;
402:   info->columns_global = (double)matin->N;
403:   info->rows_local     = (double)matin->m;
404:   info->columns_local  = (double)matin->N;
405:   return(0);
406: }

410: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
411: {
412:   int          ierr;
413:   Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;

416:   MatGetDiagonal(A->A,v);
417:   return(0);
418: }

422: int MatDestroy_MPIBDiag(Mat mat)
423: {
424:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
425:   int          ierr;
426: #if defined(PETSC_USE_LOG)
427:   Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;

430:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
431: #else
433: #endif
434:   MatStashDestroy_Private(&mat->stash);
435:   PetscFree(mbd->rowners);
436:   PetscFree(mbd->gdiag);
437:   MatDestroy(mbd->A);
438:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
439:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
440:   PetscFree(mbd);
441:   return(0);
442: }


447: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
448: {
449:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
450:   int          ierr;

453:   if (mbd->size == 1) {
454:     MatView(mbd->A,viewer);
455:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
456:   return(0);
457: }

461: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
462: {
463:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
464:   Mat_SeqBDiag      *dmat = (Mat_SeqBDiag*)mbd->A->data;
465:   int               ierr,i,size = mbd->size,rank = mbd->rank;
466:   PetscTruth        isascii,isdraw;
467:   PetscViewer       sviewer;
468:   PetscViewerFormat format;

471:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
472:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
473:   if (isascii) {
474:     PetscViewerGetFormat(viewer,&format);
475:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
476:       int nline = PetscMin(10,mbd->gnd),k,nk,np;
477:       PetscViewerASCIIPrintf(viewer,"  block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
478:       nk = (mbd->gnd-1)/nline + 1;
479:       for (k=0; k<nk; k++) {
480:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
481:         np = PetscMin(nline,mbd->gnd - nline*k);
482:         for (i=0; i<np; i++) {
483:           PetscViewerASCIIPrintf(viewer,"  %d",mbd->gdiag[i+nline*k]);
484:         }
485:         PetscViewerASCIIPrintf(viewer,"\n");
486:       }
487:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
488:         MatInfo info;
489:         MPI_Comm_rank(mat->comm,&rank);
490:         MatGetInfo(mat,MAT_LOCAL,&info);
491:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
492:             (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
493:         PetscViewerFlush(viewer);
494:         VecScatterView(mbd->Mvctx,viewer);
495:       }
496:       return(0);
497:     }
498:   }

500:   if (isdraw) {
501:     PetscDraw       draw;
502:     PetscTruth isnull;
503:     PetscViewerDrawGetDraw(viewer,0,&draw);
504:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
505:   }

507:   if (size == 1) {
508:     MatView(mbd->A,viewer);
509:   } else {
510:     /* assemble the entire matrix onto first processor. */
511:     Mat          A;
512:     int          M = mat->M,N = mat->N,m,row,nz,*cols;
513:     PetscScalar  *vals;
514:     Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;

516:     /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
517:     if (!rank) {
518:       MatCreate(mat->comm,M,M,M,N,&A);
519:       MatSetType(A,MATMPIBDIAG);
520:       MatMPIBDiagSetPreallocation(A,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL);
521:     } else {
522:       MatCreate(mat->comm,0,0,M,N,&A);
523:       MatSetType(A,MATMPIBDIAG);
524:       MatMPIBDiagSetPreallocation(A,0,Ambd->bs,PETSC_NULL,PETSC_NULL);
525:     }
526:     PetscLogObjectParent(mat,A);

528:     /* Copy the matrix ... This isn't the most efficient means,
529:        but it's quick for now */
530:     row = mbd->rstart;
531:     m = mbd->A->m;
532:     for (i=0; i<m; i++) {
533:       MatGetRow(mat,row,&nz,&cols,&vals);
534:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
535:       MatRestoreRow(mat,row,&nz,&cols,&vals);
536:       row++;
537:     }
538:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
539:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
540:     PetscViewerGetSingleton(viewer,&sviewer);
541:     if (!rank) {
542:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
543:     }
544:     PetscViewerRestoreSingleton(viewer,&sviewer);
545:     PetscViewerFlush(viewer);
546:     MatDestroy(A);
547:   }
548:   return(0);
549: }

553: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
554: {
555:   int        ierr;
556:   PetscTruth isascii,isdraw,isbinary;

559:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
560:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
561:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
562:   if (isascii || isdraw) {
563:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
564:   } else if (isbinary) {
565:     MatView_MPIBDiag_Binary(mat,viewer);
566:   } else {
567:     SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
568:   }
569:   return(0);
570: }

574: int MatSetOption_MPIBDiag(Mat A,MatOption op)
575: {
576:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
577:   int          ierr;

579:   switch (op) {
580:   case MAT_NO_NEW_NONZERO_LOCATIONS:
581:   case MAT_YES_NEW_NONZERO_LOCATIONS:
582:   case MAT_NEW_NONZERO_LOCATION_ERR:
583:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
584:   case MAT_NO_NEW_DIAGONALS:
585:   case MAT_YES_NEW_DIAGONALS:
586:     MatSetOption(mbd->A,op);
587:     break;
588:   case MAT_ROW_ORIENTED:
589:     mbd->roworiented = PETSC_TRUE;
590:     MatSetOption(mbd->A,op);
591:     break;
592:   case MAT_COLUMN_ORIENTED:
593:     mbd->roworiented = PETSC_FALSE;
594:     MatSetOption(mbd->A,op);
595:     break;
596:   case MAT_IGNORE_OFF_PROC_ENTRIES:
597:     mbd->donotstash = PETSC_TRUE;
598:     break;
599:   case MAT_ROWS_SORTED:
600:   case MAT_ROWS_UNSORTED:
601:   case MAT_COLUMNS_SORTED:
602:   case MAT_COLUMNS_UNSORTED:
603:     PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
604:     break;
605:   case MAT_SYMMETRIC:
606:   case MAT_STRUCTURALLY_SYMMETRIC:
607:   case MAT_NOT_SYMMETRIC:
608:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
609:   case MAT_HERMITIAN:
610:   case MAT_NOT_HERMITIAN:
611:   case MAT_SYMMETRY_ETERNAL:
612:   case MAT_NOT_SYMMETRY_ETERNAL:
613:     break;
614:   default:
615:     SETERRQ(PETSC_ERR_SUP,"unknown option");
616:   }
617:   return(0);
618: }

622: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
623: {
624:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
625:   int          lrow,ierr;

628:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
629:   lrow = row - mat->rstart;
630:   MatGetRow(mat->A,lrow,nz,idx,v);
631:   return(0);
632: }

636: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
637:                                   PetscScalar **v)
638: {
639:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
640:   int          lrow,ierr;

643:   lrow = row - mat->rstart;
644:   MatRestoreRow(mat->A,lrow,nz,idx,v);
645:   return(0);
646: }


651: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
652: {
653:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
654:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
655:   PetscReal    sum = 0.0;
656:   int          ierr,d,i,nd = a->nd,bs = a->bs,len;
657:   PetscScalar  *dv;

660:   if (type == NORM_FROBENIUS) {
661:     for (d=0; d<nd; d++) {
662:       dv   = a->diagv[d];
663:       len  = a->bdlen[d]*bs*bs;
664:       for (i=0; i<len; i++) {
665: #if defined(PETSC_USE_COMPLEX)
666:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
667: #else
668:         sum += dv[i]*dv[i];
669: #endif
670:       }
671:     }
672:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
673:     *nrm = sqrt(*nrm);
674:     PetscLogFlops(2*A->n*A->m);
675:   } else if (type == NORM_1) { /* max column norm */
676:     PetscReal *tmp,*tmp2;
677:     int    j;
678:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
679:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
680:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
681:     *nrm = 0.0;
682:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
683:     for (j=0; j<mbd->A->n; j++) {
684:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
685:     }
686:     PetscFree(tmp);
687:     PetscFree(tmp2);
688:   } else if (type == NORM_INFINITY) { /* max row norm */
689:     PetscReal normtemp;
690:     MatNorm(mbd->A,type,&normtemp);
691:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
692:   }
693:   return(0);
694: }

698: int MatPrintHelp_MPIBDiag(Mat A)
699: {
700:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
701:   int          ierr;

704:   if (!a->rank) {
705:     MatPrintHelp_SeqBDiag(a->A);
706:   }
707:   return(0);
708: }

712: int MatScale_MPIBDiag(const PetscScalar *alpha,Mat A)
713: {
714:   int          ierr;
715:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;

718:   MatScale_SeqBDiag(alpha,a->A);
719:   return(0);
720: }

724: int MatSetUpPreallocation_MPIBDiag(Mat A)
725: {
726:   int        ierr;

729:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
730:   return(0);
731: }

733: /* -------------------------------------------------------------------*/

735: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
736:        MatGetRow_MPIBDiag,
737:        MatRestoreRow_MPIBDiag,
738:        MatMult_MPIBDiag,
739: /* 4*/ MatMultAdd_MPIBDiag,
740:        MatMultTranspose_MPIBDiag,
741:        MatMultTransposeAdd_MPIBDiag,
742:        0,
743:        0,
744:        0,
745: /*10*/ 0,
746:        0,
747:        0,
748:        0,
749:        0,
750: /*15*/ MatGetInfo_MPIBDiag,
751:        0,
752:        MatGetDiagonal_MPIBDiag,
753:        0,
754:        MatNorm_MPIBDiag,
755: /*20*/ MatAssemblyBegin_MPIBDiag,
756:        MatAssemblyEnd_MPIBDiag,
757:        0,
758:        MatSetOption_MPIBDiag,
759:        MatZeroEntries_MPIBDiag,
760: /*25*/ MatZeroRows_MPIBDiag,
761:        0,
762:        0,
763:        0,
764:        0,
765: /*30*/ MatSetUpPreallocation_MPIBDiag,
766:        0,
767:        0,
768:        0,
769:        0,
770: /*35*/ 0,
771:        0,
772:        0,
773:        0,
774:        0,
775: /*40*/ 0,
776:        0,
777:        0,
778:        MatGetValues_MPIBDiag,
779:        0,
780: /*45*/ MatPrintHelp_MPIBDiag,
781:        MatScale_MPIBDiag,
782:        0,
783:        0,
784:        0,
785: /*50*/ MatGetBlockSize_MPIBDiag,
786:        0,
787:        0,
788:        0,
789:        0,
790: /*55*/ 0,
791:        0,
792:        0,
793:        0,
794:        0,
795: /*60*/ 0,
796:        MatDestroy_MPIBDiag,
797:        MatView_MPIBDiag,
798:        MatGetPetscMaps_Petsc,
799:        0,
800: /*65*/ 0,
801:        0,
802:        0,
803:        0,
804:        0,
805: /*70*/ 0,
806:        0,
807:        0,
808:        0,
809:        0,
810: /*75*/ 0,
811:        0,
812:        0,
813:        0,
814:        0,
815: /*80*/ 0,
816:        0,
817:        0,
818:        0,
819: /*85*/ MatLoad_MPIBDiag
820: };

822: EXTERN_C_BEGIN
825: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
826: {
827:   Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
828:   int          ierr,lrows,lcols,rstart,rend;
829:   IS           localc,localr;

832:   MatGetLocalSize(A,&lrows,&lcols);
833:   MatGetOwnershipRange(A,&rstart,&rend);
834:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
835:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
836:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
837:   ISDestroy(localr);
838:   ISDestroy(localc);

840:   *iscopy = PETSC_TRUE;
841:   return(0);
842: }
843: EXTERN_C_END

845: EXTERN_C_BEGIN
848: int MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
849: {
850:   Mat_MPIBDiag *b;
851:   int          ierr,i,k,*ldiag,len,nd2;
852:   PetscScalar  **ldiagv = 0;
853:   PetscTruth   flg2;

856:   B->preallocated = PETSC_TRUE;
857:   if (bs == PETSC_DEFAULT) bs = 1;
858:   if (nd == PETSC_DEFAULT) nd = 0;
859:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
860:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
861:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
862:   if (nd && !diag) {
863:     PetscMalloc(nd*sizeof(int),&diag);
864:     nd2  = nd;
865:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
866:     if (nd2 != nd) {
867:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
868:     }
869:   } else if (flg2) {
870:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
871:   }

873:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

875:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
876:   B->n = B->N = PetscMax(B->n,B->N);

878:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
879:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
880:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");

882:   /* the information in the maps duplicates the information computed below, eventually 
883:      we should remove the duplicate information that is not contained in the maps */
884:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
885:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);


888:   b          = (Mat_MPIBDiag*)B->data;
889:   b->gnd     = nd;

891:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
892:   b->rowners[0] = 0;
893:   for (i=2; i<=b->size; i++) {
894:     b->rowners[i] += b->rowners[i-1];
895:   }
896:   b->rstart  = b->rowners[b->rank];
897:   b->rend    = b->rowners[b->rank+1];
898:   b->brstart = (b->rstart)/bs;
899:   b->brend   = (b->rend)/bs;


902:   /* Determine local diagonals; for now, assume global rows = global cols */
903:   /* These are sorted in MatCreateSeqBDiag */
904:   PetscMalloc((nd+1)*sizeof(int),&ldiag);
905:   len  = B->M/bs + B->N/bs + 1;
906:   PetscMalloc(len*sizeof(int),&b->gdiag);
907:   k    = 0;
908:   PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
909:                         + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
910:   if (diagv) {
911:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
912:   }
913:   for (i=0; i<nd; i++) {
914:     b->gdiag[i] = diag[i];
915:     if (diag[i] > 0) { /* lower triangular */
916:       if (diag[i] < b->brend) {
917:         ldiag[k] = diag[i] - b->brstart;
918:         if (diagv) ldiagv[k] = diagv[i];
919:         k++;
920:       }
921:     } else { /* upper triangular */
922:       if (B->M/bs - diag[i] > B->N/bs) {
923:         if (B->M/bs + diag[i] > b->brstart) {
924:           ldiag[k] = diag[i] - b->brstart;
925:           if (diagv) ldiagv[k] = diagv[i];
926:           k++;
927:         }
928:       } else {
929:         if (B->M/bs > b->brstart) {
930:           ldiag[k] = diag[i] - b->brstart;
931:           if (diagv) ldiagv[k] = diagv[i];
932:           k++;
933:         }
934:       }
935:     }
936:   }

938:   /* Form local matrix */
939:   MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
940:   MatSetType(b->A,MATSEQBDIAG);
941:   MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
942:   PetscLogObjectParent(B,b->A);
943:   PetscFree(ldiag);
944:   if (ldiagv) {PetscFree(ldiagv);}

946:   return(0);
947: }
948: EXTERN_C_END

950: /*MC
951:    MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.

953:    Options Database Keys:
954: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()

956:   Level: beginner

958: .seealso: MatCreateMPIBDiag
959: M*/
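
/*
   Example (hypothetical sketch): one way the "mpibdiag" type is commonly
   selected at runtime.  The global size of 100 is made up for
   illustration; the type is picked up from the options database when the
   program is run with -mat_type mpibdiag.

     Mat A;
     int ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIBDiagSetPreallocation(A,0,1,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
*/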

961: EXTERN_C_BEGIN
964: int MatCreate_MPIBDiag(Mat B)
965: {
966:   Mat_MPIBDiag *b;
967:   int          ierr;

970:   PetscNew(Mat_MPIBDiag,&b);
971:   B->data         = (void*)b;
972:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
973:   B->factor       = 0;
974:   B->mapping      = 0;

976:   B->insertmode = NOT_SET_VALUES;
977:   MPI_Comm_rank(B->comm,&b->rank);
978:   MPI_Comm_size(B->comm,&b->size);

980:   /* build local table of row ownerships */
981:   PetscMalloc((b->size+2)*sizeof(int),&b->rowners);

983:   /* build cache for off array entries formed */
984:   MatStashCreate_Private(B->comm,1,&B->stash);
985:   b->donotstash = PETSC_FALSE;

987:   /* stuff used for matrix-vector multiply */
988:   b->lvec        = 0;
989:   b->Mvctx       = 0;

991:   /* used for MatSetValues() input */
992:   b->roworiented = PETSC_TRUE;

994:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
995:                                      "MatGetDiagonalBlock_MPIBDiag",
996:                                       MatGetDiagonalBlock_MPIBDiag);
997:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
998:                                      "MatMPIBDiagSetPreallocation_MPIBDiag",
999:                                       MatMPIBDiagSetPreallocation_MPIBDiag);
1000:   return(0);
1001: }
1002: EXTERN_C_END

1004: /*MC
1005:    MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.

1007:    This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
1008:    and MATMPIBDIAG otherwise.

1010:    Options Database Keys:
1011: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()

1013:   Level: beginner

1015: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1016: M*/

1018: EXTERN_C_BEGIN
1021: int MatCreate_BDiag(Mat A) {
1022:   int ierr,size;

1025:   PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1026:   MPI_Comm_size(A->comm,&size);
1027:   if (size == 1) {
1028:     MatSetType(A,MATSEQBDIAG);
1029:   } else {
1030:     MatSetType(A,MATMPIBDIAG);
1031:   }
1032:   return(0);
1033: }
1034: EXTERN_C_END

1038: /*@C
1039:    MatMPIBDiagSetPreallocation - Sets the diagonal structure and preallocates storage for a distributed block diagonal matrix.

1041:    Collective on Mat

1043:    Input Parameters:
1044: +  A - the matrix 
1045: .  nd - number of block diagonals (global) (optional)
1046: .  bs - each element of a diagonal is a bs x bs dense matrix
1047: .  diag - optional array of block diagonal numbers (length nd).
1048:    For a matrix element A[i,j], where i=row and j=column, the
1049:    diagonal number is
1050: $     diag = i/bs - j/bs  (integer division)
1051:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1052:    needed (expensive).
1053: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1054:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1055:    to control memory allocation.


1058:    Options Database Keys:
1059: .  -mat_block_size <bs> - Sets blocksize
1060: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1062:    Notes:
1063:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1064:    then it must be used on all processors that share the object for that argument.

1066:    The parallel matrix is partitioned across the processors by rows, where
1067:    each local rectangular matrix is stored in the uniprocessor block 
1068:    diagonal format.  See the users manual for further details.

1070:    The user MUST specify either the local or global numbers of rows
1071:    (possibly both).

1073:    The case bs=1 (conventional diagonal storage) is implemented as
1074:    a special case.

1076:    Fortran Notes:
1077:    Fortran programmers cannot set diagv; this variable is ignored.

1079:    Level: intermediate

1081: .keywords: matrix, block, diagonal, parallel, sparse

1083: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1084: @*/
1085: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,const int diag[],PetscScalar *diagv[])
1086: {
1087:   int ierr,(*f)(Mat,int,int,const int[],PetscScalar*[]);

1090:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1091:   if (f) {
1092:     (*f)(B,nd,bs,diag,diagv);
1093:   }
1094:   return(0);
1095: }
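
/*
   Example (hypothetical sketch): preallocating a block-tridiagonal matrix
   with block size 2 and global block diagonals -1, 0, 1, following the
   diag = i/bs - j/bs numbering described above.  The global size of 100
   (divisible by bs) is made up for illustration.

     Mat A;
     int ierr,diag[3] = {-1,0,1};

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBDIAG);CHKERRQ(ierr);
     ierr = MatMPIBDiagSetPreallocation(A,3,2,diag,PETSC_NULL);CHKERRQ(ierr);
*/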

1099: /*@C
1100:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

1102:    Collective on MPI_Comm

1104:    Input Parameters:
1105: +  comm - MPI communicator
1106: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1107: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1108: .  N - number of columns (local and global)
1109: .  nd - number of block diagonals (global) (optional)
1110: .  bs - each element of a diagonal is a bs x bs dense matrix
1111: .  diag - optional array of block diagonal numbers (length nd).
1112:    For a matrix element A[i,j], where i=row and j=column, the
1113:    diagonal number is
1114: $     diag = i/bs - j/bs  (integer division)
1115:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1116:    needed (expensive).
1117: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1118:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1119:    to control memory allocation.

1121:    Output Parameter:
1122: .  A - the matrix 

1124:    Options Database Keys:
1125: .  -mat_block_size <bs> - Sets blocksize
1126: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1128:    Notes:
1129:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1130:    then it must be used on all processors that share the object for that argument.

1132:    The parallel matrix is partitioned across the processors by rows, where
1133:    each local rectangular matrix is stored in the uniprocessor block 
1134:    diagonal format.  See the users manual for further details.

1136:    The user MUST specify either the local or global numbers of rows
1137:    (possibly both).

1139:    The case bs=1 (conventional diagonal storage) is implemented as
1140:    a special case.

1142:    Fortran Notes:
1143:    Fortran programmers cannot set diagv; this variable is ignored.

1145:    Level: intermediate

1147: .keywords: matrix, block, diagonal, parallel, sparse

1149: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1150: @*/
1151: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,const int diag[],PetscScalar *diagv[],Mat *A)
1152: {
1153:   int ierr,size;

1156:   MatCreate(comm,m,m,M,N,A);
1157:   MPI_Comm_size(comm,&size);
1158:   if (size > 1) {
1159:     MatSetType(*A,MATMPIBDIAG);
1160:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1161:   } else {
1162:     MatSetType(*A,MATSEQBDIAG);
1163:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1164:   }
1165:   return(0);
1166: }
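
/*
   Example (hypothetical sketch): creating an ordinary tridiagonal matrix
   (bs = 1) directly with MatCreateMPIBDiag, letting PETSc choose the
   local row distribution.  The global size of 100 is made up for
   illustration.

     Mat A;
     int ierr,diag[3] = {-1,0,1};

     ierr = MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,100,100,3,1,diag,PETSC_NULL,&A);CHKERRQ(ierr);
*/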

1170: /*@C
1171:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1172:    For the parallel case, this returns information for the local submatrix.

1174:    Input Parameters:
1175: .  mat - the matrix, stored in block diagonal format.

1177:    Not Collective

1179:    Output Parameters:
1182: +  nd - number of block diagonals
1183: .  bs - each element of a diagonal is a bs x bs dense matrix
1184: .  bdlen - array of total block lengths of block diagonals
1185: .  diag - array of block diagonal numbers (length nd).
1186:    For a matrix element A[i,j], where i=row and j=column, the
1187:    diagonal number is
1188: $     diag = i/bs - j/bs  (integer division)
1191: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1193:    Level: advanced

1195:    Notes:
1196:    See the users manual for further details regarding this storage format.

1198: .keywords: matrix, block, diagonal, get, data

1200: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1201: @*/
1202: int MatBDiagGetData(Mat mat,int *nd,int *bs,int *diag[],int *bdlen[],PetscScalar ***diagv)
1203: {
1204:   Mat_MPIBDiag *pdmat;
1205:   Mat_SeqBDiag *dmat = 0;
1206:   PetscTruth   isseq,ismpi;
1207:   int          ierr;

1211:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1212:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1213:   if (isseq) {
1214:     dmat = (Mat_SeqBDiag*)mat->data;
1215:   } else if (ismpi) {
1216:     pdmat = (Mat_MPIBDiag*)mat->data;
1217:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1218:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1219:   *nd    = dmat->nd;
1220:   *bs    = dmat->bs;
1221:   *diag  = dmat->diag;
1222:   *bdlen = dmat->bdlen;
1223:   *diagv = dmat->diagv;
1224:   return(0);
1225: }
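
/*
   Example (hypothetical sketch): querying the local diagonal layout of an
   assembled matrix A stored in MATSEQBDIAG or MATMPIBDIAG format.

     int         nd,bs,*diag,*bdlen,ierr;
     PetscScalar **diagv;

     ierr = MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
*/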

 1227:  #include "petscsys.h"

1231: int MatLoad_MPIBDiag(PetscViewer viewer,const MatType type,Mat *newmat)
1232: {
1233:   Mat          A;
1234:   PetscScalar  *vals,*svals;
1235:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1236:   MPI_Status   status;
1237:   int          bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1238:   int          header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1239:   int          *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1240:   int          tag = ((PetscObject)viewer)->tag,extra_rows;

1243:   MPI_Comm_size(comm,&size);
1244:   MPI_Comm_rank(comm,&rank);
1245:   if (!rank) {
1246:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1247:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1248:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1249:     if (header[3] < 0) {
1250:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1251:     }
1252:   }
1253:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1254:   M = header[1]; N = header[2];

1256:   bs = 1;   /* uses a block size of 1 by default; */
1257:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1259:   /* 
1260:      This code adds extra rows to make sure the number of rows is 
1261:      divisible by the blocksize
1262:   */
1263:   Mbs        = M/bs;
1264:   extra_rows = bs - M + bs*(Mbs);
1265:   if (extra_rows == bs) extra_rows = 0;
1266:   else                  Mbs++;
1267:   if (extra_rows && !rank) {
1268:     PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1269:   }

1271:   /* determine ownership of all rows */
1272:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1273:   PetscMalloc((size+2)*sizeof(int),&rowners);
1274:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1275:   rowners[0] = 0;
1276:   for (i=2; i<=size; i++) {
1277:     rowners[i] += rowners[i-1];
1278:   }
1279:   rstart = rowners[rank];
1280:   rend   = rowners[rank+1];

1282:   /* distribute row lengths to all processors */
1283:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1284:   if (!rank) {
1285:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1286:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1287:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1288:     PetscMalloc(size*sizeof(int),&sndcounts);
1289:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1290:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1291:     PetscFree(sndcounts);
1292:   } else {
1293:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1294:   }

1296:   if (!rank) {
1297:     /* calculate the number of nonzeros on each processor */
1298:     PetscMalloc(size*sizeof(int),&procsnz);
1299:     PetscMemzero(procsnz,size*sizeof(int));
1300:     for (i=0; i<size; i++) {
1301:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1302:         procsnz[i] += rowlengths[j];
1303:       }
1304:     }
1305:     PetscFree(rowlengths);

1307:     /* determine max buffer needed and allocate it */
1308:     maxnz = 0;
1309:     for (i=0; i<size; i++) {
1310:       maxnz = PetscMax(maxnz,procsnz[i]);
1311:     }
1312:     PetscMalloc(maxnz*sizeof(int),&cols);

1314:     /* read in my part of the matrix column indices  */
1315:     nz   = procsnz[0];
1316:     PetscMalloc(nz*sizeof(int),&mycols);
1317:     if (size == 1)  nz -= extra_rows;
1318:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1319:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

 1321:     /* read in everyone else's column indices and ship them off */
1322:     for (i=1; i<size-1; i++) {
1323:       nz   = procsnz[i];
1324:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1325:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1326:     }
1327:     /* read in the stuff for the last proc */
1328:     if (size != 1) {
1329:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1330:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1331:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1332:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1333:     }
1334:     PetscFree(cols);
1335:   } else {
1336:     /* determine buffer space needed for message */
1337:     nz = 0;
1338:     for (i=0; i<m; i++) {
1339:       nz += ourlens[i];
1340:     }
1341:     PetscMalloc(nz*sizeof(int),&mycols);

1343:     /* receive message of column indices*/
1344:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1345:     MPI_Get_count(&status,MPI_INT,&maxnz);
1346:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1347:   }

1349:   MatCreate(comm,m,m,M+extra_rows,N+extra_rows,newmat);
1350:   MatSetType(*newmat,type);
1351:   MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1352:   A = *newmat;

1354:   if (!rank) {
1355:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1357:     /* read in my part of the matrix numerical values  */
1358:     nz = procsnz[0];
1359:     if (size == 1)  nz -= extra_rows;
1360:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1361:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1363:     /* insert into matrix */
1364:     jj      = rstart;
1365:     smycols = mycols;
1366:     svals   = vals;
1367:     for (i=0; i<m; i++) {
1368:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1369:       smycols += ourlens[i];
1370:       svals   += ourlens[i];
1371:       jj++;
1372:     }

1374:     /* read in other processors (except the last one) and ship out */
1375:     for (i=1; i<size-1; i++) {
1376:       nz   = procsnz[i];
1377:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1378:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1379:     }
1380:     /* the last proc */
1381:     if (size != 1){
1382:       nz   = procsnz[i] - extra_rows;
1383:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1384:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1385:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1386:     }
1387:     PetscFree(procsnz);
1388:   } else {
1389:     /* receive numeric values */
1390:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1392:     /* receive message of values*/
1393:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1394:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1395:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1397:     /* insert into matrix */
1398:     jj      = rstart;
1399:     smycols = mycols;
1400:     svals   = vals;
1401:     for (i=0; i<m; i++) {
1402:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1403:       smycols += ourlens[i];
1404:       svals   += ourlens[i];
1405:       jj++;
1406:     }
1407:   }
1408:   PetscFree(ourlens);
1409:   PetscFree(vals);
1410:   PetscFree(mycols);
1411:   PetscFree(rowners);

1413:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1414:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1415:   return(0);
1416: }