Actual source code: mpidense.c

  1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"

  9: EXTERN_C_BEGIN
 12: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 13: {
 14:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 15:   int          m = A->m,rstart = mdn->rstart,ierr;
 16:   PetscScalar  *array;
 17:   MPI_Comm     comm;

 20:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 22:   /* The reuse aspect is not implemented efficiently */
 23:   if (reuse) { MatDestroy(*B);}

 25:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 26:   MatGetArray(mdn->A,&array);
 27:   MatCreate(comm,m,m,m,m,B);
 28:   MatSetType(*B,mdn->A->type_name);
 29:   MatSeqDenseSetPreallocation(*B,array+m*rstart);
 30:   MatRestoreArray(mdn->A,&array);
 31:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 32:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 33: 
 34:   *iscopy = PETSC_TRUE;
 35:   return(0);
 36: }
 37: EXTERN_C_END

 41: int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
 42: {
 43:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 44:   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
 45:   PetscTruth   roworiented = A->roworiented;

 48:   for (i=0; i<m; i++) {
 49:     if (idxm[i] < 0) continue;
 50:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 51:     if (idxm[i] >= rstart && idxm[i] < rend) {
 52:       row = idxm[i] - rstart;
 53:       if (roworiented) {
 54:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 55:       } else {
 56:         for (j=0; j<n; j++) {
 57:           if (idxn[j] < 0) continue;
 58:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 59:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 60:         }
 61:       }
 62:     } else {
 63:       if (!A->donotstash) {
 64:         if (roworiented) {
 65:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 66:         } else {
 67:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 68:         }
 69:       }
 70:     }
 71:   }
 72:   return(0);
 73: }

 77: int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
 78: {
 79:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
 80:   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

 83:   for (i=0; i<m; i++) {
 84:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 85:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 86:     if (idxm[i] >= rstart && idxm[i] < rend) {
 87:       row = idxm[i] - rstart;
 88:       for (j=0; j<n; j++) {
 89:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 90:         if (idxn[j] >= mat->N) {
 91:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 92:         }
 93:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
 94:       }
 95:     } else {
 96:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 97:     }
 98:   }
 99:   return(0);
100: }

104: int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
105: {
106:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
107:   int          ierr;

110:   MatGetArray(a->A,array);
111:   return(0);
112: }

116: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
117: {
118:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
119:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
120:   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
121:   PetscScalar  *av,*bv,*v = lmat->v;
122:   Mat          newmat;

125:   ISGetIndices(isrow,&irow);
126:   ISGetIndices(iscol,&icol);
127:   ISGetLocalSize(isrow,&nrows);
128:   ISGetLocalSize(iscol,&ncols);

 130:   /* No parallel redistribution is currently supported! We should really check each index
 131:      set to confirm that it is OK.  Currently only a submatrix with the same partitioning as
 132:      the original matrix is supported! */

134:   MatGetLocalSize(A,&nlrows,&nlcols);
135:   MatGetOwnershipRange(A,&rstart,&rend);
136: 
137:   /* Check submatrix call */
138:   if (scall == MAT_REUSE_MATRIX) {
139:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
 140:     /* Really need to test row and column sizes! */
141:     newmat = *B;
142:   } else {
143:     /* Create and fill new matrix */
144:     MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);
145:     MatSetType(newmat,A->type_name);
146:     MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
147:   }

149:   /* Now extract the data pointers and do the copy, column at a time */
150:   newmatd = (Mat_MPIDense*)newmat->data;
151:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
152: 
153:   for (i=0; i<ncols; i++) {
154:     av = v + nlrows*icol[i];
155:     for (j=0; j<nrows; j++) {
156:       *bv++ = av[irow[j] - rstart];
157:     }
158:   }

160:   /* Assemble the matrices so that the correct flags are set */
161:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
162:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

164:   /* Free work space */
165:   ISRestoreIndices(isrow,&irow);
166:   ISRestoreIndices(iscol,&icol);
167:   *B = newmat;
168:   return(0);
169: }

173: int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
174: {
176:   return(0);
177: }

181: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
182: {
183:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
184:   MPI_Comm     comm = mat->comm;
185:   int          ierr,nstash,reallocs;
186:   InsertMode   addv;

 189:   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
190:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
191:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
192:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
193:   }
194:   mat->insertmode = addv; /* in case this processor had no cache */

196:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
197:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
198:   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
199:   return(0);
200: }

204: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
205: {
206:   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
207:   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
208:   PetscScalar  *val;
209:   InsertMode   addv=mat->insertmode;

212:   /*  wait on receives */
213:   while (1) {
214:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
215:     if (!flg) break;
216: 
217:     for (i=0; i<n;) {
218:       /* Now identify the consecutive vals belonging to the same row */
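      /* e.g. if this message carries row = {4,4,4,9,9}, the first pass below groups
         the three entries of row 4 (ncols = 3) and the next pass the two entries of
         row 9 (ncols = 2), so each received row costs one call */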
219:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
220:       if (j < n) ncols = j-i;
221:       else       ncols = n-i;
222:       /* Now assemble all these values with a single function call */
223:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
224:       i = j;
225:     }
226:   }
227:   MatStashScatterEnd_Private(&mat->stash);
228: 
229:   MatAssemblyBegin(mdn->A,mode);
230:   MatAssemblyEnd(mdn->A,mode);

232:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
233:     MatSetUpMultiply_MPIDense(mat);
234:   }
235:   return(0);
236: }

240: int MatZeroEntries_MPIDense(Mat A)
241: {
242:   int          ierr;
243:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

246:   MatZeroEntries(l->A);
247:   return(0);
248: }

252: int MatGetBlockSize_MPIDense(Mat A,int *bs)
253: {
255:   *bs = 1;
256:   return(0);
257: }

 259: /* the code does not handle the diagonal entries correctly unless the 
 260:    matrix is square and the column and row ownerships are identical.
261:    This is a BUG. The only way to fix it seems to be to access 
262:    mdn->A and mdn->B directly and not through the MatZeroRows() 
263:    routine. 
264: */
267: int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
268: {
269:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
270:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
271:   int            *nprocs,j,idx,nsends;
272:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
273:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
274:   int            *lens,imdex,*lrows,*values;
275:   MPI_Comm       comm = A->comm;
276:   MPI_Request    *send_waits,*recv_waits;
277:   MPI_Status     recv_status,*send_status;
278:   IS             istmp;
279:   PetscTruth     found;

282:   ISGetLocalSize(is,&N);
283:   ISGetIndices(is,&rows);

285:   /*  first count number of contributors to each processor */
286:   PetscMalloc(2*size*sizeof(int),&nprocs);
287:   PetscMemzero(nprocs,2*size*sizeof(int));
288:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
289:   for (i=0; i<N; i++) {
290:     idx = rows[i];
291:     found = PETSC_FALSE;
292:     for (j=0; j<size; j++) {
293:       if (idx >= owners[j] && idx < owners[j+1]) {
294:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
295:       }
296:     }
297:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298:   }
299:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

301:   /* inform other processors of number of messages and max length*/
302:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

304:   /* post receives:   */
305:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
306:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
307:   for (i=0; i<nrecvs; i++) {
308:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
309:   }

311:   /* do sends:
312:       1) starts[i] gives the starting index in svalues for stuff going to 
313:          the ith processor
314:   */
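  /* e.g. with 3 processors and per-processor counts nprocs[0],nprocs[2],nprocs[4] = 2,0,3
     the loop below gives starts = {0,2,2}; the rows owned by processor 0 are packed into
     svalues[0] and svalues[1], and those owned by processor 2 into svalues[2..4] */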
315:   PetscMalloc((N+1)*sizeof(int),&svalues);
316:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
317:   PetscMalloc((size+1)*sizeof(int),&starts);
318:   starts[0]  = 0;
319:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
320:   for (i=0; i<N; i++) {
321:     svalues[starts[owner[i]]++] = rows[i];
322:   }
323:   ISRestoreIndices(is,&rows);

325:   starts[0] = 0;
326:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
327:   count = 0;
328:   for (i=0; i<size; i++) {
329:     if (nprocs[2*i+1]) {
330:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
331:     }
332:   }
333:   PetscFree(starts);

335:   base = owners[rank];

337:   /*  wait on receives */
338:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
339:   source = lens + nrecvs;
340:   count  = nrecvs; slen = 0;
341:   while (count) {
342:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
343:     /* unpack receives into our local space */
344:     MPI_Get_count(&recv_status,MPI_INT,&n);
345:     source[imdex]  = recv_status.MPI_SOURCE;
346:     lens[imdex]  = n;
347:     slen += n;
348:     count--;
349:   }
350:   PetscFree(recv_waits);
351: 
 352:   /* move the received rows into the local list of rows to zero */
353:   PetscMalloc((slen+1)*sizeof(int),&lrows);
354:   count = 0;
355:   for (i=0; i<nrecvs; i++) {
356:     values = rvalues + i*nmax;
357:     for (j=0; j<lens[i]; j++) {
358:       lrows[count++] = values[j] - base;
359:     }
360:   }
361:   PetscFree(rvalues);
362:   PetscFree(lens);
363:   PetscFree(owner);
364:   PetscFree(nprocs);
365: 
366:   /* actually zap the local rows */
367:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
368:   PetscLogObjectParent(A,istmp);
369:   PetscFree(lrows);
370:   MatZeroRows(l->A,istmp,diag);
371:   ISDestroy(istmp);

373:   /* wait on sends */
374:   if (nsends) {
375:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
376:     MPI_Waitall(nsends,send_waits,send_status);
377:     PetscFree(send_status);
378:   }
379:   PetscFree(send_waits);
380:   PetscFree(svalues);

382:   return(0);
383: }

387: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
388: {
389:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
390:   int          ierr;

393:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
394:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
395:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
396:   return(0);
397: }

401: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
402: {
403:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
404:   int          ierr;

407:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
408:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
409:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
410:   return(0);
411: }

415: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
416: {
417:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
418:   int          ierr;
419:   PetscScalar  zero = 0.0;

422:   VecSet(&zero,yy);
423:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
424:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
425:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
426:   return(0);
427: }

431: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
432: {
433:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
434:   int          ierr;

437:   VecCopy(yy,zz);
438:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
439:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
440:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
441:   return(0);
442: }

446: int MatGetDiagonal_MPIDense(Mat A,Vec v)
447: {
448:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
449:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
450:   int          ierr,len,i,n,m = A->m,radd;
451:   PetscScalar  *x,zero = 0.0;
452: 
454:   VecSet(&zero,v);
455:   VecGetArray(v,&x);
456:   VecGetSize(v,&n);
457:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
458:   len  = PetscMin(a->A->m,a->A->n);
459:   radd = a->rstart*m;
460:   for (i=0; i<len; i++) {
461:     x[i] = aloc->v[radd + i*m + i];
462:   }
463:   VecRestoreArray(v,&x);
464:   return(0);
465: }

469: int MatDestroy_MPIDense(Mat mat)
470: {
471:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
472:   int          ierr;


476: #if defined(PETSC_USE_LOG)
477:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
478: #endif
479:   MatStashDestroy_Private(&mat->stash);
480:   PetscFree(mdn->rowners);
481:   MatDestroy(mdn->A);
482:   if (mdn->lvec)   VecDestroy(mdn->lvec);
483:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
484:   if (mdn->factor) {
485:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
486:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
487:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
488:     PetscFree(mdn->factor);
489:   }
490:   PetscFree(mdn);
491:   return(0);
492: }

496: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
497: {
498:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
499:   int          ierr;

502:   if (mdn->size == 1) {
503:     MatView(mdn->A,viewer);
504:   }
505:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
506:   return(0);
507: }

511: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
512: {
513:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
514:   int               ierr,size = mdn->size,rank = mdn->rank;
515:   PetscViewerType   vtype;
516:   PetscTruth        isascii,isdraw;
517:   PetscViewer       sviewer;
518:   PetscViewerFormat format;

521:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
522:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
523:   if (isascii) {
524:     PetscViewerGetType(viewer,&vtype);
525:     PetscViewerGetFormat(viewer,&format);
526:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
527:       MatInfo info;
528:       MatGetInfo(mat,MAT_LOCAL,&info);
529:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
530:                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
531:       PetscViewerFlush(viewer);
532:       VecScatterView(mdn->Mvctx,viewer);
533:       return(0);
534:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
535:       return(0);
536:     }
537:   } else if (isdraw) {
538:     PetscDraw       draw;
539:     PetscTruth isnull;

541:     PetscViewerDrawGetDraw(viewer,0,&draw);
542:     PetscDrawIsNull(draw,&isnull);
543:     if (isnull) return(0);
544:   }

546:   if (size == 1) {
547:     MatView(mdn->A,viewer);
548:   } else {
 549:     /* assemble the entire matrix onto the first processor. */
550:     Mat          A;
551:     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
552:     PetscScalar  *vals;

554:     if (!rank) {
555:       MatCreate(mat->comm,M,N,M,N,&A);
556:     } else {
557:       MatCreate(mat->comm,0,0,M,N,&A);
558:     }
559:     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
560:     MatSetType(A,MATMPIDENSE);
561:     MatMPIDenseSetPreallocation(A,PETSC_NULL);
562:     PetscLogObjectParent(mat,A);

564:     /* Copy the matrix ... This isn't the most efficient means,
565:        but it's quick for now */
566:     row = mdn->rstart; m = mdn->A->m;
567:     for (i=0; i<m; i++) {
568:       MatGetRow(mat,row,&nz,&cols,&vals);
569:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
570:       MatRestoreRow(mat,row,&nz,&cols,&vals);
571:       row++;
572:     }

574:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
575:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
576:     PetscViewerGetSingleton(viewer,&sviewer);
577:     if (!rank) {
578:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
579:     }
580:     PetscViewerRestoreSingleton(viewer,&sviewer);
581:     PetscViewerFlush(viewer);
582:     MatDestroy(A);
583:   }
584:   return(0);
585: }

589: int MatView_MPIDense(Mat mat,PetscViewer viewer)
590: {
591:   int        ierr;
592:   PetscTruth isascii,isbinary,isdraw,issocket;
593: 
595: 
596:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
597:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
598:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
599:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

601:   if (isascii || issocket || isdraw) {
602:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
603:   } else if (isbinary) {
604:     MatView_MPIDense_Binary(mat,viewer);
605:   } else {
606:     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
607:   }
608:   return(0);
609: }

613: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
614: {
615:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
616:   Mat          mdn = mat->A;
617:   int          ierr;
618:   PetscReal    isend[5],irecv[5];

621:   info->rows_global    = (double)A->M;
622:   info->columns_global = (double)A->N;
623:   info->rows_local     = (double)A->m;
624:   info->columns_local  = (double)A->N;
625:   info->block_size     = 1.0;
626:   MatGetInfo(mdn,MAT_LOCAL,info);
627:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
628:   isend[3] = info->memory;  isend[4] = info->mallocs;
629:   if (flag == MAT_LOCAL) {
630:     info->nz_used      = isend[0];
631:     info->nz_allocated = isend[1];
632:     info->nz_unneeded  = isend[2];
633:     info->memory       = isend[3];
634:     info->mallocs      = isend[4];
635:   } else if (flag == MAT_GLOBAL_MAX) {
636:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
637:     info->nz_used      = irecv[0];
638:     info->nz_allocated = irecv[1];
639:     info->nz_unneeded  = irecv[2];
640:     info->memory       = irecv[3];
641:     info->mallocs      = irecv[4];
642:   } else if (flag == MAT_GLOBAL_SUM) {
643:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
644:     info->nz_used      = irecv[0];
645:     info->nz_allocated = irecv[1];
646:     info->nz_unneeded  = irecv[2];
647:     info->memory       = irecv[3];
648:     info->mallocs      = irecv[4];
649:   }
650:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
651:   info->fill_ratio_needed = 0;
652:   info->factor_mallocs    = 0;
653:   return(0);
654: }

658: int MatSetOption_MPIDense(Mat A,MatOption op)
659: {
660:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
661:   int          ierr;

664:   switch (op) {
665:   case MAT_NO_NEW_NONZERO_LOCATIONS:
666:   case MAT_YES_NEW_NONZERO_LOCATIONS:
667:   case MAT_NEW_NONZERO_LOCATION_ERR:
668:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
669:   case MAT_COLUMNS_SORTED:
670:   case MAT_COLUMNS_UNSORTED:
671:     MatSetOption(a->A,op);
672:     break;
673:   case MAT_ROW_ORIENTED:
674:     a->roworiented = PETSC_TRUE;
675:     MatSetOption(a->A,op);
676:     break;
677:   case MAT_ROWS_SORTED:
678:   case MAT_ROWS_UNSORTED:
679:   case MAT_YES_NEW_DIAGONALS:
680:   case MAT_USE_HASH_TABLE:
681:     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
682:     break;
683:   case MAT_COLUMN_ORIENTED:
684:     a->roworiented = PETSC_FALSE;
685:     MatSetOption(a->A,op);
686:     break;
687:   case MAT_IGNORE_OFF_PROC_ENTRIES:
688:     a->donotstash = PETSC_TRUE;
689:     break;
690:   case MAT_NO_NEW_DIAGONALS:
691:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
692:   case MAT_SYMMETRIC:
693:   case MAT_STRUCTURALLY_SYMMETRIC:
694:   case MAT_NOT_SYMMETRIC:
695:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
696:   case MAT_HERMITIAN:
697:   case MAT_NOT_HERMITIAN:
698:   case MAT_SYMMETRY_ETERNAL:
699:   case MAT_NOT_SYMMETRY_ETERNAL:
700:     break;
701:   default:
702:     SETERRQ(PETSC_ERR_SUP,"unknown option");
703:   }
704:   return(0);
705: }

709: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
710: {
711:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
712:   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

715:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
716:   lrow = row - rstart;
717:   MatGetRow(mat->A,lrow,nz,idx,v);
718:   return(0);
719: }

723: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
724: {

728:   if (idx) {PetscFree(*idx);}
729:   if (v) {PetscFree(*v);}
730:   return(0);
731: }

735: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
736: {
737:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
738:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
739:   PetscScalar  *l,*r,x,*v;
740:   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

743:   MatGetLocalSize(A,&s2,&s3);
744:   if (ll) {
745:     VecGetLocalSize(ll,&s2a);
746:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
747:     VecGetArray(ll,&l);
748:     for (i=0; i<m; i++) {
749:       x = l[i];
750:       v = mat->v + i;
751:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
752:     }
753:     VecRestoreArray(ll,&l);
754:     PetscLogFlops(n*m);
755:   }
756:   if (rr) {
757:     VecGetSize(rr,&s3a);
758:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
759:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
760:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
761:     VecGetArray(mdn->lvec,&r);
762:     for (i=0; i<n; i++) {
763:       x = r[i];
764:       v = mat->v + i*m;
765:       for (j=0; j<m; j++) { (*v++) *= x;}
766:     }
767:     VecRestoreArray(mdn->lvec,&r);
768:     PetscLogFlops(n*m);
769:   }
770:   return(0);
771: }

775: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
776: {
777:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
778:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
779:   int          ierr,i,j;
780:   PetscReal    sum = 0.0;
781:   PetscScalar  *v = mat->v;

784:   if (mdn->size == 1) {
785:      MatNorm(mdn->A,type,nrm);
786:   } else {
787:     if (type == NORM_FROBENIUS) {
788:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
789: #if defined(PETSC_USE_COMPLEX)
790:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
791: #else
792:         sum += (*v)*(*v); v++;
793: #endif
794:       }
795:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
796:       *nrm = sqrt(*nrm);
797:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
798:     } else if (type == NORM_1) {
799:       PetscReal *tmp,*tmp2;
800:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
801:       tmp2 = tmp + A->N;
802:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
803:       *nrm = 0.0;
804:       v = mat->v;
805:       for (j=0; j<mdn->A->n; j++) {
806:         for (i=0; i<mdn->A->m; i++) {
807:           tmp[j] += PetscAbsScalar(*v);  v++;
808:         }
809:       }
810:       MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
811:       for (j=0; j<A->N; j++) {
812:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
813:       }
814:       PetscFree(tmp);
815:       PetscLogFlops(A->n*A->m);
816:     } else if (type == NORM_INFINITY) { /* max row norm */
817:       PetscReal ntemp;
818:       MatNorm(mdn->A,type,&ntemp);
819:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
820:     } else {
821:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
822:     }
823:   }
824:   return(0);
825: }

829: int MatTranspose_MPIDense(Mat A,Mat *matout)
830: {
831:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
832:   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
833:   Mat          B;
834:   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
835:   int          j,i,ierr;
836:   PetscScalar  *v;

839:   if (!matout && M != N) {
840:     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
841:   }
842:   MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);
843:   MatSetType(B,A->type_name);
844:   MatMPIDenseSetPreallocation(B,PETSC_NULL);

846:   m = a->A->m; n = a->A->n; v = Aloc->v;
847:   PetscMalloc(n*sizeof(int),&rwork);
848:   for (j=0; j<n; j++) {
849:     for (i=0; i<m; i++) rwork[i] = rstart + i;
850:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
851:     v   += m;
852:   }
853:   PetscFree(rwork);
854:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
855:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
856:   if (matout) {
857:     *matout = B;
858:   } else {
859:     MatHeaderCopy(A,B);
860:   }
861:   return(0);
862: }

 864:  #include "petscblaslapack.h"
867: int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
868: {
869:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
870:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
871:   int          one = 1,nz;

874:   nz = inA->m*inA->N;
875:   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
876:   PetscLogFlops(nz);
877:   return(0);
878: }

880: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

884: int MatSetUpPreallocation_MPIDense(Mat A)
885: {
886:   int        ierr;

 889:   MatMPIDenseSetPreallocation(A,0);
890:   return(0);
891: }

893: /* -------------------------------------------------------------------*/
894: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
895:        MatGetRow_MPIDense,
896:        MatRestoreRow_MPIDense,
897:        MatMult_MPIDense,
898: /* 4*/ MatMultAdd_MPIDense,
899:        MatMultTranspose_MPIDense,
900:        MatMultTransposeAdd_MPIDense,
901:        0,
902:        0,
903:        0,
904: /*10*/ 0,
905:        0,
906:        0,
907:        0,
908:        MatTranspose_MPIDense,
909: /*15*/ MatGetInfo_MPIDense,
910:        0,
911:        MatGetDiagonal_MPIDense,
912:        MatDiagonalScale_MPIDense,
913:        MatNorm_MPIDense,
914: /*20*/ MatAssemblyBegin_MPIDense,
915:        MatAssemblyEnd_MPIDense,
916:        0,
917:        MatSetOption_MPIDense,
918:        MatZeroEntries_MPIDense,
919: /*25*/ MatZeroRows_MPIDense,
920:        0,
921:        0,
922:        0,
923:        0,
924: /*30*/ MatSetUpPreallocation_MPIDense,
925:        0,
926:        0,
927:        MatGetArray_MPIDense,
928:        MatRestoreArray_MPIDense,
929: /*35*/ MatDuplicate_MPIDense,
930:        0,
931:        0,
932:        0,
933:        0,
934: /*40*/ 0,
935:        MatGetSubMatrices_MPIDense,
936:        0,
937:        MatGetValues_MPIDense,
938:        0,
939: /*45*/ 0,
940:        MatScale_MPIDense,
941:        0,
942:        0,
943:        0,
944: /*50*/ MatGetBlockSize_MPIDense,
945:        0,
946:        0,
947:        0,
948:        0,
949: /*55*/ 0,
950:        0,
951:        0,
952:        0,
953:        0,
954: /*60*/ MatGetSubMatrix_MPIDense,
955:        MatDestroy_MPIDense,
956:        MatView_MPIDense,
957:        MatGetPetscMaps_Petsc,
958:        0,
959: /*65*/ 0,
960:        0,
961:        0,
962:        0,
963:        0,
964: /*70*/ 0,
965:        0,
966:        0,
967:        0,
968:        0,
969: /*75*/ 0,
970:        0,
971:        0,
972:        0,
973:        0,
974: /*80*/ 0,
975:        0,
976:        0,
977:        0,
978: /*85*/ MatLoad_MPIDense};

980: EXTERN_C_BEGIN
983: int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
984: {
985:   Mat_MPIDense *a;
986:   int          ierr;

989:   mat->preallocated = PETSC_TRUE;
 990:   /* Note:  For now, when data is specified, this assumes that the user has correctly
 991:    allocated the local dense storage space.  We should add error checking. */

993:   a    = (Mat_MPIDense*)mat->data;
994:   MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);
995:   MatSetType(a->A,MATSEQDENSE);
996:   MatSeqDenseSetPreallocation(a->A,data);
997:   PetscLogObjectParent(mat,a->A);
998:   return(0);
999: }
1000: EXTERN_C_END

1002: /*MC
1003:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1005:    Options Database Keys:
1006: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

1008:   Level: beginner

1010: .seealso: MatCreateMPIDense
1011: M*/
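
/*
   Illustrative usage sketch, under the assumption that MatSetFromOptions() honors
   -mat_type mpidense as documented above; the helper name and the global sizes M
   and N are placeholders invented for illustration only.
*/
static int MatMPIDenseExample_FromOptions(MPI_Comm comm,int M,int N,Mat *A) /* illustrative helper */
{
  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,A);
  MatSetFromOptions(*A);                 /* picks up -mat_type mpidense */
  /* ... MatSetValues() calls go here ... */
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}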

1013: EXTERN_C_BEGIN
1016: int MatCreate_MPIDense(Mat mat)
1017: {
1018:   Mat_MPIDense *a;
1019:   int          ierr,i;

1022:   PetscNew(Mat_MPIDense,&a);
1023:   mat->data         = (void*)a;
1024:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1025:   mat->factor       = 0;
1026:   mat->mapping      = 0;

1028:   a->factor       = 0;
1029:   mat->insertmode = NOT_SET_VALUES;
1030:   MPI_Comm_rank(mat->comm,&a->rank);
1031:   MPI_Comm_size(mat->comm,&a->size);

1033:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1034:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1035:   a->nvec = mat->n;

 1037:   /* the information in the maps duplicates the information computed below; eventually 
1038:      we should remove the duplicate information that is not contained in the maps */
1039:   PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1040:   PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

1042:   /* build local table of row and column ownerships */
1043:   PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
1044:   a->cowners = a->rowners + a->size + 1;
1045:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1046:   MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
1047:   a->rowners[0] = 0;
1048:   for (i=2; i<=a->size; i++) {
1049:     a->rowners[i] += a->rowners[i-1];
1050:   }
1051:   a->rstart = a->rowners[a->rank];
1052:   a->rend   = a->rowners[a->rank+1];
1053:   MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
1054:   a->cowners[0] = 0;
1055:   for (i=2; i<=a->size; i++) {
1056:     a->cowners[i] += a->cowners[i-1];
1057:   }

 1059:   /* build cache (stash) for entries destined for other processors */
1060:   a->donotstash = PETSC_FALSE;
1061:   MatStashCreate_Private(mat->comm,1,&mat->stash);

1063:   /* stuff used for matrix vector multiply */
1064:   a->lvec        = 0;
1065:   a->Mvctx       = 0;
1066:   a->roworiented = PETSC_TRUE;

1068:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1069:                                      "MatGetDiagonalBlock_MPIDense",
1070:                                      MatGetDiagonalBlock_MPIDense);
1071:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1072:                                      "MatMPIDenseSetPreallocation_MPIDense",
1073:                                      MatMPIDenseSetPreallocation_MPIDense);
1074:   return(0);
1075: }
1076: EXTERN_C_END

1078: /*MC
1079:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1081:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1082:    and MATMPIDENSE otherwise.

1084:    Options Database Keys:
1085: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1087:   Level: beginner

1089: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1090: M*/
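
/*
   Illustrative sketch, assuming the usual MatSetType() behavior: requesting the
   generic MATDENSE type, which MatCreate_Dense() below resolves to MATSEQDENSE on
   one process and to MATMPIDENSE otherwise.  The helper name and the global sizes
   M and N are placeholders invented for illustration only.
*/
static int MatDenseExample_SetType(MPI_Comm comm,int M,int N,Mat *A) /* illustrative helper */
{
  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,A);
  MatSetType(*A,MATDENSE);               /* becomes MATSEQDENSE or MATMPIDENSE */
  return(0);
}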

1092: EXTERN_C_BEGIN
1095: int MatCreate_Dense(Mat A) {
1096:   int ierr,size;

1099:   PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1100:   MPI_Comm_size(A->comm,&size);
1101:   if (size == 1) {
1102:     MatSetType(A,MATSEQDENSE);
1103:   } else {
1104:     MatSetType(A,MATMPIDENSE);
1105:   }
1106:   return(0);
1107: }
1108: EXTERN_C_END

1112: /*@C
1113:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1115:    Not collective

1117:    Input Parameters:
1118: .  A - the matrix
1119: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1120:    to control all matrix memory allocation.

1122:    Notes:
1123:    The dense format is fully compatible with standard Fortran 77
1124:    storage by columns.

1126:    The data input variable is intended primarily for Fortran programmers
1127:    who wish to allocate their own matrix memory space.  Most users should
1128:    set data=PETSC_NULL.

1130:    Level: intermediate

1132: .keywords: matrix,dense, parallel

1134: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1135: @*/
1136: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1137: {
1138:   int ierr,(*f)(Mat,PetscScalar *);

1141:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1142:   if (f) {
1143:     (*f)(mat,data);
1144:   }
1145:   return(0);
1146: }
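
/*
   Illustrative sketch of supplying user storage to MatMPIDenseSetPreallocation()
   above.  Judging from MatMPIDenseSetPreallocation_MPIDense() and
   MatLoad_MPIDense_DenseInFile() in this file, the local block is m (local rows)
   by N (global columns), stored column by column, so local entry (i,j) lives at
   data[i + j*m]; that layout is an inference from this file, not a stated
   guarantee.  The helper name and the sizes M and N are placeholders.
*/
static int MatMPIDenseExample_UserStorage(MPI_Comm comm,int M,int N,Mat *A) /* illustrative helper */
{
  PetscScalar *data;
  int         m,n;

  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,A);
  MatSetType(*A,MATMPIDENSE);
  MatGetLocalSize(*A,&m,&n);
  PetscMalloc(m*N*sizeof(PetscScalar),&data);
  PetscMemzero(data,m*N*sizeof(PetscScalar));
  MatMPIDenseSetPreallocation(*A,data);  /* the matrix now uses data directly */
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}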

1150: /*@C
1151:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1153:    Collective on MPI_Comm

1155:    Input Parameters:
1156: +  comm - MPI communicator
1157: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1158: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1159: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1160: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1161: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1162:    to control all matrix memory allocation.

1164:    Output Parameter:
1165: .  A - the matrix

1167:    Notes:
1168:    The dense format is fully compatible with standard Fortran 77
1169:    storage by columns.

1171:    The data input variable is intended primarily for Fortran programmers
1172:    who wish to allocate their own matrix memory space.  Most users should
1173:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1175:    The user MUST specify either the local or global matrix dimensions
1176:    (possibly both).

1178:    Level: intermediate

1180: .keywords: matrix,dense, parallel

1182: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1183: @*/
1184: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1185: {
1186:   int ierr,size;

1189:   MatCreate(comm,m,n,M,N,A);
1190:   MPI_Comm_size(comm,&size);
1191:   if (size > 1) {
1192:     MatSetType(*A,MATMPIDENSE);
1193:     MatMPIDenseSetPreallocation(*A,data);
1194:   } else {
1195:     MatSetType(*A,MATSEQDENSE);
1196:     MatSeqDenseSetPreallocation(*A,data);
1197:   }
1198:   return(0);
1199: }

1203: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1204: {
1205:   Mat          mat;
1206:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1207:   int          ierr;

1210:   *newmat       = 0;
1211:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1212:   MatSetType(mat,A->type_name);
1213:   PetscNew(Mat_MPIDense,&a);
1214:   mat->data         = (void*)a;
1215:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1216:   mat->factor       = A->factor;
1217:   mat->assembled    = PETSC_TRUE;
1218:   mat->preallocated = PETSC_TRUE;

1220:   a->rstart       = oldmat->rstart;
1221:   a->rend         = oldmat->rend;
1222:   a->size         = oldmat->size;
1223:   a->rank         = oldmat->rank;
1224:   mat->insertmode = NOT_SET_VALUES;
1225:   a->nvec         = oldmat->nvec;
1226:   a->donotstash   = oldmat->donotstash;
1227:   PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1228:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1229:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1230:   MatStashCreate_Private(A->comm,1,&mat->stash);

1232:   MatSetUpMultiply_MPIDense(mat);
1233:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1234:   PetscLogObjectParent(mat,a->A);
1235:   *newmat = mat;
1236:   return(0);
1237: }

1239:  #include "petscsys.h"

1243: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1244: {
1245:   int          *rowners,i,size,rank,m,ierr,nz,j;
1246:   PetscScalar  *array,*vals,*vals_ptr;
1247:   MPI_Status   status;

1250:   MPI_Comm_rank(comm,&rank);
1251:   MPI_Comm_size(comm,&size);

1253:   /* determine ownership of all rows */
1254:   m          = M/size + ((M % size) > rank);
1255:   PetscMalloc((size+2)*sizeof(int),&rowners);
1256:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1257:   rowners[0] = 0;
1258:   for (i=2; i<=size; i++) {
1259:     rowners[i] += rowners[i-1];
1260:   }

1262:   MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1263:   MatSetType(*newmat,type);
1264:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1265:   MatGetArray(*newmat,&array);

1267:   if (!rank) {
1268:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1270:     /* read in my part of the matrix numerical values  */
1271:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1272: 
1273:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1274:     vals_ptr = vals;
1275:     for (i=0; i<m; i++) {
1276:       for (j=0; j<N; j++) {
1277:         array[i + j*m] = *vals_ptr++;
1278:       }
1279:     }

1281:     /* read in the other processors' parts and ship them out */
1282:     for (i=1; i<size; i++) {
1283:       nz   = (rowners[i+1] - rowners[i])*N;
1284:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1285:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1286:     }
1287:   } else {
1288:     /* receive numeric values */
1289:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1291:     /* receive message of values*/
1292:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

1294:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1295:     vals_ptr = vals;
1296:     for (i=0; i<m; i++) {
1297:       for (j=0; j<N; j++) {
1298:         array[i + j*m] = *vals_ptr++;
1299:       }
1300:     }
1301:   }
1302:   PetscFree(rowners);
1303:   PetscFree(vals);
1304:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1305:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1306:   return(0);
1307: }

1311: int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1312: {
1313:   Mat          A;
1314:   PetscScalar  *vals,*svals;
1315:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1316:   MPI_Status   status;
1317:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1318:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1319:   int          tag = ((PetscObject)viewer)->tag;
1320:   int          i,nz,ierr,j,rstart,rend,fd;

1323:   MPI_Comm_size(comm,&size);
1324:   MPI_Comm_rank(comm,&rank);
1325:   if (!rank) {
1326:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1327:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1328:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1329:   }

1331:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1332:   M = header[1]; N = header[2]; nz = header[3];

1334:   /*
1335:        Handle case where matrix is stored on disk as a dense matrix 
1336:   */
1337:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1338:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1339:     return(0);
1340:   }

1342:   /* determine ownership of all rows */
1343:   m          = M/size + ((M % size) > rank);
1344:   PetscMalloc((size+2)*sizeof(int),&rowners);
1345:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1346:   rowners[0] = 0;
1347:   for (i=2; i<=size; i++) {
1348:     rowners[i] += rowners[i-1];
1349:   }
1350:   rstart = rowners[rank];
1351:   rend   = rowners[rank+1];

1353:   /* distribute row lengths to all processors */
1354:   PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1355:   offlens = ourlens + (rend-rstart);
1356:   if (!rank) {
1357:     PetscMalloc(M*sizeof(int),&rowlengths);
1358:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1359:     PetscMalloc(size*sizeof(int),&sndcounts);
1360:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1361:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1362:     PetscFree(sndcounts);
1363:   } else {
1364:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1365:   }

1367:   if (!rank) {
1368:     /* calculate the number of nonzeros on each processor */
1369:     PetscMalloc(size*sizeof(int),&procsnz);
1370:     PetscMemzero(procsnz,size*sizeof(int));
1371:     for (i=0; i<size; i++) {
1372:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1373:         procsnz[i] += rowlengths[j];
1374:       }
1375:     }
1376:     PetscFree(rowlengths);

1378:     /* determine max buffer needed and allocate it */
1379:     maxnz = 0;
1380:     for (i=0; i<size; i++) {
1381:       maxnz = PetscMax(maxnz,procsnz[i]);
1382:     }
1383:     PetscMalloc(maxnz*sizeof(int),&cols);

1385:     /* read in my part of the matrix column indices  */
1386:     nz = procsnz[0];
1387:     PetscMalloc(nz*sizeof(int),&mycols);
1388:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1390:     /* read in everyone else's part and ship it off */
1391:     for (i=1; i<size; i++) {
1392:       nz   = procsnz[i];
1393:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1394:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1395:     }
1396:     PetscFree(cols);
1397:   } else {
1398:     /* determine buffer space needed for message */
1399:     nz = 0;
1400:     for (i=0; i<m; i++) {
1401:       nz += ourlens[i];
1402:     }
1403:     PetscMalloc((nz+1)*sizeof(int),&mycols);

1405:     /* receive message of column indices*/
1406:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1407:     MPI_Get_count(&status,MPI_INT,&maxnz);
1408:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1409:   }

1411:   /* loop over local rows, determining the number of off-diagonal entries */
1412:   PetscMemzero(offlens,m*sizeof(int));
1413:   jj = 0;
1414:   for (i=0; i<m; i++) {
1415:     for (j=0; j<ourlens[i]; j++) {
1416:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1417:       jj++;
1418:     }
1419:   }

1421:   /* create our matrix */
1422:   for (i=0; i<m; i++) {
1423:     ourlens[i] -= offlens[i];
1424:   }
1425:   MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1426:   MatSetType(*newmat,type);
1427:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1428:   A = *newmat;
1429:   for (i=0; i<m; i++) {
1430:     ourlens[i] += offlens[i];
1431:   }

1433:   if (!rank) {
1434:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1436:     /* read in my part of the matrix numerical values  */
1437:     nz = procsnz[0];
1438:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1439: 
1440:     /* insert into matrix */
1441:     jj      = rstart;
1442:     smycols = mycols;
1443:     svals   = vals;
1444:     for (i=0; i<m; i++) {
1445:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1446:       smycols += ourlens[i];
1447:       svals   += ourlens[i];
1448:       jj++;
1449:     }

1451:     /* read in the other processors' parts and ship them out */
1452:     for (i=1; i<size; i++) {
1453:       nz   = procsnz[i];
1454:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1455:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1456:     }
1457:     PetscFree(procsnz);
1458:   } else {
1459:     /* receive numeric values */
1460:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1462:     /* receive message of values*/
1463:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1464:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1465:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1467:     /* insert into matrix */
1468:     jj      = rstart;
1469:     smycols = mycols;
1470:     svals   = vals;
1471:     for (i=0; i<m; i++) {
1472:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1473:       smycols += ourlens[i];
1474:       svals   += ourlens[i];
1475:       jj++;
1476:     }
1477:   }
1478:   PetscFree(ourlens);
1479:   PetscFree(vals);
1480:   PetscFree(mycols);
1481:   PetscFree(rowners);

1483:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1484:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1485:   return(0);
1486: }