Actual source code: mpidense.c
1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
3: /*
4: Basic functions for basic parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
9: EXTERN_C_BEGIN
12: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
13: {
14: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
15: int m = A->m,rstart = mdn->rstart,ierr;
16: PetscScalar *array;
17: MPI_Comm comm;
20: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22: /* The reuse aspect is not implemented efficiently */
23: if (reuse) { MatDestroy(*B);}
25: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
26: MatGetArray(mdn->A,&array);
27: MatCreate(comm,m,m,m,m,B);
28: MatSetType(*B,mdn->A->type_name);
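  /* the local piece mdn->A stores all N columns by columns with leading dimension m,
     so the square diagonal block begins at column rstart, i.e. at offset m*rstart */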
29: MatSeqDenseSetPreallocation(*B,array+m*rstart);
30: MatRestoreArray(mdn->A,&array);
31: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
32: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
33:
34: *iscopy = PETSC_TRUE;
35: return(0);
36: }
37: EXTERN_C_END
41: int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
42: {
43: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
44: int ierr,i,j,rstart = A->rstart,rend = A->rend,row;
45: PetscTruth roworiented = A->roworiented;
48: for (i=0; i<m; i++) {
49: if (idxm[i] < 0) continue;
50: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
51: if (idxm[i] >= rstart && idxm[i] < rend) {
52: row = idxm[i] - rstart;
53: if (roworiented) {
54: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
55: } else {
56: for (j=0; j<n; j++) {
57: if (idxn[j] < 0) continue;
58: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
59: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
60: }
61: }
62: } else {
63: if (!A->donotstash) {
64: if (roworiented) {
65: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
66: } else {
67: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
68: }
69: }
70: }
71: }
72: return(0);
73: }
77: int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
78: {
79: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80: int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
83: for (i=0; i<m; i++) {
84: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
85: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
86: if (idxm[i] >= rstart && idxm[i] < rend) {
87: row = idxm[i] - rstart;
88: for (j=0; j<n; j++) {
89: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
90: if (idxn[j] >= mat->N) {
91: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
92: }
93: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
94: }
95: } else {
96: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
97: }
98: }
99: return(0);
100: }
104: int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
105: {
106: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
107: int ierr;
110: MatGetArray(a->A,array);
111: return(0);
112: }
116: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
117: {
118: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
119: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
120: int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
121: PetscScalar *av,*bv,*v = lmat->v;
122: Mat newmat;
125: ISGetIndices(isrow,&irow);
126: ISGetIndices(iscol,&icol);
127: ISGetLocalSize(isrow,&nrows);
128: ISGetLocalSize(iscol,&ncols);
130: /* No parallel redistribution currently supported! Should really check each index set
131: to confirm that it is OK. ... Currently supports only submatrices with the same row
132: partitioning as the original matrix! (A usage sketch follows this routine.) */
134: MatGetLocalSize(A,&nlrows,&nlcols);
135: MatGetOwnershipRange(A,&rstart,&rend);
136:
137: /* Check submatrix call */
138: if (scall == MAT_REUSE_MATRIX) {
139: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
140: /* Really need to test rows and column sizes! */
141: newmat = *B;
142: } else {
143: /* Create and fill new matrix */
144: MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);
145: MatSetType(newmat,A->type_name);
146: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
147: }
149: /* Now extract the data pointers and do the copy, column at a time */
150: newmatd = (Mat_MPIDense*)newmat->data;
151: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
152:
153: for (i=0; i<ncols; i++) {
154: av = v + nlrows*icol[i];
155: for (j=0; j<nrows; j++) {
156: *bv++ = av[irow[j] - rstart];
157: }
158: }
160: /* Assemble the matrices so that the correct flags are set */
161: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
162: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
164: /* Free work space */
165: ISRestoreIndices(isrow,&irow);
166: ISRestoreIndices(iscol,&icol);
167: *B = newmat;
168: return(0);
169: }
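
/*
   A usage sketch of the public interface for the routine above; it assumes an assembled
   MPIDENSE matrix A and a column count ncols (both hypothetical names), and index sets
   that follow the row partitioning of A, which is the only case handled here:

     int rstart,rend,ierr;
     IS  isrow,iscol;
     Mat B;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/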
173: int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
174: {
176: return(0);
177: }
181: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
182: {
183: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
184: MPI_Comm comm = mat->comm;
185: int ierr,nstash,reallocs;
186: InsertMode addv;
189: /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
190: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
191: if (addv == (ADD_VALUES|INSERT_VALUES)) {
192: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
193: }
194: mat->insertmode = addv; /* in case this processor had no cache */
196: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
197: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
198: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
199: return(0);
200: }
204: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
205: {
206: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
207: int i,n,ierr,*row,*col,flg,j,rstart,ncols;
208: PetscScalar *val;
209: InsertMode addv=mat->insertmode;
212: /* wait on receives */
213: while (1) {
214: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
215: if (!flg) break;
216:
217: for (i=0; i<n;) {
218: /* Now identify the consecutive vals belonging to the same row */
219: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
220: if (j < n) ncols = j-i;
221: else ncols = n-i;
222: /* Now assemble all these values with a single function call */
223: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
224: i = j;
225: }
226: }
227: MatStashScatterEnd_Private(&mat->stash);
228:
229: MatAssemblyBegin(mdn->A,mode);
230: MatAssemblyEnd(mdn->A,mode);
232: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
233: MatSetUpMultiply_MPIDense(mat);
234: }
235: return(0);
236: }
240: int MatZeroEntries_MPIDense(Mat A)
241: {
242: int ierr;
243: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
246: MatZeroEntries(l->A);
247: return(0);
248: }
252: int MatGetBlockSize_MPIDense(Mat A,int *bs)
253: {
255: *bs = 1;
256: return(0);
257: }
259: /* the code does not do the diagonal entries correctly unless the
260: matrix is square and the column and row ownerships are identical.
261: This is a BUG. The only way to fix it seems to be to access
262: the local matrix mdn->A directly rather than going through the MatZeroRows()
263: routine.
264: */
267: int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
268: {
269: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
270: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
271: int *nprocs,j,idx,nsends;
272: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
273: int *rvalues,tag = A->tag,count,base,slen,n,*source;
274: int *lens,imdex,*lrows,*values;
275: MPI_Comm comm = A->comm;
276: MPI_Request *send_waits,*recv_waits;
277: MPI_Status recv_status,*send_status;
278: IS istmp;
279: PetscTruth found;
282: ISGetLocalSize(is,&N);
283: ISGetIndices(is,&rows);
285: /* first count number of contributors to each processor */
286: PetscMalloc(2*size*sizeof(int),&nprocs);
287: PetscMemzero(nprocs,2*size*sizeof(int));
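  /* nprocs[2*j] counts the rows destined for processor j; nprocs[2*j+1] is a 0/1 flag
     recording whether any message goes to processor j at all */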
288: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
289: for (i=0; i<N; i++) {
290: idx = rows[i];
291: found = PETSC_FALSE;
292: for (j=0; j<size; j++) {
293: if (idx >= owners[j] && idx < owners[j+1]) {
294: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
295: }
296: }
297: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298: }
299: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
301: /* inform other processors of number of messages and max length*/
302: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
304: /* post receives: */
305: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
306: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
307: for (i=0; i<nrecvs; i++) {
308: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
309: }
311: /* do sends:
312: 1) starts[i] gives the starting index in svalues for stuff going to
313: the ith processor
314: */
315: PetscMalloc((N+1)*sizeof(int),&svalues);
316: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
317: PetscMalloc((size+1)*sizeof(int),&starts);
318: starts[0] = 0;
319: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
320: for (i=0; i<N; i++) {
321: svalues[starts[owner[i]]++] = rows[i];
322: }
323: ISRestoreIndices(is,&rows);
325: starts[0] = 0;
326: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
327: count = 0;
328: for (i=0; i<size; i++) {
329: if (nprocs[2*i+1]) {
330: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
331: }
332: }
333: PetscFree(starts);
335: base = owners[rank];
337: /* wait on receives */
338: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
339: source = lens + nrecvs;
340: count = nrecvs; slen = 0;
341: while (count) {
342: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
343: /* unpack receives into our local space */
344: MPI_Get_count(&recv_status,MPI_INT,&n);
345: source[imdex] = recv_status.MPI_SOURCE;
346: lens[imdex] = n;
347: slen += n;
348: count--;
349: }
350: PetscFree(recv_waits);
351:
352: /* move the data into the send scatter */
353: PetscMalloc((slen+1)*sizeof(int),&lrows);
354: count = 0;
355: for (i=0; i<nrecvs; i++) {
356: values = rvalues + i*nmax;
357: for (j=0; j<lens[i]; j++) {
358: lrows[count++] = values[j] - base;
359: }
360: }
361: PetscFree(rvalues);
362: PetscFree(lens);
363: PetscFree(owner);
364: PetscFree(nprocs);
365:
366: /* actually zap the local rows */
367: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
368: PetscLogObjectParent(A,istmp);
369: PetscFree(lrows);
370: MatZeroRows(l->A,istmp,diag);
371: ISDestroy(istmp);
373: /* wait on sends */
374: if (nsends) {
375: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
376: MPI_Waitall(nsends,send_waits,send_status);
377: PetscFree(send_status);
378: }
379: PetscFree(send_waits);
380: PetscFree(svalues);
382: return(0);
383: }
387: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
388: {
389: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
390: int ierr;
393: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
394: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
395: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
396: return(0);
397: }
401: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
402: {
403: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
404: int ierr;
407: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
408: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
409: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
410: return(0);
411: }
415: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
416: {
417: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
418: int ierr;
419: PetscScalar zero = 0.0;
422: VecSet(&zero,yy);
423: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
424: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
425: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
426: return(0);
427: }
431: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
432: {
433: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
434: int ierr;
437: VecCopy(yy,zz);
438: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
439: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
440: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
441: return(0);
442: }
446: int MatGetDiagonal_MPIDense(Mat A,Vec v)
447: {
448: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
449: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
450: int ierr,len,i,n,m = A->m,radd;
451: PetscScalar *x,zero = 0.0;
452:
454: VecSet(&zero,v);
455: VecGetArray(v,&x);
456: VecGetSize(v,&n);
457: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
458: len = PetscMin(a->A->m,a->A->n);
459: radd = a->rstart*m;
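  /* aloc->v holds the m x N local block by columns with leading dimension m; the diagonal
     entry of global row rstart+i sits in column rstart+i, i.e. at offset radd + i*m + i */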
460: for (i=0; i<len; i++) {
461: x[i] = aloc->v[radd + i*m + i];
462: }
463: VecRestoreArray(v,&x);
464: return(0);
465: }
469: int MatDestroy_MPIDense(Mat mat)
470: {
471: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
472: int ierr;
476: #if defined(PETSC_USE_LOG)
477: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
478: #endif
479: MatStashDestroy_Private(&mat->stash);
480: PetscFree(mdn->rowners);
481: MatDestroy(mdn->A);
482: if (mdn->lvec) VecDestroy(mdn->lvec);
483: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
484: if (mdn->factor) {
485: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
486: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
487: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
488: PetscFree(mdn->factor);
489: }
490: PetscFree(mdn);
491: return(0);
492: }
496: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
497: {
498: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
499: int ierr;
502: if (mdn->size == 1) {
503: MatView(mdn->A,viewer);
504: }
505: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
506: return(0);
507: }
511: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
512: {
513: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
514: int ierr,size = mdn->size,rank = mdn->rank;
515: PetscViewerType vtype;
516: PetscTruth isascii,isdraw;
517: PetscViewer sviewer;
518: PetscViewerFormat format;
521: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
522: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
523: if (isascii) {
524: PetscViewerGetType(viewer,&vtype);
525: PetscViewerGetFormat(viewer,&format);
526: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
527: MatInfo info;
528: MatGetInfo(mat,MAT_LOCAL,&info);
529: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
530: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
531: PetscViewerFlush(viewer);
532: VecScatterView(mdn->Mvctx,viewer);
533: return(0);
534: } else if (format == PETSC_VIEWER_ASCII_INFO) {
535: return(0);
536: }
537: } else if (isdraw) {
538: PetscDraw draw;
539: PetscTruth isnull;
541: PetscViewerDrawGetDraw(viewer,0,&draw);
542: PetscDrawIsNull(draw,&isnull);
543: if (isnull) return(0);
544: }
546: if (size == 1) {
547: MatView(mdn->A,viewer);
548: } else {
549: /* assemble the entire matrix onto the first processor. */
550: Mat A;
551: int M = mat->M,N = mat->N,m,row,i,nz,*cols;
552: PetscScalar *vals;
554: if (!rank) {
555: MatCreate(mat->comm,M,N,M,N,&A);
556: } else {
557: MatCreate(mat->comm,0,0,M,N,&A);
558: }
559: /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
560: MatSetType(A,MATMPIDENSE);
561: MatMPIDenseSetPreallocation(A,PETSC_NULL);
562: PetscLogObjectParent(mat,A);
564: /* Copy the matrix ... This isn't the most efficient means,
565: but it's quick for now */
566: row = mdn->rstart; m = mdn->A->m;
567: for (i=0; i<m; i++) {
568: MatGetRow(mat,row,&nz,&cols,&vals);
569: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
570: MatRestoreRow(mat,row,&nz,&cols,&vals);
571: row++;
572: }
574: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
575: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
576: PetscViewerGetSingleton(viewer,&sviewer);
577: if (!rank) {
578: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
579: }
580: PetscViewerRestoreSingleton(viewer,&sviewer);
581: PetscViewerFlush(viewer);
582: MatDestroy(A);
583: }
584: return(0);
585: }
589: int MatView_MPIDense(Mat mat,PetscViewer viewer)
590: {
591: int ierr;
592: PetscTruth isascii,isbinary,isdraw,issocket;
593:
595:
596: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
597: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
598: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
599: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
601: if (isascii || issocket || isdraw) {
602: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
603: } else if (isbinary) {
604: MatView_MPIDense_Binary(mat,viewer);
605: } else {
606: SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
607: }
608: return(0);
609: }
613: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
614: {
615: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
616: Mat mdn = mat->A;
617: int ierr;
618: PetscReal isend[5],irecv[5];
621: info->rows_global = (double)A->M;
622: info->columns_global = (double)A->N;
623: info->rows_local = (double)A->m;
624: info->columns_local = (double)A->N;
625: info->block_size = 1.0;
626: MatGetInfo(mdn,MAT_LOCAL,info);
627: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
628: isend[3] = info->memory; isend[4] = info->mallocs;
629: if (flag == MAT_LOCAL) {
630: info->nz_used = isend[0];
631: info->nz_allocated = isend[1];
632: info->nz_unneeded = isend[2];
633: info->memory = isend[3];
634: info->mallocs = isend[4];
635: } else if (flag == MAT_GLOBAL_MAX) {
636: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
637: info->nz_used = irecv[0];
638: info->nz_allocated = irecv[1];
639: info->nz_unneeded = irecv[2];
640: info->memory = irecv[3];
641: info->mallocs = irecv[4];
642: } else if (flag == MAT_GLOBAL_SUM) {
643: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
644: info->nz_used = irecv[0];
645: info->nz_allocated = irecv[1];
646: info->nz_unneeded = irecv[2];
647: info->memory = irecv[3];
648: info->mallocs = irecv[4];
649: }
650: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
651: info->fill_ratio_needed = 0;
652: info->factor_mallocs = 0;
653: return(0);
654: }
658: int MatSetOption_MPIDense(Mat A,MatOption op)
659: {
660: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
661: int ierr;
664: switch (op) {
665: case MAT_NO_NEW_NONZERO_LOCATIONS:
666: case MAT_YES_NEW_NONZERO_LOCATIONS:
667: case MAT_NEW_NONZERO_LOCATION_ERR:
668: case MAT_NEW_NONZERO_ALLOCATION_ERR:
669: case MAT_COLUMNS_SORTED:
670: case MAT_COLUMNS_UNSORTED:
671: MatSetOption(a->A,op);
672: break;
673: case MAT_ROW_ORIENTED:
674: a->roworiented = PETSC_TRUE;
675: MatSetOption(a->A,op);
676: break;
677: case MAT_ROWS_SORTED:
678: case MAT_ROWS_UNSORTED:
679: case MAT_YES_NEW_DIAGONALS:
680: case MAT_USE_HASH_TABLE:
681: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
682: break;
683: case MAT_COLUMN_ORIENTED:
684: a->roworiented = PETSC_FALSE;
685: MatSetOption(a->A,op);
686: break;
687: case MAT_IGNORE_OFF_PROC_ENTRIES:
688: a->donotstash = PETSC_TRUE;
689: break;
690: case MAT_NO_NEW_DIAGONALS:
691: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
692: case MAT_SYMMETRIC:
693: case MAT_STRUCTURALLY_SYMMETRIC:
694: case MAT_NOT_SYMMETRIC:
695: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
696: case MAT_HERMITIAN:
697: case MAT_NOT_HERMITIAN:
698: case MAT_SYMMETRY_ETERNAL:
699: case MAT_NOT_SYMMETRY_ETERNAL:
700: break;
701: default:
702: SETERRQ(PETSC_ERR_SUP,"unknown option");
703: }
704: return(0);
705: }
709: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
710: {
711: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
712: int lrow,rstart = mat->rstart,rend = mat->rend,ierr;
715: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
716: lrow = row - rstart;
717: MatGetRow(mat->A,lrow,nz,idx,v);
718: return(0);
719: }
723: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
724: {
728: if (idx) {PetscFree(*idx);}
729: if (v) {PetscFree(*v);}
730: return(0);
731: }
735: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
736: {
737: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
738: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
739: PetscScalar *l,*r,x,*v;
740: int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
743: MatGetLocalSize(A,&s2,&s3);
744: if (ll) {
745: VecGetLocalSize(ll,&s2a);
746: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
747: VecGetArray(ll,&l);
748: for (i=0; i<m; i++) {
749: x = l[i];
750: v = mat->v + i;
751: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
752: }
753: VecRestoreArray(ll,&l);
754: PetscLogFlops(n*m);
755: }
756: if (rr) {
757: VecGetSize(rr,&s3a);
758: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
759: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
760: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
761: VecGetArray(mdn->lvec,&r);
762: for (i=0; i<n; i++) {
763: x = r[i];
764: v = mat->v + i*m;
765: for (j=0; j<m; j++) { (*v++) *= x;}
766: }
767: VecRestoreArray(mdn->lvec,&r);
768: PetscLogFlops(n*m);
769: }
770: return(0);
771: }
775: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
776: {
777: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
778: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
779: int ierr,i,j;
780: PetscReal sum = 0.0;
781: PetscScalar *v = mat->v;
784: if (mdn->size == 1) {
785: MatNorm(mdn->A,type,nrm);
786: } else {
787: if (type == NORM_FROBENIUS) {
788: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
789: #if defined(PETSC_USE_COMPLEX)
790: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
791: #else
792: sum += (*v)*(*v); v++;
793: #endif
794: }
795: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
796: *nrm = sqrt(*nrm);
797: PetscLogFlops(2*mdn->A->n*mdn->A->m);
798: } else if (type == NORM_1) {
799: PetscReal *tmp,*tmp2;
800: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
801: tmp2 = tmp + A->N;
802: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
803: *nrm = 0.0;
804: v = mat->v;
805: for (j=0; j<mdn->A->n; j++) {
806: for (i=0; i<mdn->A->m; i++) {
807: tmp[j] += PetscAbsScalar(*v); v++;
808: }
809: }
810: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
811: for (j=0; j<A->N; j++) {
812: if (tmp2[j] > *nrm) *nrm = tmp2[j];
813: }
814: PetscFree(tmp);
815: PetscLogFlops(A->n*A->m);
816: } else if (type == NORM_INFINITY) { /* max row norm */
817: PetscReal ntemp;
818: MatNorm(mdn->A,type,&ntemp);
819: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
820: } else {
821: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
822: }
823: }
824: return(0);
825: }
829: int MatTranspose_MPIDense(Mat A,Mat *matout)
830: {
831: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
832: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
833: Mat B;
834: int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
835: int j,i,ierr;
836: PetscScalar *v;
839: if (!matout && M != N) {
840: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
841: }
842: MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);
843: MatSetType(B,A->type_name);
844: MatMPIDenseSetPreallocation(B,PETSC_NULL);
846: m = a->A->m; n = a->A->n; v = Aloc->v;
847: PetscMalloc(m*sizeof(int),&rwork);
848: for (j=0; j<n; j++) {
849: for (i=0; i<m; i++) rwork[i] = rstart + i;
850: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
851: v += m;
852: }
853: PetscFree(rwork);
854: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
855: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
856: if (matout) {
857: *matout = B;
858: } else {
859: MatHeaderCopy(A,B);
860: }
861: return(0);
862: }
864: #include "petscblaslapack.h"
867: int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
868: {
869: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
870: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
871: int one = 1,nz;
874: nz = inA->m*inA->N;
875: BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
876: PetscLogFlops(nz);
877: return(0);
878: }
880: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
884: int MatSetUpPreallocation_MPIDense(Mat A)
885: {
886: int ierr;
889: MatMPIDenseSetPreallocation(A,0);
890: return(0);
891: }
893: /* -------------------------------------------------------------------*/
894: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
895: MatGetRow_MPIDense,
896: MatRestoreRow_MPIDense,
897: MatMult_MPIDense,
898: /* 4*/ MatMultAdd_MPIDense,
899: MatMultTranspose_MPIDense,
900: MatMultTransposeAdd_MPIDense,
901: 0,
902: 0,
903: 0,
904: /*10*/ 0,
905: 0,
906: 0,
907: 0,
908: MatTranspose_MPIDense,
909: /*15*/ MatGetInfo_MPIDense,
910: 0,
911: MatGetDiagonal_MPIDense,
912: MatDiagonalScale_MPIDense,
913: MatNorm_MPIDense,
914: /*20*/ MatAssemblyBegin_MPIDense,
915: MatAssemblyEnd_MPIDense,
916: 0,
917: MatSetOption_MPIDense,
918: MatZeroEntries_MPIDense,
919: /*25*/ MatZeroRows_MPIDense,
920: 0,
921: 0,
922: 0,
923: 0,
924: /*30*/ MatSetUpPreallocation_MPIDense,
925: 0,
926: 0,
927: MatGetArray_MPIDense,
928: MatRestoreArray_MPIDense,
929: /*35*/ MatDuplicate_MPIDense,
930: 0,
931: 0,
932: 0,
933: 0,
934: /*40*/ 0,
935: MatGetSubMatrices_MPIDense,
936: 0,
937: MatGetValues_MPIDense,
938: 0,
939: /*45*/ 0,
940: MatScale_MPIDense,
941: 0,
942: 0,
943: 0,
944: /*50*/ MatGetBlockSize_MPIDense,
945: 0,
946: 0,
947: 0,
948: 0,
949: /*55*/ 0,
950: 0,
951: 0,
952: 0,
953: 0,
954: /*60*/ MatGetSubMatrix_MPIDense,
955: MatDestroy_MPIDense,
956: MatView_MPIDense,
957: MatGetPetscMaps_Petsc,
958: 0,
959: /*65*/ 0,
960: 0,
961: 0,
962: 0,
963: 0,
964: /*70*/ 0,
965: 0,
966: 0,
967: 0,
968: 0,
969: /*75*/ 0,
970: 0,
971: 0,
972: 0,
973: 0,
974: /*80*/ 0,
975: 0,
976: 0,
977: 0,
978: /*85*/ MatLoad_MPIDense};
980: EXTERN_C_BEGIN
983: int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
984: {
985: Mat_MPIDense *a;
986: int ierr;
989: mat->preallocated = PETSC_TRUE;
990: /* Note: For now, when data is specified above, this assumes the user correctly
991: allocates the local dense storage space (mat->m by mat->N values, stored by columns). We should add error checking. */
993: a = (Mat_MPIDense*)mat->data;
994: MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);
995: MatSetType(a->A,MATSEQDENSE);
996: MatSeqDenseSetPreallocation(a->A,data);
997: PetscLogObjectParent(mat,a->A);
998: return(0);
999: }
1000: EXTERN_C_END
1002: /*MC
1003: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1005: Options Database Keys:
1006: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1008: Level: beginner
1010: .seealso: MatCreateMPIDense
1011: M*/
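
/*
   A minimal usage sketch for this type, assuming a communicator comm and global sizes
   M and N are already defined; PETSc chooses the row distribution and allocates the
   local storage:

     Mat A;
     int ierr;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
     ... insert entries with MatSetValues(), then ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/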
1013: EXTERN_C_BEGIN
1016: int MatCreate_MPIDense(Mat mat)
1017: {
1018: Mat_MPIDense *a;
1019: int ierr,i;
1022: PetscNew(Mat_MPIDense,&a);
1023: mat->data = (void*)a;
1024: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1025: mat->factor = 0;
1026: mat->mapping = 0;
1028: a->factor = 0;
1029: mat->insertmode = NOT_SET_VALUES;
1030: MPI_Comm_rank(mat->comm,&a->rank);
1031: MPI_Comm_size(mat->comm,&a->size);
1033: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1034: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1035: a->nvec = mat->n;
1037: /* the information in the maps duplicates the information computed below, eventually
1038: we should remove the duplicate information that is not contained in the maps */
1039: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1040: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
1042: /* build local table of row and column ownerships */
1043: PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
1044: a->cowners = a->rowners + a->size + 1;
1045: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1046: MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
1047: a->rowners[0] = 0;
1048: for (i=2; i<=a->size; i++) {
1049: a->rowners[i] += a->rowners[i-1];
1050: }
1051: a->rstart = a->rowners[a->rank];
1052: a->rend = a->rowners[a->rank+1];
1053: MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
1054: a->cowners[0] = 0;
1055: for (i=2; i<=a->size; i++) {
1056: a->cowners[i] += a->cowners[i-1];
1057: }
1059: /* build cache (stash) for off-processor entries */
1060: a->donotstash = PETSC_FALSE;
1061: MatStashCreate_Private(mat->comm,1,&mat->stash);
1063: /* stuff used for matrix vector multiply */
1064: a->lvec = 0;
1065: a->Mvctx = 0;
1066: a->roworiented = PETSC_TRUE;
1068: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1069: "MatGetDiagonalBlock_MPIDense",
1070: MatGetDiagonalBlock_MPIDense);
1071: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1072: "MatMPIDenseSetPreallocation_MPIDense",
1073: MatMPIDenseSetPreallocation_MPIDense);
1074: return(0);
1075: }
1076: EXTERN_C_END
1078: /*MC
1079: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1081: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1082: and MATMPIDENSE otherwise.
1084: Options Database Keys:
1085: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1087: Level: beginner
1089: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1090: M*/
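
/*
   A minimal sketch of selecting this type from the options database, assuming comm, M
   and N are already defined; with one process the matrix becomes MATSEQDENSE, otherwise
   MATMPIDENSE, as registered in this file:

     Mat A;
     int ierr;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);     run with: -mat_type dense
*/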
1092: EXTERN_C_BEGIN
1095: int MatCreate_Dense(Mat A) {
1096: int ierr,size;
1099: PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1100: MPI_Comm_size(A->comm,&size);
1101: if (size == 1) {
1102: MatSetType(A,MATSEQDENSE);
1103: } else {
1104: MatSetType(A,MATMPIDENSE);
1105: }
1106: return(0);
1107: }
1108: EXTERN_C_END
1112: /*@C
1113: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1115: Not collective
1117: Input Parameters:
1118: + mat - the matrix
1119: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1120: to control all matrix memory allocation.
1122: Notes:
1123: The dense format is fully compatible with standard Fortran 77
1124: storage by columns.
1126: The data input variable is intended primarily for Fortran programmers
1127: who wish to allocate their own matrix memory space. Most users should
1128: set data=PETSC_NULL.
1130: Level: intermediate
1132: .keywords: matrix,dense, parallel
1134: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1135: @*/
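
/*
   A usage sketch with user-provided storage, assuming m local rows and N global columns
   (hypothetical names): the data array must hold m*N values stored by columns (Fortran
   ordering), so local entry (i,j) lives at data[i + j*m]:

     Mat         A;
     PetscScalar *data;
     int         ierr;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatCreate(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/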
1136: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1137: {
1138: int ierr,(*f)(Mat,PetscScalar *);
1141: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1142: if (f) {
1143: (*f)(mat,data);
1144: }
1145: return(0);
1146: }
1150: /*@C
1151: MatCreateMPIDense - Creates a parallel matrix in dense format.
1153: Collective on MPI_Comm
1155: Input Parameters:
1156: + comm - MPI communicator
1157: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1158: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1159: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1160: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1161: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1162: to control all matrix memory allocation.
1164: Output Parameter:
1165: . A - the matrix
1167: Notes:
1168: The dense format is fully compatible with standard Fortran 77
1169: storage by columns.
1171: The data input variable is intended primarily for Fortran programmers
1172: who wish to allocate their own matrix memory space. Most users should
1173: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1175: The user MUST specify either the local or global matrix dimensions
1176: (possibly both).
1178: Level: intermediate
1180: .keywords: matrix,dense, parallel
1182: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1183: @*/
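
/*
   A minimal sketch, assuming each process owns mlocal rows of an M x N matrix
   (hypothetical sizes) and lets PETSc allocate the storage (data = PETSC_NULL):

     Mat A;
     int ierr;
     ierr = MatCreateMPIDense(comm,mlocal,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... insert entries with MatSetValues(), then ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/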
1184: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1185: {
1186: int ierr,size;
1189: MatCreate(comm,m,n,M,N,A);
1190: MPI_Comm_size(comm,&size);
1191: if (size > 1) {
1192: MatSetType(*A,MATMPIDENSE);
1193: MatMPIDenseSetPreallocation(*A,data);
1194: } else {
1195: MatSetType(*A,MATSEQDENSE);
1196: MatSeqDenseSetPreallocation(*A,data);
1197: }
1198: return(0);
1199: }
1203: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1204: {
1205: Mat mat;
1206: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1207: int ierr;
1210: *newmat = 0;
1211: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1212: MatSetType(mat,A->type_name);
1213: PetscNew(Mat_MPIDense,&a);
1214: mat->data = (void*)a;
1215: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1216: mat->factor = A->factor;
1217: mat->assembled = PETSC_TRUE;
1218: mat->preallocated = PETSC_TRUE;
1220: a->rstart = oldmat->rstart;
1221: a->rend = oldmat->rend;
1222: a->size = oldmat->size;
1223: a->rank = oldmat->rank;
1224: mat->insertmode = NOT_SET_VALUES;
1225: a->nvec = oldmat->nvec;
1226: a->donotstash = oldmat->donotstash;
1227: PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1228: PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1229: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1230: MatStashCreate_Private(A->comm,1,&mat->stash);
1232: MatSetUpMultiply_MPIDense(mat);
1233: MatDuplicate(oldmat->A,cpvalues,&a->A);
1234: PetscLogObjectParent(mat,a->A);
1235: *newmat = mat;
1236: return(0);
1237: }
1239: #include "petscsys.h"
1243: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1244: {
1245: int *rowners,i,size,rank,m,ierr,nz,j;
1246: PetscScalar *array,*vals,*vals_ptr;
1247: MPI_Status status;
1250: MPI_Comm_rank(comm,&rank);
1251: MPI_Comm_size(comm,&size);
1253: /* determine ownership of all rows */
1254: m = M/size + ((M % size) > rank);
1255: PetscMalloc((size+2)*sizeof(int),&rowners);
1256: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1257: rowners[0] = 0;
1258: for (i=2; i<=size; i++) {
1259: rowners[i] += rowners[i-1];
1260: }
1262: MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1263: MatSetType(*newmat,type);
1264: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1265: MatGetArray(*newmat,&array);
1267: if (!rank) {
1268: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1270: /* read in my part of the matrix numerical values */
1271: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1272:
1273: /* insert into matrix by row (this is why we cannot read directly into the array) */
1274: vals_ptr = vals;
1275: for (i=0; i<m; i++) {
1276: for (j=0; j<N; j++) {
1277: array[i + j*m] = *vals_ptr++;
1278: }
1279: }
1281: /* read in the other processors' parts and ship them out */
1282: for (i=1; i<size; i++) {
1283: nz = (rowners[i+1] - rowners[i])*N;
1284: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1285: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1286: }
1287: } else {
1288: /* receive numeric values */
1289: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1291: /* receive message of values*/
1292: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1294: /* insert into matrix by row (this is why we cannot read directly into the array) */
1295: vals_ptr = vals;
1296: for (i=0; i<m; i++) {
1297: for (j=0; j<N; j++) {
1298: array[i + j*m] = *vals_ptr++;
1299: }
1300: }
1301: }
1302: PetscFree(rowners);
1303: PetscFree(vals);
1304: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1305: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1306: return(0);
1307: }
1311: int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1312: {
1313: Mat A;
1314: PetscScalar *vals,*svals;
1315: MPI_Comm comm = ((PetscObject)viewer)->comm;
1316: MPI_Status status;
1317: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1318: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1319: int tag = ((PetscObject)viewer)->tag;
1320: int i,nz,ierr,j,rstart,rend,fd;
1323: MPI_Comm_size(comm,&size);
1324: MPI_Comm_rank(comm,&rank);
1325: if (!rank) {
1326: PetscViewerBinaryGetDescriptor(viewer,&fd);
1327: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1328: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1329: }
1331: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1332: M = header[1]; N = header[2]; nz = header[3];
1334: /*
1335: Handle case where matrix is stored on disk as a dense matrix
1336: */
1337: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1338: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1339: return(0);
1340: }
1342: /* determine ownership of all rows */
1343: m = M/size + ((M % size) > rank);
1344: PetscMalloc((size+2)*sizeof(int),&rowners);
1345: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1346: rowners[0] = 0;
1347: for (i=2; i<=size; i++) {
1348: rowners[i] += rowners[i-1];
1349: }
1350: rstart = rowners[rank];
1351: rend = rowners[rank+1];
1353: /* distribute row lengths to all processors */
1354: PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1355: offlens = ourlens + (rend-rstart);
1356: if (!rank) {
1357: PetscMalloc(M*sizeof(int),&rowlengths);
1358: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1359: PetscMalloc(size*sizeof(int),&sndcounts);
1360: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1361: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1362: PetscFree(sndcounts);
1363: } else {
1364: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1365: }
1367: if (!rank) {
1368: /* calculate the number of nonzeros on each processor */
1369: PetscMalloc(size*sizeof(int),&procsnz);
1370: PetscMemzero(procsnz,size*sizeof(int));
1371: for (i=0; i<size; i++) {
1372: for (j=rowners[i]; j< rowners[i+1]; j++) {
1373: procsnz[i] += rowlengths[j];
1374: }
1375: }
1376: PetscFree(rowlengths);
1378: /* determine max buffer needed and allocate it */
1379: maxnz = 0;
1380: for (i=0; i<size; i++) {
1381: maxnz = PetscMax(maxnz,procsnz[i]);
1382: }
1383: PetscMalloc(maxnz*sizeof(int),&cols);
1385: /* read in my part of the matrix column indices */
1386: nz = procsnz[0];
1387: PetscMalloc(nz*sizeof(int),&mycols);
1388: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1390: /* read in everyone else's part and ship it off */
1391: for (i=1; i<size; i++) {
1392: nz = procsnz[i];
1393: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1394: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1395: }
1396: PetscFree(cols);
1397: } else {
1398: /* determine buffer space needed for message */
1399: nz = 0;
1400: for (i=0; i<m; i++) {
1401: nz += ourlens[i];
1402: }
1403: PetscMalloc((nz+1)*sizeof(int),&mycols);
1405: /* receive message of column indices*/
1406: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1407: MPI_Get_count(&status,MPI_INT,&maxnz);
1408: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1409: }
1411: /* loop over local rows, determining number of off diagonal entries */
1412: PetscMemzero(offlens,m*sizeof(int));
1413: jj = 0;
1414: for (i=0; i<m; i++) {
1415: for (j=0; j<ourlens[i]; j++) {
1416: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1417: jj++;
1418: }
1419: }
1421: /* create our matrix */
1422: for (i=0; i<m; i++) {
1423: ourlens[i] -= offlens[i];
1424: }
1425: MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);
1426: MatSetType(*newmat,type);
1427: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1428: A = *newmat;
1429: for (i=0; i<m; i++) {
1430: ourlens[i] += offlens[i];
1431: }
1433: if (!rank) {
1434: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1436: /* read in my part of the matrix numerical values */
1437: nz = procsnz[0];
1438: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1439:
1440: /* insert into matrix */
1441: jj = rstart;
1442: smycols = mycols;
1443: svals = vals;
1444: for (i=0; i<m; i++) {
1445: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1446: smycols += ourlens[i];
1447: svals += ourlens[i];
1448: jj++;
1449: }
1451: /* read in the other processors' parts and ship them out */
1452: for (i=1; i<size; i++) {
1453: nz = procsnz[i];
1454: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1455: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1456: }
1457: PetscFree(procsnz);
1458: } else {
1459: /* receive numeric values */
1460: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1462: /* receive message of values*/
1463: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1464: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1465: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1467: /* insert into matrix */
1468: jj = rstart;
1469: smycols = mycols;
1470: svals = vals;
1471: for (i=0; i<m; i++) {
1472: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1473: smycols += ourlens[i];
1474: svals += ourlens[i];
1475: jj++;
1476: }
1477: }
1478: PetscFree(ourlens);
1479: PetscFree(vals);
1480: PetscFree(mycols);
1481: PetscFree(rowners);
1483: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1484: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1485: return(0);
1486: }