Actual source code: mpibdiag.c
1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
2: /*
3: The basic matrix operations for the Block diagonal parallel
4: matrices.
5: */
6: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
10: int MatSetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
11: {
12: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
13: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
14: PetscTruth roworiented = mbd->roworiented;
17: for (i=0; i<m; i++) {
18: if (idxm[i] < 0) continue;
19: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
20: if (idxm[i] >= rstart && idxm[i] < rend) {
21: row = idxm[i] - rstart;
22: for (j=0; j<n; j++) {
23: if (idxn[j] < 0) continue;
24: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
25: if (roworiented) {
26: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
27: } else {
28: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
29: }
30: }
31: } else {
32: if (!mbd->donotstash) {
33: if (roworiented) {
34: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
35: } else {
36: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
37: }
38: }
39: }
40: }
41: return(0);
42: }
46: int MatGetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
47: {
48: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
49: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
52: for (i=0; i<m; i++) {
53: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
54: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
55: if (idxm[i] >= rstart && idxm[i] < rend) {
56: row = idxm[i] - rstart;
57: for (j=0; j<n; j++) {
58: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
59: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
61: }
62: } else {
63: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
64: }
65: }
66: return(0);
67: }
71: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
72: {
73: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
74: MPI_Comm comm = mat->comm;
75: int ierr,nstash,reallocs;
76: InsertMode addv;
79: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
80: if (addv == (ADD_VALUES|INSERT_VALUES)) {
81: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
82: }
83: mat->insertmode = addv; /* in case this processor had no cache */
84: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
85: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
86: PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
87: return(0);
88: }
92: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
93: {
94: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
95: Mat_SeqBDiag *mlocal;
96: int i,n,*row,*col;
97: int *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
98: PetscScalar *val;
99: InsertMode addv = mat->insertmode;
103: while (1) {
104: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
105: if (!flg) break;
106:
107: for (i=0; i<n;) {
108: /* Now identify the consecutive vals belonging to the same row */
109: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
110: if (j < n) ncols = j-i;
111: else ncols = n-i;
112: /* Now assemble all these values with a single function call */
113: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
114: i = j;
115: }
116: }
117: MatStashScatterEnd_Private(&mat->stash);
119: MatAssemblyBegin(mbd->A,mode);
120: MatAssemblyEnd(mbd->A,mode);
122: /* Fix main diagonal location and determine global diagonals */
123: mlocal = (Mat_SeqBDiag*)mbd->A->data;
124: Mblock = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
125: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
126: PetscMalloc(2*len*sizeof(int),&tmp1);
127: tmp2 = tmp1 + len;
128: PetscMemzero(tmp1,2*len*sizeof(int));
129: mlocal->mainbd = -1;
130: for (i=0; i<mlocal->nd; i++) {
131: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
132: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
133: }
134: MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
135: ict = 0;
136: for (i=0; i<len; i++) {
137: if (tmp2[i]) {
138: mbd->gdiag[ict] = i - Mblock;
139: ict++;
140: }
141: }
142: mbd->gnd = ict;
143: PetscFree(tmp1);
145: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
146: MatSetUpMultiply_MPIBDiag(mat);
147: }
148: return(0);
149: }
153: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
154: {
155: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
156: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
159: *bs = dmat->bs;
160: return(0);
161: }
165: int MatZeroEntries_MPIBDiag(Mat A)
166: {
167: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
168: int ierr;
171: MatZeroEntries(l->A);
172: return(0);
173: }
175: /* again this uses the same basic strategy as in the assembly and
176: scatter create routines, we should try to do it systematically
177: if we can figure out the proper level of generality. */
179: /* the code does not do the diagonal entries correctly unless the
180: matrix is square and the column and row ownerships are identical.
181: This is a BUG. The only way to fix it seems to be to access
182: aij->A and aij->B directly and not through the MatZeroRows()
183: routine.
184: */
188: int MatZeroRows_MPIBDiag(Mat A,IS is,const PetscScalar *diag)
189: {
190: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
191: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
192: int *nprocs,j,idx,nsends;
193: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
194: int *rvalues,tag = A->tag,count,base,slen,n,*source;
195: int *lens,imdex,*lrows,*values;
196: MPI_Comm comm = A->comm;
197: MPI_Request *send_waits,*recv_waits;
198: MPI_Status recv_status,*send_status;
199: IS istmp;
200: PetscTruth found;
203: ISGetLocalSize(is,&N);
204: ISGetIndices(is,&rows);
206: /* first count number of contributors to each processor */
207: PetscMalloc(2*size*sizeof(int),&nprocs);
208: PetscMemzero(nprocs,2*size*sizeof(int));
209: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
210: for (i=0; i<N; i++) {
211: idx = rows[i];
212: found = PETSC_FALSE;
213: for (j=0; j<size; j++) {
214: if (idx >= owners[j] && idx < owners[j+1]) {
215: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
216: }
217: }
218: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
219: }
220: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
222: /* inform other processors of number of messages and max length*/
223: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
225: /* post receives: */
226: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
227: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
228: for (i=0; i<nrecvs; i++) {
229: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
230: }
232: /* do sends:
233: 1) starts[i] gives the starting index in svalues for stuff going to
234: the ith processor
235: */
236: PetscMalloc((N+1)*sizeof(int),&svalues);
237: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
238: PetscMalloc((size+1)*sizeof(int),&starts);
239: starts[0] = 0;
240: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
241: for (i=0; i<N; i++) {
242: svalues[starts[owner[i]]++] = rows[i];
243: }
244: ISRestoreIndices(is,&rows);
246: starts[0] = 0;
247: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
248: count = 0;
249: for (i=0; i<size; i++) {
250: if (nprocs[2*i+1]) {
251: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
252: }
253: }
254: PetscFree(starts);
256: base = owners[rank];
258: /* wait on receives */
259: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
260: source = lens + nrecvs;
261: count = nrecvs;
262: slen = 0;
263: while (count) {
264: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
265: /* unpack receives into our local space */
266: MPI_Get_count(&recv_status,MPI_INT,&n);
267: source[imdex] = recv_status.MPI_SOURCE;
268: lens[imdex] = n;
269: slen += n;
270: count--;
271: }
272: PetscFree(recv_waits);
273:
274: /* move the received row indices into local numbering */
275: PetscMalloc((slen+1)*sizeof(int),&lrows);
276: count = 0;
277: for (i=0; i<nrecvs; i++) {
278: values = rvalues + i*nmax;
279: for (j=0; j<lens[i]; j++) {
280: lrows[count++] = values[j] - base;
281: }
282: }
283: PetscFree(rvalues);
284: PetscFree(lens);
285: PetscFree(owner);
286: PetscFree(nprocs);
287:
288: /* actually zap the local rows */
289: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
290: PetscLogObjectParent(A,istmp);
291: PetscFree(lrows);
292: MatZeroRows(l->A,istmp,diag);
293: ISDestroy(istmp);
295: /* wait on sends */
296: if (nsends) {
297: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
298: MPI_Waitall(nsends,send_waits,send_status);
299: PetscFree(send_status);
300: }
301: PetscFree(send_waits);
302: PetscFree(svalues);
304: return(0);
305: }
309: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
310: {
311: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
312: int ierr;
315: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
316: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
318: return(0);
319: }
323: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
324: {
325: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
326: int ierr;
329: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
330: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
331: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
332: return(0);
333: }
337: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
338: {
339: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
340: int ierr;
341: PetscScalar zero = 0.0;
344: VecSet(&zero,yy);
345: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
346: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
347: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348: return(0);
349: }
353: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
354: {
355: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
356: int ierr;
359: VecCopy(yy,zz);
360: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
361: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
362: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
363: return(0);
364: }
368: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
369: {
370: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
371: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
372: int ierr;
373: PetscReal isend[5],irecv[5];
376: info->block_size = (PetscReal)dmat->bs;
377: MatGetInfo(mat->A,MAT_LOCAL,info);
378: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
379: isend[3] = info->memory; isend[4] = info->mallocs;
380: if (flag == MAT_LOCAL) {
381: info->nz_used = isend[0];
382: info->nz_allocated = isend[1];
383: info->nz_unneeded = isend[2];
384: info->memory = isend[3];
385: info->mallocs = isend[4];
386: } else if (flag == MAT_GLOBAL_MAX) {
387: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
388: info->nz_used = irecv[0];
389: info->nz_allocated = irecv[1];
390: info->nz_unneeded = irecv[2];
391: info->memory = irecv[3];
392: info->mallocs = irecv[4];
393: } else if (flag == MAT_GLOBAL_SUM) {
394: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
395: info->nz_used = irecv[0];
396: info->nz_allocated = irecv[1];
397: info->nz_unneeded = irecv[2];
398: info->memory = irecv[3];
399: info->mallocs = irecv[4];
400: }
401: info->rows_global = (double)matin->M;
402: info->columns_global = (double)matin->N;
403: info->rows_local = (double)matin->m;
404: info->columns_local = (double)matin->N;
405: return(0);
406: }
410: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
411: {
412: int ierr;
413: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
416: MatGetDiagonal(A->A,v);
417: return(0);
418: }
422: int MatDestroy_MPIBDiag(Mat mat)
423: {
424: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
425: int ierr;
426: #if defined(PETSC_USE_LOG)
427: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
430: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
431: #else
433: #endif
434: MatStashDestroy_Private(&mat->stash);
435: PetscFree(mbd->rowners);
436: PetscFree(mbd->gdiag);
437: MatDestroy(mbd->A);
438: if (mbd->lvec) {VecDestroy(mbd->lvec);}
439: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
440: PetscFree(mbd);
441: return(0);
442: }
447: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
448: {
449: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
450: int ierr;
453: if (mbd->size == 1) {
454: MatView(mbd->A,viewer);
455: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
456: return(0);
457: }
461: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
462: {
463: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
464: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
465: int ierr,i,size = mbd->size,rank = mbd->rank;
466: PetscTruth isascii,isdraw;
467: PetscViewer sviewer;
468: PetscViewerFormat format;
471: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
472: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
473: if (isascii) {
474: PetscViewerGetFormat(viewer,&format);
475: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
476: int nline = PetscMin(10,mbd->gnd),k,nk,np;
477: PetscViewerASCIIPrintf(viewer," block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
478: nk = (mbd->gnd-1)/nline + 1;
479: for (k=0; k<nk; k++) {
480: PetscViewerASCIIPrintf(viewer," global diag numbers:");
481: np = PetscMin(nline,mbd->gnd - nline*k);
482: for (i=0; i<np; i++) {
483: PetscViewerASCIIPrintf(viewer," %d",mbd->gdiag[i+nline*k]);
484: }
485: PetscViewerASCIIPrintf(viewer,"\n");
486: }
487: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
488: MatInfo info;
489: MPI_Comm_rank(mat->comm,&rank);
490: MatGetInfo(mat,MAT_LOCAL,&info);
491: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
492: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
493: PetscViewerFlush(viewer);
494: VecScatterView(mbd->Mvctx,viewer);
495: }
496: return(0);
497: }
498: }
500: if (isdraw) {
501: PetscDraw draw;
502: PetscTruth isnull;
503: PetscViewerDrawGetDraw(viewer,0,&draw);
504: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
505: }
507: if (size == 1) {
508: MatView(mbd->A,viewer);
509: } else {
510: /* assemble the entire matrix onto the first processor. */
511: Mat A;
512: int M = mat->M,N = mat->N,m,row,nz,*cols;
513: PetscScalar *vals;
514: Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;
516: /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
517: if (!rank) {
518: MatCreate(mat->comm,M,M,M,N,&A);
519: MatSetType(A,MATMPIBDIAG);
520: MatMPIBDiagSetPreallocation(A,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL);
521: } else {
522: MatCreate(mat->comm,0,0,M,N,&A);
523: MatSetType(A,MATMPIBDIAG);
524: MatMPIBDiagSetPreallocation(A,0,Ambd->bs,PETSC_NULL,PETSC_NULL);
525: }
526: PetscLogObjectParent(mat,A);
528: /* Copy the matrix ... This isn't the most efficient means,
529: but it's quick for now */
530: row = mbd->rstart;
531: m = mbd->A->m;
532: for (i=0; i<m; i++) {
533: MatGetRow(mat,row,&nz,&cols,&vals);
534: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
535: MatRestoreRow(mat,row,&nz,&cols,&vals);
536: row++;
537: }
538: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
539: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
540: PetscViewerGetSingleton(viewer,&sviewer);
541: if (!rank) {
542: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
543: }
544: PetscViewerRestoreSingleton(viewer,&sviewer);
545: PetscViewerFlush(viewer);
546: MatDestroy(A);
547: }
548: return(0);
549: }
553: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
554: {
555: int ierr;
556: PetscTruth isascii,isdraw,isbinary;
559: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
560: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
561: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
562: if (isascii || isdraw) {
563: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
564: } else if (isbinary) {
565: MatView_MPIBDiag_Binary(mat,viewer);
566: } else {
567: SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
568: }
569: return(0);
570: }
574: int MatSetOption_MPIBDiag(Mat A,MatOption op)
575: {
576: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
577: int ierr;
579: switch (op) {
580: case MAT_NO_NEW_NONZERO_LOCATIONS:
581: case MAT_YES_NEW_NONZERO_LOCATIONS:
582: case MAT_NEW_NONZERO_LOCATION_ERR:
583: case MAT_NEW_NONZERO_ALLOCATION_ERR:
584: case MAT_NO_NEW_DIAGONALS:
585: case MAT_YES_NEW_DIAGONALS:
586: MatSetOption(mbd->A,op);
587: break;
588: case MAT_ROW_ORIENTED:
589: mbd->roworiented = PETSC_TRUE;
590: MatSetOption(mbd->A,op);
591: break;
592: case MAT_COLUMN_ORIENTED:
593: mbd->roworiented = PETSC_FALSE;
594: MatSetOption(mbd->A,op);
595: break;
596: case MAT_IGNORE_OFF_PROC_ENTRIES:
597: mbd->donotstash = PETSC_TRUE;
598: break;
599: case MAT_ROWS_SORTED:
600: case MAT_ROWS_UNSORTED:
601: case MAT_COLUMNS_SORTED:
602: case MAT_COLUMNS_UNSORTED:
603: PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
604: break;
605: case MAT_SYMMETRIC:
606: case MAT_STRUCTURALLY_SYMMETRIC:
607: case MAT_NOT_SYMMETRIC:
608: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
609: case MAT_HERMITIAN:
610: case MAT_NOT_HERMITIAN:
611: case MAT_SYMMETRY_ETERNAL:
612: case MAT_NOT_SYMMETRY_ETERNAL:
613: break;
614: default:
615: SETERRQ(PETSC_ERR_SUP,"unknown option");
616: }
617: return(0);
618: }
622: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
623: {
624: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
625: int lrow,ierr;
628: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
629: lrow = row - mat->rstart;
630: MatGetRow(mat->A,lrow,nz,idx,v);
631: return(0);
632: }
636: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
637: PetscScalar **v)
638: {
639: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
640: int lrow,ierr;
643: lrow = row - mat->rstart;
644: MatRestoreRow(mat->A,lrow,nz,idx,v);
645: return(0);
646: }
651: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
652: {
653: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
654: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
655: PetscReal sum = 0.0;
656: int ierr,d,i,nd = a->nd,bs = a->bs,len;
657: PetscScalar *dv;
660: if (type == NORM_FROBENIUS) {
661: for (d=0; d<nd; d++) {
662: dv = a->diagv[d];
663: len = a->bdlen[d]*bs*bs;
664: for (i=0; i<len; i++) {
665: #if defined(PETSC_USE_COMPLEX)
666: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
667: #else
668: sum += dv[i]*dv[i];
669: #endif
670: }
671: }
672: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
673: *nrm = sqrt(*nrm);
674: PetscLogFlops(2*A->n*A->m);
675: } else if (type == NORM_1) { /* max column norm */
676: PetscReal *tmp,*tmp2;
677: int j;
678: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
679: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
680: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
681: *nrm = 0.0;
682: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
683: for (j=0; j<mbd->A->n; j++) {
684: if (tmp2[j] > *nrm) *nrm = tmp2[j];
685: }
686: PetscFree(tmp);
687: PetscFree(tmp2);
688: } else if (type == NORM_INFINITY) { /* max row norm */
689: PetscReal normtemp;
690: MatNorm(mbd->A,type,&normtemp);
691: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
692: }
693: return(0);
694: }
698: int MatPrintHelp_MPIBDiag(Mat A)
699: {
700: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
701: int ierr;
704: if (!a->rank) {
705: MatPrintHelp_SeqBDiag(a->A);
706: }
707: return(0);
708: }
712: int MatScale_MPIBDiag(const PetscScalar *alpha,Mat A)
713: {
714: int ierr;
715: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
718: MatScale_SeqBDiag(alpha,a->A);
719: return(0);
720: }
724: int MatSetUpPreallocation_MPIBDiag(Mat A)
725: {
726: int ierr;
729: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
730: return(0);
731: }
733: /* -------------------------------------------------------------------*/
735: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
736: MatGetRow_MPIBDiag,
737: MatRestoreRow_MPIBDiag,
738: MatMult_MPIBDiag,
739: /* 4*/ MatMultAdd_MPIBDiag,
740: MatMultTranspose_MPIBDiag,
741: MatMultTransposeAdd_MPIBDiag,
742: 0,
743: 0,
744: 0,
745: /*10*/ 0,
746: 0,
747: 0,
748: 0,
749: 0,
750: /*15*/ MatGetInfo_MPIBDiag,
751: 0,
752: MatGetDiagonal_MPIBDiag,
753: 0,
754: MatNorm_MPIBDiag,
755: /*20*/ MatAssemblyBegin_MPIBDiag,
756: MatAssemblyEnd_MPIBDiag,
757: 0,
758: MatSetOption_MPIBDiag,
759: MatZeroEntries_MPIBDiag,
760: /*25*/ MatZeroRows_MPIBDiag,
761: 0,
762: 0,
763: 0,
764: 0,
765: /*30*/ MatSetUpPreallocation_MPIBDiag,
766: 0,
767: 0,
768: 0,
769: 0,
770: /*35*/ 0,
771: 0,
772: 0,
773: 0,
774: 0,
775: /*40*/ 0,
776: 0,
777: 0,
778: MatGetValues_MPIBDiag,
779: 0,
780: /*45*/ MatPrintHelp_MPIBDiag,
781: MatScale_MPIBDiag,
782: 0,
783: 0,
784: 0,
785: /*50*/ MatGetBlockSize_MPIBDiag,
786: 0,
787: 0,
788: 0,
789: 0,
790: /*55*/ 0,
791: 0,
792: 0,
793: 0,
794: 0,
795: /*60*/ 0,
796: MatDestroy_MPIBDiag,
797: MatView_MPIBDiag,
798: MatGetPetscMaps_Petsc,
799: 0,
800: /*65*/ 0,
801: 0,
802: 0,
803: 0,
804: 0,
805: /*70*/ 0,
806: 0,
807: 0,
808: 0,
809: 0,
810: /*75*/ 0,
811: 0,
812: 0,
813: 0,
814: 0,
815: /*80*/ 0,
816: 0,
817: 0,
818: 0,
819: /*85*/ MatLoad_MPIBDiag
820: };
822: EXTERN_C_BEGIN
825: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
826: {
827: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
828: int ierr,lrows,lcols,rstart,rend;
829: IS localc,localr;
832: MatGetLocalSize(A,&lrows,&lcols);
833: MatGetOwnershipRange(A,&rstart,&rend);
834: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
835: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
836: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
837: ISDestroy(localr);
838: ISDestroy(localc);
840: *iscopy = PETSC_TRUE;
841: return(0);
842: }
843: EXTERN_C_END
845: EXTERN_C_BEGIN
848: int MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
849: {
850: Mat_MPIBDiag *b;
851: int ierr,i,k,*ldiag,len,nd2;
852: PetscScalar **ldiagv = 0;
853: PetscTruth flg2;
856: B->preallocated = PETSC_TRUE;
857: if (bs == PETSC_DEFAULT) bs = 1;
858: if (nd == PETSC_DEFAULT) nd = 0;
859: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
860: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
861: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
862: if (nd && !diag) {
863: PetscMalloc(nd*sizeof(int),&diag);
864: nd2 = nd;
865: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
866: if (nd2 != nd) {
867: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
868: }
869: } else if (flg2) {
870: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
871: }
873: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
875: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
876: B->n = B->N = PetscMax(B->n,B->N);
878: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
879: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
880: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
882: /* the information in the maps duplicates the information computed below, eventually
883: we should remove the duplicate information that is not contained in the maps */
884: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
885: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
888: b = (Mat_MPIBDiag*)B->data;
889: b->gnd = nd;
891: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
892: b->rowners[0] = 0;
893: for (i=2; i<=b->size; i++) {
894: b->rowners[i] += b->rowners[i-1];
895: }
896: b->rstart = b->rowners[b->rank];
897: b->rend = b->rowners[b->rank+1];
898: b->brstart = (b->rstart)/bs;
899: b->brend = (b->rend)/bs;
902: /* Determine local diagonals; for now, assume global rows = global cols */
903: /* These are sorted in MatCreateSeqBDiag */
904: PetscMalloc((nd+1)*sizeof(int),&ldiag);
905: len = B->M/bs + B->N/bs + 1;
906: PetscMalloc(len*sizeof(int),&b->gdiag);
907: k = 0;
908: PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
909: + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
910: if (diagv) {
911: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
912: }
913: for (i=0; i<nd; i++) {
914: b->gdiag[i] = diag[i];
915: if (diag[i] > 0) { /* lower triangular */
916: if (diag[i] < b->brend) {
917: ldiag[k] = diag[i] - b->brstart;
918: if (diagv) ldiagv[k] = diagv[i];
919: k++;
920: }
921: } else { /* upper triangular */
922: if (B->M/bs - diag[i] > B->N/bs) {
923: if (B->M/bs + diag[i] > b->brstart) {
924: ldiag[k] = diag[i] - b->brstart;
925: if (diagv) ldiagv[k] = diagv[i];
926: k++;
927: }
928: } else {
929: if (B->M/bs > b->brstart) {
930: ldiag[k] = diag[i] - b->brstart;
931: if (diagv) ldiagv[k] = diagv[i];
932: k++;
933: }
934: }
935: }
936: }
938: /* Form local matrix */
939: MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
940: MatSetType(b->A,MATSEQBDIAG);
941: MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
942: PetscLogObjectParent(B,b->A);
943: PetscFree(ldiag);
944: if (ldiagv) {PetscFree(ldiagv);}
946: return(0);
947: }
948: EXTERN_C_END
950: /*MC
951: MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.
953: Options Database Keys:
954: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()
956: Level: beginner
958: .seealso: MatCreateMPIBDiag
959: M*/
961: EXTERN_C_BEGIN
964: int MatCreate_MPIBDiag(Mat B)
965: {
966: Mat_MPIBDiag *b;
967: int ierr;
970: PetscNew(Mat_MPIBDiag,&b);
971: B->data = (void*)b;
972: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
973: B->factor = 0;
974: B->mapping = 0;
976: B->insertmode = NOT_SET_VALUES;
977: MPI_Comm_rank(B->comm,&b->rank);
978: MPI_Comm_size(B->comm,&b->size);
980: /* build local table of row ownerships */
981: PetscMalloc((b->size+2)*sizeof(int),&b->rowners);
983: /* build cache for off array entries formed */
984: MatStashCreate_Private(B->comm,1,&B->stash);
985: b->donotstash = PETSC_FALSE;
987: /* stuff used for matrix-vector multiply */
988: b->lvec = 0;
989: b->Mvctx = 0;
991: /* used for MatSetValues() input */
992: b->roworiented = PETSC_TRUE;
994: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
995: "MatGetDiagonalBlock_MPIBDiag",
996: MatGetDiagonalBlock_MPIBDiag);
997: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
998: "MatMPIBDiagSetPreallocation_MPIBDiag",
999: MatMPIBDiagSetPreallocation_MPIBDiag);
1000: return(0);
1001: }
1002: EXTERN_C_END
1004: /*MC
1005: MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.
1007: This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
1008: and MATMPIBDIAG otherwise.
1010: Options Database Keys:
1011: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()
1013: Level: beginner
1015: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1016: M*/
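/*
   A minimal usage sketch; the example function name, communicator, and sizes
   are assumptions, not part of this file's API.  It shows a matrix being
   created and then typed at run time via MatSetFromOptions(), so that
   -mat_type bdiag (or -mat_type mpibdiag) selects this format.
*/
int MatBDiagFromOptionsExample(MPI_Comm comm,int m,int M,int N,Mat *A)
{
  MatCreate(comm,m,m,M,N,A);
  MatSetFromOptions(*A); /* "bdiag" dispatches to MATSEQBDIAG or MATMPIBDIAG
                            depending on the size of comm, see MatCreate_BDiag() below */
  return 0;
}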
1018: EXTERN_C_BEGIN
1021: int MatCreate_BDiag(Mat A) {
1022: int ierr,size;
1025: PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1026: MPI_Comm_size(A->comm,&size);
1027: if (size == 1) {
1028: MatSetType(A,MATSEQBDIAG);
1029: } else {
1030: MatSetType(A,MATMPIBDIAG);
1031: }
1032: return(0);
1033: }
1034: EXTERN_C_END
1038: /*@C
1039: MatMPIBDiagSetPreallocation - Sets the block size and block diagonal structure of a matrix of type MATMPIBDIAG.
1041: Collective on Mat
1043: Input Parameters:
1044: + A - the matrix
1045: . nd - number of block diagonals (global) (optional)
1046: . bs - each element of a diagonal is a bs x bs dense matrix
1047: . diag - optional array of block diagonal numbers (length nd).
1048: For a matrix element A[i,j], where i=row and j=column, the
1049: diagonal number is
1050: $ diag = i/bs - j/bs (integer division)
1051: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1052: needed (expensive).
1053: - diagv - pointer to actual diagonals (in same order as diag array),
1054: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1055: to control memory allocation.
1058: Options Database Keys:
1059: . -mat_block_size <bs> - Sets blocksize
1060: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1062: Notes:
1063: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1064: then it must be used on all processors that share the object for that argument.
1066: The parallel matrix is partitioned across the processors by rows, where
1067: each local rectangular matrix is stored in the uniprocessor block
1068: diagonal format. See the users manual for further details.
1070: The user MUST specify either the local or global numbers of rows
1071: (possibly both).
1073: The case bs=1 (conventional diagonal storage) is implemented as
1074: a special case.
1076: Fortran Notes:
1077: Fortran programmers cannot set diagv; this variable is ignored.
1079: Level: intermediate
1081: .keywords: matrix, block, diagonal, parallel, sparse
1083: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1084: @*/
1085: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,const int diag[],PetscScalar *diagv[])
1086: {
1087: int ierr,(*f)(Mat,int,int,const int[],PetscScalar*[]);
1090: PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1091: if (f) {
1092: (*f)(B,nd,bs,diag,diagv);
1093: }
1094: return(0);
1095: }
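/*
   A minimal calling sketch for MatMPIBDiagSetPreallocation(), mirroring the
   create/set-type/preallocate sequence used elsewhere in this file; the
   example function name, communicator, and sizes are assumptions.  Three
   block diagonals (-1, 0, 1) of block size bs are preallocated, and
   PETSC_NULL for diagv lets PETSc allocate the diagonal storage itself.
*/
int MatMPIBDiagPreallocationExample(MPI_Comm comm,int m,int M,int N,int bs,Mat *B)
{
  int diag[3] = {-1,0,1}; /* block diagonal numbers: diag = i/bs - j/bs */
  MatCreate(comm,m,m,M,N,B);
  MatSetType(*B,MATMPIBDIAG);
  MatMPIBDiagSetPreallocation(*B,3,bs,diag,PETSC_NULL);
  return 0;
}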
1099: /*@C
1100: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1102: Collective on MPI_Comm
1104: Input Parameters:
1105: + comm - MPI communicator
1106: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1107: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1108: . N - number of columns (local and global)
1109: . nd - number of block diagonals (global) (optional)
1110: . bs - each element of a diagonal is a bs x bs dense matrix
1111: . diag - optional array of block diagonal numbers (length nd).
1112: For a matrix element A[i,j], where i=row and j=column, the
1113: diagonal number is
1114: $ diag = i/bs - j/bs (integer division)
1115: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1116: needed (expensive).
1117: - diagv - pointer to actual diagonals (in same order as diag array),
1118: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1119: to control memory allocation.
1121: Output Parameter:
1122: . A - the matrix
1124: Options Database Keys:
1125: . -mat_block_size <bs> - Sets blocksize
1126: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1128: Notes:
1129: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1130: then it must be used on all processors that share the object for that argument.
1132: The parallel matrix is partitioned across the processors by rows, where
1133: each local rectangular matrix is stored in the uniprocessor block
1134: diagonal format. See the users manual for further details.
1136: The user MUST specify either the local or global numbers of rows
1137: (possibly both).
1139: The case bs=1 (conventional diagonal storage) is implemented as
1140: a special case.
1142: Fortran Notes:
1143: Fortran programmers cannot set diagv; this variable is ignored.
1145: Level: intermediate
1147: .keywords: matrix, block, diagonal, parallel, sparse
1149: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1150: @*/
1151: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,const int diag[],PetscScalar *diagv[],Mat *A)
1152: {
1153: int ierr,size;
1156: MatCreate(comm,m,m,M,N,A);
1157: MPI_Comm_size(comm,&size);
1158: if (size > 1) {
1159: MatSetType(*A,MATMPIBDIAG);
1160: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1161: } else {
1162: MatSetType(*A,MATSEQBDIAG);
1163: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1164: }
1165: return(0);
1166: }
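/*
   A minimal calling sketch for MatCreateMPIBDiag(); the example function name
   and sizes are assumptions, and M is assumed to be divisible by bs.  It
   builds a block tridiagonal layout (diagonals -1, 0, 1), with PETSC_DECIDE
   for the local row count and PETSC_NULL so PETSc allocates the diagonals.
*/
int MatCreateMPIBDiagExample(MPI_Comm comm,int M,int bs,Mat *A)
{
  int diag[3] = {-1,0,1};
  MatCreateMPIBDiag(comm,PETSC_DECIDE,M,M,3,bs,diag,PETSC_NULL,A);
  return 0;
}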
1170: /*@C
1171: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1172: For the parallel case, this returns information for the local submatrix.
1174: Input Parameters:
1175: . mat - the matrix, stored in block diagonal format.
1177: Not Collective
1179: Output Parameters:
1182: + nd - number of block diagonals
1183: . bs - each element of a diagonal is a bs x bs dense matrix
1184: . bdlen - array of total block lengths of block diagonals
1185: . diag - array of block diagonal numbers (length nd).
1186: For a matrix element A[i,j], where i=row and j=column, the
1187: diagonal number is
1188: $ diag = i/bs - j/bs (integer division)
1191: - diagv - pointer to the actual diagonals (in the same order as the diag array)
1193: Level: advanced
1195: Notes:
1196: See the users manual for further details regarding this storage format.
1198: .keywords: matrix, block, diagonal, get, data
1200: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1201: @*/
1202: int MatBDiagGetData(Mat mat,int *nd,int *bs,int *diag[],int *bdlen[],PetscScalar ***diagv)
1203: {
1204: Mat_MPIBDiag *pdmat;
1205: Mat_SeqBDiag *dmat = 0;
1206: PetscTruth isseq,ismpi;
1207: int ierr;
1211: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1212: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1213: if (isseq) {
1214: dmat = (Mat_SeqBDiag*)mat->data;
1215: } else if (ismpi) {
1216: pdmat = (Mat_MPIBDiag*)mat->data;
1217: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1218: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1219: *nd = dmat->nd;
1220: *bs = dmat->bs;
1221: *diag = dmat->diag;
1222: *bdlen = dmat->bdlen;
1223: *diagv = dmat->diagv;
1224: return(0);
1225: }
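/*
   A minimal query sketch for MatBDiagGetData(); the example function name is
   an assumption.  The returned arrays point into the matrix's own storage
   (see the assignments above), so the caller does not free them.
*/
int MatBDiagGetDataExample(Mat A)
{
  int         nd,bs,*diag,*bdlen;
  PetscScalar **diagv;
  MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);
  PetscPrintf(PETSC_COMM_SELF,"nd=%d bs=%d first diagonal number=%d\n",nd,bs,nd ? diag[0] : 0);
  return 0;
}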
1227: #include "petscsys.h"
1231: int MatLoad_MPIBDiag(PetscViewer viewer,const MatType type,Mat *newmat)
1232: {
1233: Mat A;
1234: PetscScalar *vals,*svals;
1235: MPI_Comm comm = ((PetscObject)viewer)->comm;
1236: MPI_Status status;
1237: int bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1238: int header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1239: int *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1240: int tag = ((PetscObject)viewer)->tag,extra_rows;
1243: MPI_Comm_size(comm,&size);
1244: MPI_Comm_rank(comm,&rank);
1245: if (!rank) {
1246: PetscViewerBinaryGetDescriptor(viewer,&fd);
1247: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1248: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1249: if (header[3] < 0) {
1250: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1251: }
1252: }
1253: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1254: M = header[1]; N = header[2];
1256: bs = 1; /* uses a block size of 1 by default */
1257: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1259: /*
1260: This code adds extra rows to make sure the number of rows is
1261: divisible by the blocksize
1262: */
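/* For example (illustrative values only): with bs=3 and M=10, Mbs = 10/3 = 3 and
   extra_rows = 3 - 10 + 3*3 = 2, so Mbs is bumped to 4 and the matrix is
   padded to 12 = 4*3 rows. */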
1263: Mbs = M/bs;
1264: extra_rows = bs - M + bs*(Mbs);
1265: if (extra_rows == bs) extra_rows = 0;
1266: else Mbs++;
1267: if (extra_rows && !rank) {
1268: PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1269: }
1271: /* determine ownership of all rows */
1272: m = bs*(Mbs/size + ((Mbs % size) > rank));
1273: PetscMalloc((size+2)*sizeof(int),&rowners);
1274: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1275: rowners[0] = 0;
1276: for (i=2; i<=size; i++) {
1277: rowners[i] += rowners[i-1];
1278: }
1279: rstart = rowners[rank];
1280: rend = rowners[rank+1];
1282: /* distribute row lengths to all processors */
1283: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1284: if (!rank) {
1285: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1286: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1287: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1288: PetscMalloc(size*sizeof(int),&sndcounts);
1289: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1290: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1291: PetscFree(sndcounts);
1292: } else {
1293: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1294: }
1296: if (!rank) {
1297: /* calculate the number of nonzeros on each processor */
1298: PetscMalloc(size*sizeof(int),&procsnz);
1299: PetscMemzero(procsnz,size*sizeof(int));
1300: for (i=0; i<size; i++) {
1301: for (j=rowners[i]; j<rowners[i+1]; j++) {
1302: procsnz[i] += rowlengths[j];
1303: }
1304: }
1305: PetscFree(rowlengths);
1307: /* determine max buffer needed and allocate it */
1308: maxnz = 0;
1309: for (i=0; i<size; i++) {
1310: maxnz = PetscMax(maxnz,procsnz[i]);
1311: }
1312: PetscMalloc(maxnz*sizeof(int),&cols);
1314: /* read in my part of the matrix column indices */
1315: nz = procsnz[0];
1316: PetscMalloc(nz*sizeof(int),&mycols);
1317: if (size == 1) nz -= extra_rows;
1318: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1319: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1321: /* read in everyone else's and ship off */
1322: for (i=1; i<size-1; i++) {
1323: nz = procsnz[i];
1324: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1325: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1326: }
1327: /* read in the stuff for the last proc */
1328: if (size != 1) {
1329: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1330: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1331: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1332: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1333: }
1334: PetscFree(cols);
1335: } else {
1336: /* determine buffer space needed for message */
1337: nz = 0;
1338: for (i=0; i<m; i++) {
1339: nz += ourlens[i];
1340: }
1341: PetscMalloc(nz*sizeof(int),&mycols);
1343: /* receive message of column indices*/
1344: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1345: MPI_Get_count(&status,MPI_INT,&maxnz);
1346: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1347: }
1349: MatCreate(comm,m,m,M+extra_rows,N+extra_rows,newmat);
1350: MatSetType(*newmat,type);
1351: MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1352: A = *newmat;
1354: if (!rank) {
1355: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1357: /* read in my part of the matrix numerical values */
1358: nz = procsnz[0];
1359: if (size == 1) nz -= extra_rows;
1360: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1361: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1363: /* insert into matrix */
1364: jj = rstart;
1365: smycols = mycols;
1366: svals = vals;
1367: for (i=0; i<m; i++) {
1368: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1369: smycols += ourlens[i];
1370: svals += ourlens[i];
1371: jj++;
1372: }
1374: /* read in other processors (except the last one) and ship out */
1375: for (i=1; i<size-1; i++) {
1376: nz = procsnz[i];
1377: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1378: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1379: }
1380: /* the last proc */
1381: if (size != 1){
1382: nz = procsnz[i] - extra_rows;
1383: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1384: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1385: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1386: }
1387: PetscFree(procsnz);
1388: } else {
1389: /* receive numeric values */
1390: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1392: /* receive message of values*/
1393: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1394: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1395: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1397: /* insert into matrix */
1398: jj = rstart;
1399: smycols = mycols;
1400: svals = vals;
1401: for (i=0; i<m; i++) {
1402: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1403: smycols += ourlens[i];
1404: svals += ourlens[i];
1405: jj++;
1406: }
1407: }
1408: PetscFree(ourlens);
1409: PetscFree(vals);
1410: PetscFree(mycols);
1411: PetscFree(rowners);
1413: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1414: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1415: return(0);
1416: }