Actual source code: mpirowbs.c
1: /* $Id: mpirowbs.c,v 2.9 2001/08/10 03:30:53 bsmith Exp $*/
3: #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
5: #define CHUNCKSIZE_LOCAL 10
9: static int MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
10: {
14: if (v) {
15: #if defined(PETSC_USE_LOG)
16: int len = -n*(sizeof(int)+sizeof(PetscScalar));
17: #endif
18: PetscFree(v);
19: PetscLogObjectMemory(A,len);
20: }
21: return(0);
22: }
26: static int MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
27: {
28: int len,ierr;
31: if (!n) {
32: *i = 0; *v = 0;
33: } else {
34: len = n*(sizeof(int) + sizeof(PetscScalar));
35: PetscMalloc(len,v);
36: PetscLogObjectMemory(A,len);
37: *i = (int *)(*v + n);
38: }
39: return(0);
40: }
44: int MatScale_MPIRowbs(const PetscScalar *alphain,Mat inA)
45: {
46: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
47: BSspmat *A = a->A;
48: BSsprow *vs;
49: PetscScalar *ap,alpha = *alphain;
50: int i,m = inA->m,nrow,j;
53: for (i=0; i<m; i++) {
54: vs = A->rows[i];
55: nrow = vs->length;
56: ap = vs->nz;
57: for (j=0; j<nrow; j++) {
58: ap[j] *= alpha;
59: }
60: }
61: PetscLogFlops(a->nz);
62: return(0);
63: }
65: /* ----------------------------------------------------------------- */
68: static int MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
69: {
70: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
71: int ierr,i,len,m = A->m,*tnnz;
72: BSspmat *bsmat;
73: BSsprow *vs;
76: PetscMalloc((m+1)*sizeof(int),&tnnz);
77: if (!nnz) {
78: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
79: if (nz <= 0) nz = 1;
80: for (i=0; i<m; i++) tnnz[i] = nz;
81: nz = nz*m;
82: } else {
83: nz = 0;
84: for (i=0; i<m; i++) {
85: if (nnz[i] <= 0) tnnz[i] = 1;
86: else tnnz[i] = nnz[i];
87: nz += tnnz[i];
88: }
89: }
91: /* Allocate BlockSolve matrix context */
92: PetscNew(BSspmat,&bsif->A);
93: bsmat = bsif->A;
94: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
95: BSset_mat_symmetric(bsmat,PETSC_FALSE);
96: len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
97: PetscMalloc(len,&bsmat->rows);
98: bsmat->num_rows = m;
99: bsmat->global_num_rows = A->M;
100: bsmat->map = bsif->bsmap;
101: vs = (BSsprow*)(bsmat->rows + m);
102: for (i=0; i<m; i++) {
103: bsmat->rows[i] = vs;
104: bsif->imax[i] = tnnz[i];
105: vs->diag_ind = -1;
106: MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
107: /* put zero on diagonal */
108: /*vs->length = 1;
109: vs->col[0] = i + bsif->rstart;
110: vs->nz[0] = 0.0;*/
111: vs->length = 0;
112: vs++;
113: }
114: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
115: bsif->nz = 0;
116: bsif->maxnz = nz;
117: bsif->sorted = 0;
118: bsif->roworiented = PETSC_TRUE;
119: bsif->nonew = 0;
120: bsif->bs_color_single = 0;
122: PetscFree(tnnz);
123: return(0);
124: }
128: static int MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
129: {
130: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
131: BSspmat *A = mat->A;
132: BSsprow *vs;
133: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax,ierr;
134: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
135: PetscScalar *ap,value;
138: for (k=0; k<m; k++) { /* loop over added rows */
139: row = im[k];
140: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
141: if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
142: vs = A->rows[row];
143: ap = vs->nz; rp = vs->col;
144: rmax = imax[row]; nrow = vs->length;
145: a = 0;
146: for (l=0; l<n; l++) { /* loop over added columns */
147: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
148: if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
149: col = in[l]; value = *v++;
150: if (!sorted) a = 0; b = nrow;
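      /* locate 'col' in the sorted row: bisect the window [a,b) down to a few
         entries, then finish with the linear scan below (a is reset above when
         the incoming columns are not sorted) */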
151: while (b-a > 5) {
152: t = (b+a)/2;
153: if (rp[t] > col) b = t;
154: else a = t;
155: }
156: for (i=a; i<b; i++) {
157: if (rp[i] > col) break;
158: if (rp[i] == col) {
159: if (addv == ADD_VALUES) ap[i] += value;
160: else ap[i] = value;
161: goto noinsert;
162: }
163: }
164: if (nonew) goto noinsert;
165: if (nrow >= rmax) {
166: /* there is no extra room in row, therefore enlarge */
167: int *itemp,*iout,*iin = vs->col;
168: PetscScalar *vout,*vin = vs->nz,*vtemp;
170: /* malloc new storage space */
171: imax[row] += CHUNCKSIZE_LOCAL;
172: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
173: vout = vtemp; iout = itemp;
174: for (ii=0; ii<i; ii++) {
175: vout[ii] = vin[ii];
176: iout[ii] = iin[ii];
177: }
178: vout[i] = value;
179: iout[i] = col;
180: for (ii=i+1; ii<=nrow; ii++) {
181: vout[ii] = vin[ii-1];
182: iout[ii] = iin[ii-1];
183: }
184: /* free old row storage */
185: if (rmax > 0) {
186: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
187: }
188: vs->col = iout; vs->nz = vout;
189: rmax = imax[row];
190: mat->maxnz += CHUNCKSIZE_LOCAL;
191: mat->reallocs++;
192: } else {
193: /* shift higher columns over to make room for newie */
194: for (ii=nrow-1; ii>=i; ii--) {
195: rp[ii+1] = rp[ii];
196: ap[ii+1] = ap[ii];
197: }
198: rp[i] = col;
199: ap[i] = value;
200: }
201: nrow++;
202: mat->nz++;
203: AA->same_nonzero = PETSC_FALSE;
204: noinsert:;
205: a = i + 1;
206: }
207: vs->length = nrow;
208: }
209: return(0);
210: }
215: static int MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
216: {
218: return(0);
219: }
223: static int MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
224: {
225: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
226: BSspmat *A = a->A;
227: BSsprow *vs;
228: int i,j,rstart = a->rstart;
231: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
233: /* Mark location of diagonal */
234: for (i=0; i<AA->m; i++) {
235: vs = A->rows[i];
236: for (j=0; j<vs->length; j++) {
237: if (vs->col[j] == i + rstart) {
238: vs->diag_ind = j;
239: break;
240: }
241: }
242: if (vs->diag_ind == -1) {
243: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
244: }
245: }
246: return(0);
247: }
251: static int MatZeroRows_MPIRowbs_local(Mat A,IS is,const PetscScalar *diag)
252: {
253: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
254: BSspmat *l = a->A;
255: int i,ierr,N,*rz,m = A->m - 1,col,base=a->rowners[a->rank];
258: ISGetLocalSize(is,&N);
259: ISGetIndices(is,&rz);
260: if (a->keepzeroedrows) {
261: for (i=0; i<N; i++) {
262: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
263: PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
264: if (diag) {
265: col=rz[i]+base;
266: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
267: }
268: }
269: } else {
270: if (diag) {
271: for (i=0; i<N; i++) {
272: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
273: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
274: l->rows[rz[i]]->length = 1;
275: l->rows[rz[i]]->nz[0] = *diag;
276: l->rows[rz[i]]->col[0] = a->rstart + rz[i];
277: } else {
278: col=rz[i]+base;
279: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
280: }
281: }
282: } else {
283: for (i=0; i<N; i++) {
284: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
285: l->rows[rz[i]]->length = 0;
286: }
287: }
288: A->same_nonzero = PETSC_FALSE;
289: }
290: ISRestoreIndices(is,&rz);
291: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
292: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
293: return(0);
294: }
298: static int MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
299: {
300: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
301: BSsprow *vs,**rs;
302: PetscScalar *xv;
303: PetscReal sum = 0.0;
304: int *xi,nz,i,j,ierr;
307: rs = mat->A->rows;
308: if (type == NORM_FROBENIUS) {
309: for (i=0; i<A->m; i++) {
310: vs = *rs++;
311: nz = vs->length;
312: xv = vs->nz;
313: while (nz--) {
314: #if defined(PETSC_USE_COMPLEX)
315: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
316: #else
317: sum += (*xv)*(*xv); xv++;
318: #endif
319: }
320: }
321: *norm = sqrt(sum);
322: } else if (type == NORM_1) { /* max column norm */
323: PetscReal *tmp;
324: PetscMalloc(A->n*sizeof(PetscReal),&tmp);
325: PetscMemzero(tmp,A->n*sizeof(PetscReal));
326: *norm = 0.0;
327: for (i=0; i<A->m; i++) {
328: vs = *rs++;
329: nz = vs->length;
330: xi = vs->col;
331: xv = vs->nz;
332: while (nz--) {
333: tmp[*xi] += PetscAbsScalar(*xv);
334: xi++; xv++;
335: }
336: }
337: for (j=0; j<A->n; j++) {
338: if (tmp[j] > *norm) *norm = tmp[j];
339: }
340: PetscFree(tmp);
341: } else if (type == NORM_INFINITY) { /* max row norm */
342: *norm = 0.0;
343: for (i=0; i<A->m; i++) {
344: vs = *rs++;
345: nz = vs->length;
346: xv = vs->nz;
347: sum = 0.0;
348: while (nz--) {
349: sum += PetscAbsScalar(*xv); xv++;
350: }
351: if (sum > *norm) *norm = sum;
352: }
353: } else {
354: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
355: }
356: return(0);
357: }
359: /* ----------------------------------------------------------------- */
363: int MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
364: {
365: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
366: int ierr,i,j,row,col,rstart = a->rstart,rend = a->rend;
367: PetscTruth roworiented = a->roworiented;
370: /* Note: There's no need to "unscale" the matrix, since scaling is
371: confined to a->pA, and we're working with a->A here */
372: for (i=0; i<m; i++) {
373: if (im[i] < 0) continue;
374: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
375: if (im[i] >= rstart && im[i] < rend) {
376: row = im[i] - rstart;
377: for (j=0; j<n; j++) {
378: if (in[j] < 0) continue;
379: if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
380: if (in[j] >= 0 && in[j] < mat->N){
381: col = in[j];
382: if (roworiented) {
383: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
384: } else {
385: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
386: }
387: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
388: }
389: } else {
390: if (!a->donotstash) {
391: if (roworiented) {
392: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
393: } else {
394: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
395: }
396: }
397: }
398: }
399: return(0);
400: }
404: int MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
405: {
406: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
407: MPI_Comm comm = mat->comm;
408: int ierr,nstash,reallocs;
409: InsertMode addv;
412: /* Note: There's no need to "unscale" the matrix, since scaling is
413: confined to a->pA, and we're working with a->A here */
415: /* make sure all processors are either in INSERTMODE or ADDMODE */
416: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
417: if (addv == (ADD_VALUES|INSERT_VALUES)) {
418: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
419: }
420: mat->insertmode = addv; /* in case this processor had no cache */
422: MatStashScatterBegin_Private(&mat->stash,a->rowners);
423: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
424: PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
425: return(0);
426: }
428: #include "petscviewer.h"
432: static int MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
433: {
434: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
435: int ierr,i,j;
436: PetscTruth isascii;
437: BSspmat *A = a->A;
438: BSsprow **rs = A->rows;
439: PetscViewerFormat format;
442: PetscViewerGetFormat(viewer,&format);
443: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
445: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
446: int ind_l,ind_g,clq_l,clq_g,color;
447: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
448: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
449: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
450: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
451: color = BSnum_colors(a->pA);CHKERRBS(0);
452: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
453: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
454: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
455: for (i=0; i<A->num_rows; i++) {
456: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
457: for (j=0; j<rs[i]->length; j++) {
458: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
459: }
460: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
461: }
462: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
463: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
464: } else {
465: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
466: for (i=0; i<A->num_rows; i++) {
467: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
468: for (j=0; j<rs[i]->length; j++) {
469: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
470: }
471: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
472: }
473: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
474: }
475: PetscViewerFlush(viewer);
476: return(0);
477: }
481: static int MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
482: {
483: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
484: int ierr,i,M,m,rank,size,*sbuff,*rowlengths;
485: int *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
486: BSspmat *A = a->A;
487: BSsprow **rs = A->rows;
488: MPI_Comm comm = mat->comm;
489: MPI_Status status;
490: PetscScalar *vals;
491: MatInfo info;
494: MPI_Comm_size(comm,&size);
495: MPI_Comm_rank(comm,&rank);
497: M = mat->M; m = mat->m;
498: /* First gather together on the first processor the lengths of
499: each row, and write them out to the file */
500: PetscMalloc(m*sizeof(int),&sbuff);
501: for (i=0; i<A->num_rows; i++) {
502: sbuff[i] = rs[i]->length;
503: }
504: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
505: if (!rank) {
506: PetscViewerBinaryGetDescriptor(viewer,&fd);
507: PetscMalloc((4+M)*sizeof(int),&rowlengths);
508: PetscMalloc(size*sizeof(int),&recvcts);
509: recvdisp = a->rowners;
510: for (i=0; i<size; i++) {
511: recvcts[i] = recvdisp[i+1] - recvdisp[i];
512: }
513: /* first four elements of rowlength are the header */
514: rowlengths[0] = mat->cookie;
515: rowlengths[1] = mat->M;
516: rowlengths[2] = mat->N;
517: rowlengths[3] = (int)info.nz_used;
518: MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
519: PetscFree(sbuff);
520: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,0);
521: /* count the number of nonzeros on each processor */
522: PetscMemzero(recvcts,size*sizeof(int));
523: for (i=0; i<size; i++) {
524: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
525: recvcts[i] += rowlengths[j+4];
526: }
527: }
528: /* allocate buffer long enough to hold largest one */
529: maxnz = 0;
530: for (i=0; i<size; i++) {
531: maxnz = PetscMax(maxnz,recvcts[i]);
532: }
533: PetscFree(rowlengths);
534: PetscFree(recvcts);
535: PetscMalloc(maxnz*sizeof(int),&cols);
537: /* binary store column indices for 0th processor */
538: nz = 0;
539: for (i=0; i<A->num_rows; i++) {
540: for (j=0; j<rs[i]->length; j++) {
541: cols[nz++] = rs[i]->col[j];
542: }
543: }
544: PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
546: /* receive and store column indices for all other processors */
547: for (i=1; i<size; i++) {
548: /* should tell processor that I am now ready and to begin the send */
549: MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
550: MPI_Get_count(&status,MPI_INT,&nz);
551: PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
552: }
553: PetscFree(cols);
554: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
556: /* binary store values for 0th processor */
557: nz = 0;
558: for (i=0; i<A->num_rows; i++) {
559: for (j=0; j<rs[i]->length; j++) {
560: vals[nz++] = rs[i]->nz[j];
561: }
562: }
563: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
565: /* receive and store nonzeros for all other processors */
566: for (i=1; i<size; i++) {
567: /* should tell processor that I am now ready and to begin the send */
568: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
569: MPI_Get_count(&status,MPIU_SCALAR,&nz);
570: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
571: }
572: PetscFree(vals);
573: } else {
574: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
575: PetscFree(sbuff);
577: /* count local nonzeros */
578: nz = 0;
579: for (i=0; i<A->num_rows; i++) {
580: for (j=0; j<rs[i]->length; j++) {
581: nz++;
582: }
583: }
584: /* copy into buffer column indices */
585: PetscMalloc(nz*sizeof(int),&cols);
586: nz = 0;
587: for (i=0; i<A->num_rows; i++) {
588: for (j=0; j<rs[i]->length; j++) {
589: cols[nz++] = rs[i]->col[j];
590: }
591: }
592: /* send */ /* should wait until processor zero tells me to go */
593: MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
594: PetscFree(cols);
596: /* copy into buffer column values */
597: PetscMalloc(nz*sizeof(PetscScalar),&vals);
598: nz = 0;
599: for (i=0; i<A->num_rows; i++) {
600: for (j=0; j<rs[i]->length; j++) {
601: vals[nz++] = rs[i]->nz[j];
602: }
603: }
604: /* send */ /* should wait until processor zero tells me to go */
605: MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
606: PetscFree(vals);
607: }
609: return(0);
610: }
614: int MatView_MPIRowbs(Mat mat,PetscViewer viewer)
615: {
616: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
617: int ierr;
618: PetscTruth isascii,isbinary;
621: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
622: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
623: if (!bsif->blocksolveassembly) {
624: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
625: }
626: if (isascii) {
627: MatView_MPIRowbs_ASCII(mat,viewer);
628: } else if (isbinary) {
629: MatView_MPIRowbs_Binary(mat,viewer);
630: } else {
631: SETERRQ1(1,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
632: }
633: return(0);
634: }
635:
638: static int MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
639: {
640: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
641: BSspmat *A = a->A;
642: BSsprow *vs;
643: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
644: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
645: int ctr_j,*sbuf1_j,k,ierr;
646: PetscScalar val=0.0;
647: MPI_Comm comm;
648: MPI_Request *s_waits1,*r_waits1;
649: MPI_Status *s_status,*r_status;
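  /* Outline: every local entry (i,j) whose transpose position (j,i) is owned by
     another processor generates a request to that owner; locally owned transposes
     are inserted directly. Each owner then adds an explicit zero at (j,i), so the
     final nonzero pattern is structurally symmetric, as BlockSolve requires. */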
652: comm = mat->comm;
653: tag = mat->tag;
654: size = a->size;
655: rank = a->rank;
656: M = mat->M;
657: rstart = a->rstart;
659: PetscMalloc(M*sizeof(int),&rtable);
660: /* Create hash table for the mapping :row -> proc */
661: for (i=0,j=0; i<size; i++) {
662: len = a->rowners[i+1];
663: for (; j<len; j++) {
664: rtable[j] = i;
665: }
666: }
668: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
669: required. Based on this, buffers are allocated, and data copied into them. */
670: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
671: w3 = w1 + 2*size; /* number of rows with entries to be sent to proc i */
672: w4 = w3 + size; /* temp work space used in determining w1, w3 */
673: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
675: for (i=0; i<mat->m; i++) {
676: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
677: vs = A->rows[i];
678: for (j=0; j<vs->length; j++) {
679: proc = rtable[vs->col[j]];
680: w4[proc]++;
681: }
682: for (j=0; j<size; j++) {
683: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
684: }
685: }
686:
687: nrqs = 0; /* number of outgoing messages */
688: msz = 0; /* total mesg length (for all procs) */
689: w1[2*rank] = 0; /* no mesg sent to itself */
690: w3[rank] = 0;
691: for (i=0; i<size; i++) {
692: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
693: }
694: /* pa - is list of processors to communicate with */
695: PetscMalloc((nrqs+1)*sizeof(int),&pa);
696: for (i=0,j=0; i<size; i++) {
697: if (w1[2*i]) {pa[j] = i; j++;}
698: }
700: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
701: for (i=0; i<nrqs; i++) {
702: j = pa[i];
703: w1[2*j] += w1[2*j+1] + 2*w3[j];
704: msz += w1[2*j];
705: }
706:
707: /* Do a global reduction to determine how many messages to expect */
708: PetscMaxSum(comm,w1,&bsz,&nrqr);
710: /* Allocate memory for recv buffers (possibly none needed if nrqr == 0) */
711: len = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
712: PetscMalloc(len,&rbuf1);
713: rbuf1[0] = (int*)(rbuf1 + nrqr);
714: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
716: /* Post the receives */
717: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
718: for (i=0; i<nrqr; ++i){
719: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
720: }
721:
722: /* Allocate Memory for outgoing messages */
723: len = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
724: PetscMalloc(len,&sbuf1);
725: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
726: PetscMemzero(sbuf1,2*size*sizeof(int*));
727: tmp = (int*)(sbuf1 + 2*size);
728: ctr = tmp + msz;
730: {
731: int *iptr = tmp,ict = 0;
732: for (i=0; i<nrqs; i++) {
733: j = pa[i];
734: iptr += ict;
735: sbuf1[j] = iptr;
736: ict = w1[2*j];
737: }
738: }
740: /* Form the outgoing messages */
741: /* Clean up the header space */
742: for (i=0; i<nrqs; i++) {
743: j = pa[i];
744: sbuf1[j][0] = 0;
745: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
746: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
747: }
749: /* Parse the matrix and copy the data into sbuf1 */
750: for (i=0; i<mat->m; i++) {
751: PetscMemzero(ctr,size*sizeof(int));
752: vs = A->rows[i];
753: for (j=0; j<vs->length; j++) {
754: col = vs->col[j];
755: proc = rtable[col];
756: if (proc != rank) { /* copy to the outgoing buffer */
757: ctr[proc]++;
758: *ptr[proc] = col;
759: ptr[proc]++;
760: } else {
761: row = col - rstart;
762: col = i + rstart;
763: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
764: }
765: }
766: /* Update the headers for the current row */
767: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
768: if ((ctr_j = ctr[j])) {
769: sbuf1_j = sbuf1[j];
770: k = ++sbuf1_j[0];
771: sbuf1_j[2*k] = ctr_j;
772: sbuf1_j[2*k-1] = i + rstart;
773: }
774: }
775: }
776: /* Check Validity of the outgoing messages */
777: {
778: int sum;
779: for (i=0 ; i<nrqs ; i++) {
780: j = pa[i];
781: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
782: }
784: for (i=0 ; i<nrqs ; i++) {
785: j = pa[i];
786: sum = 1;
787: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
788: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
789: }
790: }
791:
792: /* Now post the sends */
793: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
794: for (i=0; i<nrqs; ++i) {
795: j = pa[i];
796: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
797: }
798:
799: /* Receive messages*/
800: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
801: for (i=0; i<nrqr; ++i) {
802: MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
803: /* Process the Message */
804: {
805: int *rbuf1_i,n_row,ct1;
807: rbuf1_i = rbuf1[idx];
808: n_row = rbuf1_i[0];
809: ct1 = 2*n_row+1;
810: val = 0.0;
811: /* Optimise this later */
812: for (j=1; j<=n_row; j++) {
813: col = rbuf1_i[2*j-1];
814: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
815: row = rbuf1_i[ct1] - rstart;
816: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
817: }
818: }
819: }
820: }
822: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
823: MPI_Waitall(nrqs,s_waits1,s_status);
825: PetscFree(rtable);
826: PetscFree(w1);
827: PetscFree(pa);
828: PetscFree(rbuf1);
829: PetscFree(sbuf1);
830: PetscFree(r_waits1);
831: PetscFree(s_waits1);
832: PetscFree(r_status);
833: PetscFree(s_status);
834: return(0);
835: }
837: /*
838: This does the BlockSolve portion of the matrix assembly.
839: It is provided in a separate routine so that users can
840: operate on the matrix (using MatScale(), MatShift() etc.) after
841: the matrix has been assembled but before BlockSolve has sucked it
842: in and devoured it.
843: */
846: int MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
847: {
848: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
849: int ierr,ldim,low,high,i;
850: PetscScalar *diag;
853: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
854: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
855: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
856: }
858: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
859: /* Indicates bypassing cliques in coloring */
860: if (a->bs_color_single) {
861: BSctx_set_si(a->procinfo,100);
862: }
863: /* Form permuted matrix for efficient parallel execution */
864: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
865: /* Set up the communication */
866: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
867: } else {
868: /* Repermute the matrix */
869: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
870: }
872: /* Symmetrically scale the matrix by the diagonal */
873: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
875: /* Store inverse of square root of permuted diagonal scaling matrix */
876: VecGetLocalSize(a->diag,&ldim);
877: VecGetOwnershipRange(a->diag,&low,&high);
878: VecGetArray(a->diag,&diag);
879: for (i=0; i<ldim; i++) {
880: if (a->pA->scale_diag[i] != 0.0) {
881: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
882: } else {
883: diag[i] = 1.0;
884: }
885: }
886: VecRestoreArray(a->diag,&diag);
887: a->assembled_icc_storage = a->A->icc_storage;
888: a->blocksolveassembly = 1;
889: mat->was_assembled = PETSC_TRUE;
890: mat->same_nonzero = PETSC_TRUE;
891: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
892: return(0);
893: }
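/*
   A minimal usage sketch of the workflow described in the comment before
   MatAssemblyEnd_MPIRowbs_ForBlockSolve() above; A, alpha, shift, x and y are
   hypothetical user variables:

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatScale(&alpha,A);CHKERRQ(ierr);
      ierr = MatShift(&shift,A);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);

   MatScale() and MatShift() still act on the unscaled a->A; the first MatMult(),
   MatGetDiagonal() or factorization then calls
   MatAssemblyEnd_MPIRowbs_ForBlockSolve() to build the permuted, scaled a->pA.
*/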
897: int MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
898: {
899: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
900: int i,n,row,col,*rows,*cols,ierr,rstart,nzcount,flg,j,ncols;
901: PetscScalar *vals,val;
902: InsertMode addv = mat->insertmode;
905: while (1) {
906: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
907: if (!flg) break;
908:
909: for (i=0; i<n;) {
910: /* Now identify the consecutive vals belonging to the same row */
911: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
912: if (j < n) ncols = j-i;
913: else ncols = n-i;
914: /* Now assemble all these values with a single function call */
915: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
916: i = j;
917: }
918: }
919: MatStashScatterEnd_Private(&mat->stash);
921: rstart = a->rstart;
922: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
923: /* BlockSolve requires that the matrix is structurally symmetric */
924: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
925: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
926: }
927:
928: /* BlockSolve requires that all the diagonal elements are set */
929: val = 0.0;
930: for (i=0; i<mat->m; i++) {
931: row = i; col = i + rstart;
932: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
933: }
934:
935: MatAssemblyBegin_MPIRowbs_local(mat,mode);
936: MatAssemblyEnd_MPIRowbs_local(mat,mode);
937:
938: a->blocksolveassembly = 0;
939: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
940: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
941: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
942: return(0);
943: }
947: int MatZeroEntries_MPIRowbs(Mat mat)
948: {
949: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
950: BSspmat *A = l->A;
951: BSsprow *vs;
952: int i,j;
955: for (i=0; i <mat->m; i++) {
956: vs = A->rows[i];
957: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
958: }
959: return(0);
960: }
962: /* the code does not handle the diagonal entries correctly unless the
963: matrix is square and the column and row ownerships are identical.
964: This is a BUG.
965: */
969: int MatZeroRows_MPIRowbs(Mat A,IS is,const PetscScalar *diag)
970: {
971: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
972: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
973: int *nprocs,j,idx,nsends;
974: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
975: int *rvalues,tag = A->tag,count,base,slen,n,*source;
976: int *lens,imdex,*lrows,*values;
977: MPI_Comm comm = A->comm;
978: MPI_Request *send_waits,*recv_waits;
979: MPI_Status recv_status,*send_status;
980: IS istmp;
981: PetscTruth found;
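  /* Outline: each processor looks up the owner of every row index in 'is',
     ships the off-processor indices to their owners, collects the indices it
     receives, and finally zeroes its own rows via MatZeroRows_MPIRowbs_local(). */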
984: ISGetLocalSize(is,&N);
985: ISGetIndices(is,&rows);
987: /* first count number of contributors to each processor */
988: PetscMalloc(2*size*sizeof(int),&nprocs);
989: PetscMemzero(nprocs,2*size*sizeof(int));
990: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
991: for (i=0; i<N; i++) {
992: idx = rows[i];
993: found = PETSC_FALSE;
994: for (j=0; j<size; j++) {
995: if (idx >= owners[j] && idx < owners[j+1]) {
996: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
997: }
998: }
999: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1000: }
1001: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
1003: /* inform other processors of number of messages and max length*/
1004: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1006: /* post receives: */
1007: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1008: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1009: for (i=0; i<nrecvs; i++) {
1010: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1011: }
1013: /* do sends:
1014: 1) starts[i] gives the starting index in svalues for stuff going to
1015: the ith processor
1016: */
1017: PetscMalloc((N+1)*sizeof(int),&svalues);
1018: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1019: PetscMalloc((size+1)*sizeof(int),&starts);
1020: starts[0] = 0;
1021: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1022: for (i=0; i<N; i++) {
1023: svalues[starts[owner[i]]++] = rows[i];
1024: }
1025: ISRestoreIndices(is,&rows);
1027: starts[0] = 0;
1028: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1029: count = 0;
1030: for (i=0; i<size; i++) {
1031: if (nprocs[2*i+1]) {
1032: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1033: }
1034: }
1035: PetscFree(starts);
1037: base = owners[rank];
1039: /* wait on receives */
1040: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1041: source = lens + nrecvs;
1042: count = nrecvs; slen = 0;
1043: while (count) {
1044: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1045: /* unpack receives into our local space */
1046: MPI_Get_count(&recv_status,MPI_INT,&n);
1047: source[imdex] = recv_status.MPI_SOURCE;
1048: lens[imdex] = n;
1049: slen += n;
1050: count--;
1051: }
1052: PetscFree(recv_waits);
1053:
1054: /* move the data into the send scatter */
1055: PetscMalloc((slen+1)*sizeof(int),&lrows);
1056: count = 0;
1057: for (i=0; i<nrecvs; i++) {
1058: values = rvalues + i*nmax;
1059: for (j=0; j<lens[i]; j++) {
1060: lrows[count++] = values[j] - base;
1061: }
1062: }
1063: PetscFree(rvalues);
1064: PetscFree(lens);
1065: PetscFree(owner);
1066: PetscFree(nprocs);
1067:
1068: /* actually zap the local rows */
1069: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1070: PetscLogObjectParent(A,istmp);
1071: PetscFree(lrows);
1072: MatZeroRows_MPIRowbs_local(A,istmp,diag);
1073: ISDestroy(istmp);
1075: /* wait on sends */
1076: if (nsends) {
1077: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1078: MPI_Waitall(nsends,send_waits,send_status);
1079: PetscFree(send_status);
1080: }
1081: PetscFree(send_waits);
1082: PetscFree(svalues);
1084: return(0);
1085: }
1089: int MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1090: {
1091: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1092: BSsprow *vs,**rs;
1093: PetscScalar *xv;
1094: PetscReal sum = 0.0;
1095: int *xi,nz,i,j,ierr;
1098: if (a->size == 1) {
1099: MatNorm_MPIRowbs_local(mat,type,norm);
1100: } else {
1101: rs = a->A->rows;
1102: if (type == NORM_FROBENIUS) {
1103: for (i=0; i<mat->m; i++) {
1104: vs = *rs++;
1105: nz = vs->length;
1106: xv = vs->nz;
1107: while (nz--) {
1108: #if defined(PETSC_USE_COMPLEX)
1109: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1110: #else
1111: sum += (*xv)*(*xv); xv++;
1112: #endif
1113: }
1114: }
1115: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1116: *norm = sqrt(*norm);
1117: } else if (type == NORM_1) { /* max column norm */
1118: PetscReal *tmp,*tmp2;
1119: PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1120: PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1121: PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1122: *norm = 0.0;
1123: for (i=0; i<mat->m; i++) {
1124: vs = *rs++;
1125: nz = vs->length;
1126: xi = vs->col;
1127: xv = vs->nz;
1128: while (nz--) {
1129: tmp[*xi] += PetscAbsScalar(*xv);
1130: xi++; xv++;
1131: }
1132: }
1133: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1134: for (j=0; j<mat->n; j++) {
1135: if (tmp2[j] > *norm) *norm = tmp2[j];
1136: }
1137: PetscFree(tmp);
1138: PetscFree(tmp2);
1139: } else if (type == NORM_INFINITY) { /* max row norm */
1140: PetscReal ntemp = 0.0;
1141: for (i=0; i<mat->m; i++) {
1142: vs = *rs++;
1143: nz = vs->length;
1144: xv = vs->nz;
1145: sum = 0.0;
1146: while (nz--) {
1147: sum += PetscAbsScalar(*xv); xv++;
1148: }
1149: if (sum > ntemp) ntemp = sum;
1150: }
1151: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1152: } else {
1153: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1154: }
1155: }
1156: return(0);
1157: }
1161: int MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1162: {
1163: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1164: BSprocinfo *bspinfo = bsif->procinfo;
1165: PetscScalar *xxa,*xworka,*yya;
1166: int ierr;
1169: if (!bsif->blocksolveassembly) {
1170: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1171: }
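  /* bsif->pA is the permuted, symmetrically diagonal-scaled copy built by
     MatAssemblyEnd_MPIRowbs_ForBlockSolve(); unless the vectors are already kept
     in that form (bsif->vecs_permscale) we map x into the scaled, permuted basis
     here and map the product back at the end. */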
1173: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
1174: if (!bsif->vecs_permscale) {
1175: VecGetArray(bsif->xwork,&xworka);
1176: VecGetArray(xx,&xxa);
1177: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1178: VecRestoreArray(bsif->xwork,&xworka);
1179: VecRestoreArray(xx,&xxa);
1180: VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1181: }
1183: VecGetArray(xx,&xxa);
1184: VecGetArray(yy,&yya);
1185: /* Do lower triangular multiplication: [ y = L * xwork ] */
1186: if (bspinfo->single) {
1187: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1188: } else {
1189: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190: }
1191:
1192: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1193: if (mat->symmetric) {
1194: if (bspinfo->single){
1195: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1196: } else {
1197: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1198: }
1199: }
1200: /* not needed for ILU version since forward does it all */
1201: VecRestoreArray(xx,&xxa);
1202: VecRestoreArray(yy,&yya);
1204: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1205: if (!bsif->vecs_permscale) {
1206: VecGetArray(bsif->xwork,&xworka);
1207: VecGetArray(xx,&xxa);
1208: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1209: VecRestoreArray(bsif->xwork,&xworka);
1210: VecRestoreArray(xx,&xxa);
1211: VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1212: VecGetArray(bsif->xwork,&xworka);
1213: VecGetArray(yy,&yya);
1214: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1215: VecRestoreArray(bsif->xwork,&xworka);
1216: VecRestoreArray(yy,&yya);
1217: }
1218: PetscLogFlops(2*bsif->nz - mat->m);
1220: return(0);
1221: }
1225: int MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1226: {
1227: int ierr;
1228: PetscScalar one = 1.0;
1231: (*mat->ops->mult)(mat,xx,zz);
1232: VecAXPY(&one,yy,zz);
1233: return(0);
1234: }
1238: int MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1239: {
1240: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1241: PetscReal isend[5],irecv[5];
1242: int ierr;
1245: info->rows_global = (double)A->M;
1246: info->columns_global = (double)A->N;
1247: info->rows_local = (double)A->m;
1248: info->columns_local = (double)A->N;
1249: info->block_size = 1.0;
1250: info->mallocs = (double)mat->reallocs;
1251: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1252: isend[3] = A->mem; isend[4] = info->mallocs;
1254: if (flag == MAT_LOCAL) {
1255: info->nz_used = isend[0];
1256: info->nz_allocated = isend[1];
1257: info->nz_unneeded = isend[2];
1258: info->memory = isend[3];
1259: info->mallocs = isend[4];
1260: } else if (flag == MAT_GLOBAL_MAX) {
1261: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1262: info->nz_used = irecv[0];
1263: info->nz_allocated = irecv[1];
1264: info->nz_unneeded = irecv[2];
1265: info->memory = irecv[3];
1266: info->mallocs = irecv[4];
1267: } else if (flag == MAT_GLOBAL_SUM) {
1268: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1269: info->nz_used = irecv[0];
1270: info->nz_allocated = irecv[1];
1271: info->nz_unneeded = irecv[2];
1272: info->memory = irecv[3];
1273: info->mallocs = irecv[4];
1274: }
1275: return(0);
1276: }
1280: int MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1281: {
1282: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1283: BSsprow **rs = a->A->rows;
1284: int i,n,ierr;
1285: PetscScalar *x,zero = 0.0;
1288: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1289: if (!a->blocksolveassembly) {
1290: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1291: }
1293: VecSet(&zero,v);
1294: VecGetLocalSize(v,&n);
1295: if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1296: VecGetArray(v,&x);
1297: for (i=0; i<mat->m; i++) {
1298: x[i] = rs[i]->nz[rs[i]->diag_ind];
1299: }
1300: VecRestoreArray(v,&x);
1301: return(0);
1302: }
1306: int MatDestroy_MPIRowbs(Mat mat)
1307: {
1308: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1309: BSspmat *A = a->A;
1310: BSsprow *vs;
1311: int i,ierr;
1314: #if defined(PETSC_USE_LOG)
1315: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1316: #endif
1317: PetscFree(a->rowners);
1318: MatStashDestroy_Private(&mat->stash);
1319: if (a->bsmap) {
1320: if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1321: if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1322: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1323: PetscFree(a->bsmap);
1324: }
1326: if (A) {
1327: for (i=0; i<mat->m; i++) {
1328: vs = A->rows[i];
1329: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1330: }
1331: /* Note: A->map = a->bsmap is freed above */
1332: PetscFree(A->rows);
1333: PetscFree(A);
1334: }
1335: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1336: if (a->diag) {VecDestroy(a->diag);}
1337: if (a->xwork) {VecDestroy(a->xwork);}
1338: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1339: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1340: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1341: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1342: if (a->imax) {PetscFree(a->imax);}
1343: MPI_Comm_free(&(a->comm_mpirowbs));
1344: PetscFree(a);
1345: return(0);
1346: }
1350: int MatSetOption_MPIRowbs(Mat A,MatOption op)
1351: {
1352: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1355: switch (op) {
1356: case MAT_ROW_ORIENTED:
1357: a->roworiented = PETSC_TRUE;
1358: break;
1359: case MAT_COLUMN_ORIENTED:
1360: a->roworiented = PETSC_FALSE;
1361: break;
1362: case MAT_COLUMNS_SORTED:
1363: a->sorted = 1;
1364: break;
1365: case MAT_COLUMNS_UNSORTED:
1366: a->sorted = 0;
1367: break;
1368: case MAT_NO_NEW_NONZERO_LOCATIONS:
1369: a->nonew = 1;
1370: break;
1371: case MAT_YES_NEW_NONZERO_LOCATIONS:
1372: a->nonew = 0;
1373: break;
1374: case MAT_DO_NOT_USE_INODES:
1375: a->bs_color_single = 1;
1376: break;
1377: case MAT_YES_NEW_DIAGONALS:
1378: case MAT_ROWS_SORTED:
1379: case MAT_NEW_NONZERO_LOCATION_ERR:
1380: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381: case MAT_ROWS_UNSORTED:
1382: case MAT_USE_HASH_TABLE:
1383: PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1384: break;
1385: case MAT_IGNORE_OFF_PROC_ENTRIES:
1386: a->donotstash = PETSC_TRUE;
1387: break;
1388: case MAT_NO_NEW_DIAGONALS:
1389: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1390: break;
1391: case MAT_KEEP_ZEROED_ROWS:
1392: a->keepzeroedrows = PETSC_TRUE;
1393: break;
1394: case MAT_SYMMETRIC:
1395: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1396: break;
1397: case MAT_STRUCTURALLY_SYMMETRIC:
1398: case MAT_NOT_SYMMETRIC:
1399: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1400: case MAT_HERMITIAN:
1401: case MAT_NOT_HERMITIAN:
1402: case MAT_SYMMETRY_ETERNAL:
1403: case MAT_NOT_SYMMETRY_ETERNAL:
1404: break;
1405: default:
1406: SETERRQ(PETSC_ERR_SUP,"unknown option");
1407: break;
1408: }
1409: return(0);
1410: }
1414: int MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1415: {
1416: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1417: BSspmat *A = mat->A;
1418: BSsprow *rs;
1419:
1421: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1423: rs = A->rows[row - mat->rstart];
1424: *nz = rs->length;
1425: if (v) *v = rs->nz;
1426: if (idx) *idx = rs->col;
1427: return(0);
1428: }
1432: int MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1433: {
1435: return(0);
1436: }
1438: /* ------------------------------------------------------------------ */
1442: int MatPrintHelp_MPIRowbs(Mat A)
1443: {
1444: static PetscTruth called = PETSC_FALSE;
1445: MPI_Comm comm = A->comm;
1446: int ierr;
1449: if (called) {return(0);} else called = PETSC_TRUE;
1450: (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1451: (*PetscHelpPrintf)(comm," -mat_rowbs_no_inode - Do not use inodes\n");
1452: return(0);
1453: }
1457: int MatSetUpPreallocation_MPIRowbs(Mat A)
1458: {
1459: int ierr;
1462: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1463: return(0);
1464: }
1466: /* -------------------------------------------------------------------*/
1467: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1468: MatGetRow_MPIRowbs,
1469: MatRestoreRow_MPIRowbs,
1470: MatMult_MPIRowbs,
1471: /* 4*/ MatMultAdd_MPIRowbs,
1472: MatMult_MPIRowbs,
1473: MatMultAdd_MPIRowbs,
1474: MatSolve_MPIRowbs,
1475: 0,
1476: 0,
1477: /*10*/ 0,
1478: 0,
1479: 0,
1480: 0,
1481: 0,
1482: /*15*/ MatGetInfo_MPIRowbs,
1483: 0,
1484: MatGetDiagonal_MPIRowbs,
1485: 0,
1486: MatNorm_MPIRowbs,
1487: /*20*/ MatAssemblyBegin_MPIRowbs,
1488: MatAssemblyEnd_MPIRowbs,
1489: 0,
1490: MatSetOption_MPIRowbs,
1491: MatZeroEntries_MPIRowbs,
1492: /*25*/ MatZeroRows_MPIRowbs,
1493: 0,
1494: MatLUFactorNumeric_MPIRowbs,
1495: 0,
1496: MatCholeskyFactorNumeric_MPIRowbs,
1497: /*30*/ MatSetUpPreallocation_MPIRowbs,
1498: MatILUFactorSymbolic_MPIRowbs,
1499: MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1500: 0,
1501: 0,
1502: /*35*/ 0,
1503: MatForwardSolve_MPIRowbs,
1504: MatBackwardSolve_MPIRowbs,
1505: 0,
1506: 0,
1507: /*40*/ 0,
1508: MatGetSubMatrices_MPIRowbs,
1509: 0,
1510: 0,
1511: 0,
1512: /*45*/ MatPrintHelp_MPIRowbs,
1513: MatScale_MPIRowbs,
1514: 0,
1515: 0,
1516: 0,
1517: /*50*/ 0,
1518: 0,
1519: 0,
1520: 0,
1521: 0,
1522: /*55*/ 0,
1523: 0,
1524: 0,
1525: 0,
1526: 0,
1527: /*60*/ MatGetSubMatrix_MPIRowbs,
1528: MatDestroy_MPIRowbs,
1529: MatView_MPIRowbs,
1530: MatGetPetscMaps_Petsc,
1531: MatUseScaledForm_MPIRowbs,
1532: /*65*/ MatScaleSystem_MPIRowbs,
1533: MatUnScaleSystem_MPIRowbs,
1534: 0,
1535: 0,
1536: 0,
1537: /*70*/ 0,
1538: 0,
1539: 0,
1540: 0,
1541: 0,
1542: /*75*/ 0,
1543: 0,
1544: 0,
1545: 0,
1546: 0,
1547: /*80*/ 0,
1548: 0,
1549: 0,
1550: 0,
1551: /*85*/ MatLoad_MPIRowbs
1552: };
1554: /* ------------------------------------------------------------------- */
1556: EXTERN_C_BEGIN
1559: int MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1560: {
1561: int ierr;
1564: mat->preallocated = PETSC_TRUE;
1565: MatCreateMPIRowbs_local(mat,nz,nnz);
1566: return(0);
1567: }
1568: EXTERN_C_END
1570: /*MC
1571: MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1572: with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions
1573: on how to declare the existence of external packages), a matrix type can be constructed which invokes
1574: BlockSolve95 preconditioners and solvers.
1576: Options Database Keys:
1577: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()
1579: Level: beginner
1581: .seealso: MatCreateMPIRowbs
1582: M*/
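/*
   A minimal creation sketch (comm, m and M are hypothetical), mirroring the calls
   MatLoad_MPIRowbs() makes below; alternatively -mat_type mpirowbs together with
   MatSetFromOptions() selects the type at runtime:

      Mat A;
      ierr = MatCreate(comm,m,m,M,M,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIROWBS);CHKERRQ(ierr);
*/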
1584: EXTERN_C_BEGIN
1587: int MatCreate_MPIRowbs(Mat A)
1588: {
1589: Mat_MPIRowbs *a;
1590: BSmapping *bsmap;
1591: BSoff_map *bsoff;
1592: int i,ierr,*offset,m,M;
1593: PetscTruth flg1,flg2,flg3;
1594: BSprocinfo *bspinfo;
1595: MPI_Comm comm;
1596:
1598: comm = A->comm;
1599: m = A->m;
1600: M = A->M;
1602: PetscNew(Mat_MPIRowbs,&a);
1603: A->data = (void*)a;
1604: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1605: A->factor = 0;
1606: A->mapping = 0;
1607: a->vecs_permscale = PETSC_FALSE;
1608: A->insertmode = NOT_SET_VALUES;
1609: a->blocksolveassembly = 0;
1610: a->keepzeroedrows = PETSC_FALSE;
1612: MPI_Comm_rank(comm,&a->rank);
1613: MPI_Comm_size(comm,&a->size);
1615: PetscSplitOwnership(comm,&m,&M);
1617: A->N = M;
1618: A->M = M;
1619: A->m = m;
1620: A->n = A->N; /* each row stores all columns */
1621: PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1622: a->reallocs = 0;
1624: /* the information in the maps duplicates the information computed below, eventually
1625: we should remove the duplicate information that is not contained in the maps */
1626: PetscMapCreateMPI(comm,m,M,&A->rmap);
1627: PetscMapCreateMPI(comm,m,M,&A->cmap);
1629: /* build local table of row ownerships */
1630: PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1631: MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1632: a->rowners[0] = 0;
1633: for (i=2; i<=a->size; i++) {
1634: a->rowners[i] += a->rowners[i-1];
1635: }
1636: a->rstart = a->rowners[a->rank];
1637: a->rend = a->rowners[a->rank+1];
1638: PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));
1640: /* build cache for off array entries formed */
1641: MatStashCreate_Private(A->comm,1,&A->stash);
1642: a->donotstash = PETSC_FALSE;
1644: /* Initialize BlockSolve information */
1645: a->A = 0;
1646: a->pA = 0;
1647: a->comm_pA = 0;
1648: a->fpA = 0;
1649: a->comm_fpA = 0;
1650: a->alpha = 1.0;
1651: a->0;
1652: a->failures = 0;
1653: MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1654: VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1655: VecDuplicate(a->diag,&(a->xwork));
1656: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1657: PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1658: bspinfo = BScreate_ctx();CHKERRBS(0);
1659: a->procinfo = bspinfo;
1660: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1661: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1662: BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1663: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1664: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1665: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1666: #if defined(PETSC_USE_DEBUG)
1667: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1668: #endif
1669: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1670: PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1671: if (flg1) {
1672: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1673: }
1674: PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1675: PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1676: PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1677: if (flg1 || flg2 || flg3) {
1678: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1679: } else {
1680: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1681: }
1682: #if defined(PETSC_USE_LOG)
1683: MLOG_INIT(); /* Initialize logging */
1684: #endif
1686: /* Compute global offsets */
1687: offset = &a->rstart;
1689: PetscNew(BSmapping,&a->bsmap);
1690: PetscLogObjectMemory(A,sizeof(BSmapping));
1691: bsmap = a->bsmap;
1692: PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1693: *((int *)bsmap->vlocal2global) = (*offset);
1694: bsmap->flocal2global = BSloc2glob;
1695: bsmap->free_l2g = 0;
1696: PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1697: *((int *)bsmap->vglobal2local) = (*offset);
1698: bsmap->fglobal2local = BSglob2loc;
1699: bsmap->free_g2l = 0;
1700: bsoff = BSmake_off_map(*offset,bspinfo,A->M);
1701: bsmap->vglobal2proc = (void *)bsoff;
1702: bsmap->fglobal2proc = BSglob2proc;
1703: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1704: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1705: "MatMPIRowbsSetPreallocation_MPIRowbs",
1706: MatMPIRowbsSetPreallocation_MPIRowbs);
1707: return(0);
1708: }
1709: EXTERN_C_END
1713: /* @
1714: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1715: per row in the matrix.
1717: Input Parameters:
1718: + mat - matrix
1719: . nz - maximum number of nonzeros expected in any row
1720: - nnz - number of nonzeros expected in each row (or PETSC_NULL, in which case nz is used for every row)
1722: Note:
1723: This routine is valid only for matrices stored in the MATMPIROWBS
1724: format.
1725: @ */
1726: int MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1727: {
1728: int ierr,(*f)(Mat,int,const int[]);
1731: PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1732: if (f) {
1733: (*f)(mat,nz,nnz);
1734: }
1735: return(0);
1736: }
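/*
   A small sketch of per-row preallocation (A, m and the estimate of 5 nonzeros per
   row are hypothetical); passing nnz = PETSC_NULL instead uses the scalar nz
   estimate for every row:

      int *nnz,i,ierr;
      ierr = PetscMalloc(m*sizeof(int),&nnz);CHKERRQ(ierr);
      for (i=0; i<m; i++) nnz[i] = 5;
      ierr = MatMPIRowbsSetPreallocation(A,0,nnz);CHKERRQ(ierr);
      ierr = PetscFree(nnz);CHKERRQ(ierr);
*/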
1738: /* --------------- extra BlockSolve-specific routines -------------- */
1741: /* @
1742: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1743: user can then manipulate to alter the default parameters.
1745: Input Parameter:
1746: mat - matrix
1748: Output Parameter:
1749: procinfo - processor information context
1751: Note:
1752: This routine is valid only for matrices stored in the MATMPIROWBS
1753: format.
1754: @ */
1755: int MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1756: {
1757: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1758: PetscTruth ismpirowbs;
1759: int ierr;
1762: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1763: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1764: procinfo = a->procinfo;
1765: return(0);
1766: }
1770: int MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1771: {
1772: Mat_MPIRowbs *a;
1773: BSspmat *A;
1774: BSsprow **rs;
1775: Mat mat;
1776: int i,nz,ierr,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1777: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1778: PetscScalar *vals;
1779: MPI_Comm comm = ((PetscObject)viewer)->comm;
1780: MPI_Status status;
1783: MPI_Comm_size(comm,&size);
1784: MPI_Comm_rank(comm,&rank);
1785: if (!rank) {
1786: PetscViewerBinaryGetDescriptor(viewer,&fd);
1787: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1788: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1789: if (header[3] < 0) {
1790: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1791: }
1792: }
1794: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1795: M = header[1];
1796: /* determine ownership of all rows */
1797: m = M/size + ((M % size) > rank);
1798: PetscMalloc((size+2)*sizeof(int),&rowners);
1799: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1800: rowners[0] = 0;
1801: for (i=2; i<=size; i++) {
1802: rowners[i] += rowners[i-1];
1803: }
1804: rstart = rowners[rank];
1805: rend = rowners[rank+1];
1807: /* distribute row lengths to all processors */
1808: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1809: if (!rank) {
1810: PetscMalloc(M*sizeof(int),&rowlengths);
1811: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1812: PetscMalloc(size*sizeof(int),&sndcounts);
1813: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1814: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1815: PetscFree(sndcounts);
1816: } else {
1817: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1818: }
1820: /* create our matrix */
1821: MatCreate(comm,m,m,M,M,newmat);
1822: MatSetType(*newmat,type);
1823: MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1824: mat = *newmat;
1825: PetscFree(ourlens);
1827: a = (Mat_MPIRowbs*)mat->data;
1828: A = a->A;
1829: rs = A->rows;
1831: if (!rank) {
1832: /* calculate the number of nonzeros on each processor */
1833: PetscMalloc(size*sizeof(int),&procsnz);
1834: PetscMemzero(procsnz,size*sizeof(int));
1835: for (i=0; i<size; i++) {
1836: for (j=rowners[i]; j< rowners[i+1]; j++) {
1837: procsnz[i] += rowlengths[j];
1838: }
1839: }
1840: PetscFree(rowlengths);
1842: /* determine max buffer needed and allocate it */
1843: maxnz = 0;
1844: for (i=0; i<size; i++) {
1845: maxnz = PetscMax(maxnz,procsnz[i]);
1846: }
1847: PetscMalloc(maxnz*sizeof(int),&cols);
1849: /* read in my part of the matrix column indices */
1850: nz = procsnz[0];
1851: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1852:
1853: /* insert it into my part of matrix */
1854: nz = 0;
1855: for (i=0; i<A->num_rows; i++) {
1856: for (j=0; j<a->imax[i]; j++) {
1857: rs[i]->col[j] = cols[nz++];
1858: }
1859: rs[i]->length = a->imax[i];
1860: }
1861: /* read in parts for all other processors */
1862: for (i=1; i<size; i++) {
1863: nz = procsnz[i];
1864: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1865: MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1866: }
1867: PetscFree(cols);
1868: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1870: /* read in my part of the matrix numerical values */
1871: nz = procsnz[0];
1872: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1873:
1874: /* insert it into my part of matrix */
1875: nz = 0;
1876: for (i=0; i<A->num_rows; i++) {
1877: for (j=0; j<a->imax[i]; j++) {
1878: rs[i]->nz[j] = vals[nz++];
1879: }
1880: }
1881: /* read in parts for all other processors */
1882: for (i=1; i<size; i++) {
1883: nz = procsnz[i];
1884: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1885: MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1886: }
1887: PetscFree(vals);
1888: PetscFree(procsnz);
1889: } else {
1890: /* determine buffer space needed for message */
1891: nz = 0;
1892: for (i=0; i<A->num_rows; i++) {
1893: nz += a->imax[i];
1894: }
1895: PetscMalloc(nz*sizeof(int),&cols);
1897: /* receive message of column indices*/
1898: MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1899: MPI_Get_count(&status,MPI_INT,&maxnz);
1900: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1902: /* insert it into my part of matrix */
1903: nz = 0;
1904: for (i=0; i<A->num_rows; i++) {
1905: for (j=0; j<a->imax[i]; j++) {
1906: rs[i]->col[j] = cols[nz++];
1907: }
1908: rs[i]->length = a->imax[i];
1909: }
1910: PetscFree(cols);
1911: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1913: /* receive message of values*/
1914: MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1915: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1916: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1918: /* insert it into my part of matrix */
1919: nz = 0;
1920: for (i=0; i<A->num_rows; i++) {
1921: for (j=0; j<a->imax[i]; j++) {
1922: rs[i]->nz[j] = vals[nz++];
1923: }
1924: rs[i]->length = a->imax[i];
1925: }
1926: PetscFree(vals);
1927: }
1928: PetscFree(rowners);
1929: a->nz = a->maxnz;
1930: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1931: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1932: return(0);
1933: }
1935: /*
1936: Special destroy and view routines for factored matrices
1937: */
1940: static int MatDestroy_MPIRowbs_Factored(Mat mat)
1941: {
1943: #if defined(PETSC_USE_LOG)
1944: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1945: #endif
1946: return(0);
1947: }
1951: static int MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1952: {
1956: MatView((Mat) mat->data,viewer);
1957: return(0);
1958: }
1962: int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1963: {
1964: /* Note: the MatFactorInfo argument is not currently used by BlockSolve */
1965: Mat newmat;
1966: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1967: int ierr;
1968: PetscTruth idn;
1971: if (isrow) {
1972: ISIdentity(isrow,&idn);
1973: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1974: }
1976: if (!mat->symmetric) {
1977: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1978: preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1979: symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
1980: }
1982: /* If the icc_storage flag wasn't set before the last blocksolveassembly, */
1983: /* we must completely redo the assembly as a different storage format is required. */
1984: if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
1985: mat->same_nonzero = PETSC_FALSE;
1986: mbs->blocksolveassembly = 0;
1987: }
1989: if (!mbs->blocksolveassembly) {
1990: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1991: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1992: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1993: }
1995: /* Copy permuted matrix */
1996: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1997: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
1999: /* Set up the communication for factorization */
2000: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2001: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2003:   /*
2004:      Create a new Mat structure to hold the "factored" matrix;
2005:      note that this merely contains a pointer to the original matrix, since
2006:      the original matrix holds the factor information.
2007:   */
2008: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2009: PetscLogObjectCreate(newmat);
2010: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2012: newmat->data = (void*)mat;
2013: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2014: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2015: newmat->ops->view = MatView_MPIRowbs_Factored;
2016: newmat->factor = 1;
2017: newmat->preallocated = PETSC_TRUE;
2018: newmat->M = mat->M;
2019: newmat->N = mat->N;
2020: newmat->m = mat->m;
2021: newmat->n = mat->n;
2022: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2024: *newfact = newmat;
2025: return(0);
2026: }
2030: int MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2031: {
2032: Mat newmat;
2033: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2034: int ierr;
2035: PetscTruth idn;
2038:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"BlockSolve95 ILU supports only 0 levels of fill");
2039: if (isrow) {
2040: ISIdentity(isrow,&idn);
2041: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2042: }
2043: if (iscol) {
2044: ISIdentity(iscol,&idn);
2045: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2046: }
2048: if (!mbs->blocksolveassembly) {
2049: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2050: }
2051:
2052: /* if (mat->symmetric) { */
2053: /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2054: /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2055: /* using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2056: /* } */
2058: /* Copy permuted matrix */
2059: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2060: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2062: /* Set up the communication for factorization */
2063: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2064: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2066:   /*
2067:      Create a new Mat structure to hold the "factored" matrix;
2068:      note that this merely contains a pointer to the original matrix, since
2069:      the original matrix holds the factor information.
2070:   */
2071: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2072: PetscLogObjectCreate(newmat);
2073: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2075: newmat->data = (void*)mat;
2076: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2077: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2078: newmat->ops->view = MatView_MPIRowbs_Factored;
2079: newmat->factor = 1;
2080: newmat->preallocated = PETSC_TRUE;
2081: newmat->M = mat->M;
2082: newmat->N = mat->N;
2083: newmat->m = mat->m;
2084: newmat->n = mat->n;
2085: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2087: *newfact = newmat;
2088: return(0);
2089: }
2093: int MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2094: {
2095: int ierr;
2100: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2101: ISColoringCreate(mat->comm,mat->m,0,coloring);
2103: return(0);
2104: }
2108: /*@C
2109: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2110: format. This format is intended primarily as an interface for BlockSolve95.
2112: Collective on MPI_Comm
2114: Input Parameters:
2115: + comm - MPI communicator
2116: . m - number of local rows (or PETSC_DECIDE to have calculated)
2117: . M - number of global rows (or PETSC_DECIDE to have calculated)
2118: . nz - number of nonzeros per row (same for all local rows)
2119: - nnz - number of nonzeros per row (possibly different for each row).
2121: Output Parameter:
2122: . newA - the matrix
2124: Notes:
2125: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2126:    then it must be used on all processors that share the object for that argument.
2128: The user MUST specify either the local or global matrix dimensions
2129: (possibly both).
2131: Specify the preallocated storage with either nz or nnz (not both). Set
2132: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2133: allocation.
2135: Notes:
2136: By default, the matrix is assumed to be nonsymmetric; the user can
2137: take advantage of special optimizations for symmetric matrices by calling
2138: $ MatSetOption(mat,MAT_SYMMETRIC)
2139: $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2140:    BEFORE calling the routine MatAssemblyBegin(); a usage sketch follows this routine.
2142:    Internally, the MATMPIROWBS format inserts zero elements into the
2143: matrix if necessary, so that nonsymmetric matrices are considered
2144: to be symmetric in terms of their sparsity structure; this format
2145: is required for use of the parallel communication routines within
2146: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2147: then PETSc will internally allocate a 0 value for the element
2148: A[j,i] during MatAssemblyEnd() if the user has not already set
2149: a value for the matrix element A[j,i].
2151: Options Database Keys:
2152: . -mat_rowbs_no_inode - Do not use inodes.
2154: Level: intermediate
2155:
2156: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2158: .seealso: MatCreate(), MatSetValues()
2159: @*/
2160: int MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2161: {
2163:
2165: MatCreate(comm,m,m,M,M,newA);
2166: MatSetType(*newA,MATMPIROWBS);
2167: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2168: return(0);
2169: }
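/*
   Usage sketch (illustrative addition, not part of the original BlockSolve95
   interface code, and compiled out with #if 0): a minimal way to create a
   MATMPIROWBS matrix that can later be used with the BlockSolve95 ICC
   preconditioner.  The helper name MatCreateMPIRowbs_UsageSketch and the
   trivial diagonal fill are assumptions made purely for illustration; the
   PETSc calls themselves appear elsewhere in this file or its manual pages.
*/
#if 0
static int MatCreateMPIRowbs_UsageSketch(MPI_Comm comm,int m,Mat *A)
{
  int         ierr,i,rstart,rend;
  PetscScalar one = 1.0;

  ierr = MatCreateMPIRowbs(comm,m,PETSC_DECIDE,5,PETSC_NULL,A);CHKERRQ(ierr);
  /* declare symmetry BEFORE assembly so the ICC storage format can be used */
  ierr = MatSetOption(*A,MAT_SYMMETRIC);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {   /* trivial example: unit diagonal */
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif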
2172: /* -------------------------------------------------------------------------*/
2174: #include src/mat/impls/aij/seq/aij.h
2175: #include src/mat/impls/aij/mpi/mpiaij.h
2179: int MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2180: {
2181: int nmax,nstages_local,nstages,i,pos,max_no,ierr;
2185: /* Allocate memory to hold all the submatrices */
2186: if (scall != MAT_REUSE_MATRIX) {
2187: PetscMalloc((ismax+1)*sizeof(Mat),submat);
2188: }
2189:
2190: /* Determine the number of stages through which submatrices are done */
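  /* (Heuristic, as suggested by the formula below: limit each stage to roughly
      20*10^6 bytes of per-IS column-map storage, i.e. about 20e6/(N*sizeof(int))
      index sets per stage.) */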
2191: nmax = 20*1000000 / (C->N * sizeof(int));
2192: if (!nmax) nmax = 1;
2193: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
2195: /* Make sure every processor loops through the nstages */
2196: MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);
2198: for (i=0,pos=0; i<nstages; i++) {
2199: if (pos+nmax <= ismax) max_no = nmax;
2200: else if (pos == ismax) max_no = 0;
2201: else max_no = ismax-pos;
2202: MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2203: pos += max_no;
2204: }
2205: return(0);
2206: }
2207: /* -------------------------------------------------------------------------*/
2208: /* For now MatGetSubMatrices_MPIRowbs_Local builds SeqAIJ submatrices of the input
2209:    matrix and preserves the explicit zeros that come from structural symmetry
2210: */
2213: int MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2214: {
2215: Mat_MPIRowbs *c = (Mat_MPIRowbs *)(C->data);
2216: BSspmat *A = c->A;
2217: Mat_SeqAIJ *mat;
2218: int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2219: int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,ierr,**rbuf1,row,proc;
2220: int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2221: int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2222: int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2223: int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2224: int *rmap_i,tag0,tag1,tag2,tag3;
2225: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2226: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2227: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2228: MPI_Status *r_status3,*r_status4,*s_status4;
2229: MPI_Comm comm;
2230: FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2231: PetscScalar *mat_a;
2232: PetscTruth sorted;
2233: int *onodes1,*olengths1;
2236: comm = C->comm;
2237: tag0 = C->tag;
2238: size = c->size;
2239: rank = c->rank;
2240: m = C->M;
2241:
2242: /* Get some new tags to keep the communication clean */
2243: PetscObjectGetNewTag((PetscObject)C,&tag1);
2244: PetscObjectGetNewTag((PetscObject)C,&tag2);
2245: PetscObjectGetNewTag((PetscObject)C,&tag3);
2247:   /* Check that the row index sets are sorted (the column check is currently disabled) */
2248: for (i=0; i<ismax; i++) {
2249: ISSorted(isrow[i],&sorted);
2250: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2251: ISSorted(iscol[i],&sorted);
2252: /* if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2253: }
2255: len = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2256: PetscMalloc(len,&irow);
2257: icol = irow + ismax;
2258: nrow = (int*)(icol + ismax);
2259: ncol = nrow + ismax;
2260: rtable = ncol + ismax;
2262: for (i=0; i<ismax; i++) {
2263: ISGetIndices(isrow[i],&irow[i]);
2264: ISGetIndices(iscol[i],&icol[i]);
2265: ISGetLocalSize(isrow[i],&nrow[i]);
2266: ISGetLocalSize(iscol[i],&ncol[i]);
2267: }
2269: /* Create hash table for the mapping :row -> proc*/
2270: for (i=0,j=0; i<size; i++) {
2271: jmax = c->rowners[i+1];
2272: for (; j<jmax; j++) {
2273: rtable[j] = i;
2274: }
2275: }
2277: /* evaluate communication - mesg to who, length of mesg, and buffer space
2278: required. Based on this, buffers are allocated, and data copied into them*/
2279: PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2280: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2281: w3 = w2 + size; /* no of IS that needs to be sent to proc i */
2282: w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */
2283: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2284: for (i=0; i<ismax; i++) {
2285: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2286: jmax = nrow[i];
2287: irow_i = irow[i];
2288: for (j=0; j<jmax; j++) {
2289: row = irow_i[j];
2290: proc = rtable[row];
2291: w4[proc]++;
2292: }
2293: for (j=0; j<size; j++) {
2294: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
2295: }
2296: }
2297:
2298: nrqs = 0; /* no of outgoing messages */
2299: msz = 0; /* total mesg length (for all procs) */
2300: w1[rank] = 0; /* no mesg sent to self */
2301: w3[rank] = 0;
2302: for (i=0; i<size; i++) {
2303: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2304: }
2305: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2306: for (i=0,j=0; i<size; i++) {
2307: if (w1[i]) { pa[j] = i; j++; }
2308: }
2310: /* Each message would have a header = 1 + 2*(no of IS) + data */
2311: for (i=0; i<nrqs; i++) {
2312: j = pa[i];
2313: w1[j] += w2[j] + 2* w3[j];
2314: msz += w1[j];
2315: }
2317:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2318: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2319: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2321: /* Now post the Irecvs corresponding to these messages */
2322: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2323:
2324: PetscFree(onodes1);
2325: PetscFree(olengths1);
2326:
2327: /* Allocate Memory for outgoing messages */
2328: len = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2329: PetscMalloc(len,&sbuf1);
2330: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2331: PetscMemzero(sbuf1,2*size*sizeof(int*));
2332: /* allocate memory for outgoing data + buf to receive the first reply */
2333: tmp = (int*)(ptr + size);
2334: ctr = tmp + 2*msz;
2336: {
2337: int *iptr = tmp,ict = 0;
2338: for (i=0; i<nrqs; i++) {
2339: j = pa[i];
2340: iptr += ict;
2341: sbuf1[j] = iptr;
2342: ict = w1[j];
2343: }
2344: }
2346: /* Form the outgoing messages */
2347: /* Initialize the header space */
2348: for (i=0; i<nrqs; i++) {
2349: j = pa[i];
2350: sbuf1[j][0] = 0;
2351: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2352: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
2353: }
2354:
2355: /* Parse the isrow and copy data into outbuf */
2356: for (i=0; i<ismax; i++) {
2357: PetscMemzero(ctr,size*sizeof(int));
2358: irow_i = irow[i];
2359: jmax = nrow[i];
2360: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
2361: row = irow_i[j];
2362: proc = rtable[row];
2363: if (proc != rank) { /* copy to the outgoing buf*/
2364: ctr[proc]++;
2365: *ptr[proc] = row;
2366: ptr[proc]++;
2367: }
2368: }
2369: /* Update the headers for the current IS */
2370:     for (j=0; j<size; j++) { /* this loop could also be optimized */
2371: if ((ctr_j = ctr[j])) {
2372: sbuf1_j = sbuf1[j];
2373: k = ++sbuf1_j[0];
2374: sbuf1_j[2*k] = ctr_j;
2375: sbuf1_j[2*k-1] = i;
2376: }
2377: }
2378: }
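  /* At this point sbuf1[j] has the layout:
       [ nIS | is_0, cnt_0 | is_1, cnt_1 | ... | requested row indices for is_0, is_1, ... ]
     where nIS is the number of index sets that need rows from proc j and cnt_k is
     the number of rows requested from proc j for index set is_k. */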
2380: /* Now post the sends */
2381: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2382: for (i=0; i<nrqs; ++i) {
2383: j = pa[i];
2384: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2385: }
2387: /* Post Receives to capture the buffer size */
2388: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2389: PetscMalloc((nrqs+1)*sizeof(int *),&rbuf2);
2390: rbuf2[0] = tmp + msz;
2391: for (i=1; i<nrqs; ++i) {
2392: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2393: }
2394: for (i=0; i<nrqs; ++i) {
2395: j = pa[i];
2396: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2397: }
2399: /* Send to other procs the buf size they should allocate */
2400:
2402: /* Receive messages*/
2403: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2404: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2405: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2406: PetscMalloc(len,&sbuf2);
2407: req_size = (int*)(sbuf2 + nrqr);
2408: req_source = req_size + nrqr;
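  /* First reply sbuf2[idx]: entry [0] holds the total number of nonzeros that will be
     sent back, entries [1..start-1] echo the request header, and the remaining entries
     give the length of each requested row. */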
2409:
2410: {
2411: BSsprow **sAi = A->rows;
2412: int id,rstart = c->rstart;
2413: int *sbuf2_i;
2415: for (i=0; i<nrqr; ++i) {
2416: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2417: req_size[idx] = 0;
2418: rbuf1_i = rbuf1[idx];
2419: start = 2*rbuf1_i[0] + 1;
2420: MPI_Get_count(r_status1+i,MPI_INT,&end);
2421: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2422: sbuf2_i = sbuf2[idx];
2423: for (j=start; j<end; j++) {
2424: id = rbuf1_i[j] - rstart;
2425: ncols = (sAi[id])->length;
2426: sbuf2_i[j] = ncols;
2427: req_size[idx] += ncols;
2428: }
2429: req_source[idx] = r_status1[i].MPI_SOURCE;
2430: /* form the header */
2431: sbuf2_i[0] = req_size[idx];
2432: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2433: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2434: }
2435: }
2436: PetscFree(r_status1);
2437: PetscFree(r_waits1);
2439: /* recv buffer sizes */
2440: /* Receive messages*/
2441:
2442: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2443: PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2444: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2445: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2446: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2448: for (i=0; i<nrqs; ++i) {
2449: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2450: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2451: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2452: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2453: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2454: }
2455: PetscFree(r_status2);
2456: PetscFree(r_waits2);
2457:
2458: /* Wait on sends1 and sends2 */
2459: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2460: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2462: MPI_Waitall(nrqs,s_waits1,s_status1);
2463: MPI_Waitall(nrqr,s_waits2,s_status2);
2464: PetscFree(s_status1);
2465: PetscFree(s_status2);
2466: PetscFree(s_waits1);
2467: PetscFree(s_waits2);
2469: /* Now allocate buffers for a->j, and send them off */
2470: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2471: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2472: PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2473: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2474:
2475: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2476: {
2477: BSsprow *brow;
2478: int *Acol;
2479: int rstart = c->rstart;
2481: for (i=0; i<nrqr; i++) {
2482: rbuf1_i = rbuf1[i];
2483: sbuf_aj_i = sbuf_aj[i];
2484: ct1 = 2*rbuf1_i[0] + 1;
2485: ct2 = 0;
2486: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2487: kmax = rbuf1[i][2*j];
2488: for (k=0; k<kmax; k++,ct1++) {
2489: brow = A->rows[rbuf1_i[ct1] - rstart];
2490: ncols = brow->length;
2491: Acol = brow->col;
2492: /* load the column indices for this row into cols*/
2493: cols = sbuf_aj_i + ct2;
2494: PetscMemcpy(cols,Acol,ncols*sizeof(int));
2495:         /* for (l=0; l<ncols; l++) cols[l] = Acol[l]; */
2496:         /* TODO: is a column mapping needed here? */
2497: ct2 += ncols;
2498: }
2499: }
2500: MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2501: }
2502: }
2503: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2504: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
2506: /* Allocate buffers for a->a, and send them off */
2507: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2508: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2509: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2510: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2511:
2512: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2513: {
2514: BSsprow *brow;
2515: FLOAT *Aval;
2516: int rstart = c->rstart;
2517:
2518: for (i=0; i<nrqr; i++) {
2519: rbuf1_i = rbuf1[i];
2520: sbuf_aa_i = sbuf_aa[i];
2521: ct1 = 2*rbuf1_i[0]+1;
2522: ct2 = 0;
2523: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2524: kmax = rbuf1_i[2*j];
2525: for (k=0; k<kmax; k++,ct1++) {
2526: brow = A->rows[rbuf1_i[ct1] - rstart];
2527: ncols = brow->length;
2528: Aval = brow->nz;
2529: /* load the column values for this row into vals*/
2530: vals = sbuf_aa_i+ct2;
2531: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2532: ct2 += ncols;
2533: }
2534: }
2535: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2536: }
2537: }
2538: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2539: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2540: PetscFree(rbuf1);
2542: /* Form the matrix */
2543: /* create col map */
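  /* cmap[i][c] = 1-based position of global column c within iscol[i], or 0 if
     column c is not selected by iscol[i] */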
2544: {
2545: int *icol_i;
2546:
2547: len = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2548: PetscMalloc(len,&cmap);
2549: cmap[0] = (int *)(cmap + ismax);
2550: PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2551: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2552: for (i=0; i<ismax; i++) {
2553: jmax = ncol[i];
2554: icol_i = icol[i];
2555: cmap_i = cmap[i];
2556: for (j=0; j<jmax; j++) {
2557: cmap_i[icol_i[j]] = j+1;
2558: }
2559: }
2560: }
2562: /* Create lens which is required for MatCreate... */
2563: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2564: len = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2565: PetscMalloc(len,&lens);
2566: lens[0] = (int *)(lens + ismax);
2567: PetscMemzero(lens[0],j*sizeof(int));
2568: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2569:
2570: /* Update lens from local data */
2571: { BSsprow *Arow;
2572: for (i=0; i<ismax; i++) {
2573: jmax = nrow[i];
2574: cmap_i = cmap[i];
2575: irow_i = irow[i];
2576: lens_i = lens[i];
2577: for (j=0; j<jmax; j++) {
2578: row = irow_i[j];
2579: proc = rtable[row];
2580: if (proc == rank) {
2581: Arow=A->rows[row-c->rstart];
2582: ncols=Arow->length;
2583: cols=Arow->col;
2584: for (k=0; k<ncols; k++) {
2585: if (cmap_i[cols[k]]) { lens_i[j]++;}
2586: }
2587: }
2588: }
2589: }
2590: }
2591:
2592: /* Create row map*/
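  /* rmap[i][r] = 0-based position of global row r within isrow[i]; only entries
     for rows selected by isrow[i] are meaningful */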
2593: len = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2594: PetscMalloc(len,&rmap);
2595: rmap[0] = (int *)(rmap + ismax);
2596: PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2597: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2598: for (i=0; i<ismax; i++) {
2599: rmap_i = rmap[i];
2600: irow_i = irow[i];
2601: jmax = nrow[i];
2602: for (j=0; j<jmax; j++) {
2603: rmap_i[irow_i[j]] = j;
2604: }
2605: }
2606:
2607: /* Update lens from offproc data */
2608: {
2609: int *rbuf2_i,*rbuf3_i,*sbuf1_i;
2611: for (tmp2=0; tmp2<nrqs; tmp2++) {
2612: MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2613: idx = pa[i];
2614: sbuf1_i = sbuf1[idx];
2615: jmax = sbuf1_i[0];
2616: ct1 = 2*jmax+1;
2617: ct2 = 0;
2618: rbuf2_i = rbuf2[i];
2619: rbuf3_i = rbuf3[i];
2620: for (j=1; j<=jmax; j++) {
2621: is_no = sbuf1_i[2*j-1];
2622: max1 = sbuf1_i[2*j];
2623: lens_i = lens[is_no];
2624: cmap_i = cmap[is_no];
2625: rmap_i = rmap[is_no];
2626: for (k=0; k<max1; k++,ct1++) {
2627: row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
2628: max2 = rbuf2_i[ct1];
2629: for (l=0; l<max2; l++,ct2++) {
2630: if (cmap_i[rbuf3_i[ct2]]) {
2631: lens_i[row]++;
2632: }
2633: }
2634: }
2635: }
2636: }
2637: }
2638: PetscFree(r_status3);
2639: PetscFree(r_waits3);
2640: MPI_Waitall(nrqr,s_waits3,s_status3);
2641: PetscFree(s_status3);
2642: PetscFree(s_waits3);
2644: /* Create the submatrices */
2645: if (scall == MAT_REUSE_MATRIX) {
2646: PetscTruth same;
2647:
2648: /*
2649:       Assumes the new rows are the same length as the old rows, hence a potential bug!
2650: */
2651: for (i=0; i<ismax; i++) {
2652: PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2653: if (same == PETSC_FALSE) {
2654: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2655: }
2656: mat = (Mat_SeqAIJ*)(submats[i]->data);
2657: if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2658: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2659: }
2660: PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2661: if (same == PETSC_FALSE) {
2662: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2663: }
2664:       /* Initialize the matrix as if empty */
2665: PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2666: submats[i]->factor = C->factor;
2667: }
2668: } else {
2669: for (i=0; i<ismax; i++) {
2670: /* Here we want to explicitly generate SeqAIJ matrices */
2671: MatCreate(PETSC_COMM_SELF,nrow[i],ncol[i],nrow[i],ncol[i],submats+i);
2672: MatSetType(submats[i],MATSEQAIJ);
2673: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2674: }
2675: }
2677: /* Assemble the matrices */
2678: /* First assemble the local rows */
2679: {
2680: int ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2681: PetscScalar *imat_a;
2682: BSsprow *Arow;
2683:
2684: for (i=0; i<ismax; i++) {
2685: mat = (Mat_SeqAIJ*)submats[i]->data;
2686: imat_ilen = mat->ilen;
2687: imat_j = mat->j;
2688: imat_i = mat->i;
2689: imat_a = mat->a;
2690: cmap_i = cmap[i];
2691: rmap_i = rmap[i];
2692: irow_i = irow[i];
2693: jmax = nrow[i];
2694: for (j=0; j<jmax; j++) {
2695: row = irow_i[j];
2696: proc = rtable[row];
2697: if (proc == rank) {
2698: old_row = row;
2699: row = rmap_i[row];
2700: ilen_row = imat_ilen[row];
2701:
2702: Arow=A->rows[old_row-c->rstart];
2703: ncols=Arow->length;
2704: cols=Arow->col;
2705: vals=Arow->nz;
2706:
2707: mat_i = imat_i[row];
2708: mat_a = imat_a + mat_i;
2709: mat_j = imat_j + mat_i;
2710: for (k=0; k<ncols; k++) {
2711: if ((tcol = cmap_i[cols[k]])) {
2712: *mat_j++ = tcol - 1;
2713: *mat_a++ = (PetscScalar)vals[k];
2714: ilen_row++;
2715: }
2716: }
2717: imat_ilen[row] = ilen_row;
2718: }
2719: }
2720: }
2721: }
2723: /* Now assemble the off proc rows*/
2724: {
2725: int *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2726: int *imat_j,*imat_i;
2727: PetscScalar *imat_a;
2728: FLOAT *rbuf4_i;
2729:
2730: for (tmp2=0; tmp2<nrqs; tmp2++) {
2731: MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2732: idx = pa[i];
2733: sbuf1_i = sbuf1[idx];
2734: jmax = sbuf1_i[0];
2735: ct1 = 2*jmax + 1;
2736: ct2 = 0;
2737: rbuf2_i = rbuf2[i];
2738: rbuf3_i = rbuf3[i];
2739: rbuf4_i = rbuf4[i];
2740: for (j=1; j<=jmax; j++) {
2741: is_no = sbuf1_i[2*j-1];
2742: rmap_i = rmap[is_no];
2743: cmap_i = cmap[is_no];
2744: mat = (Mat_SeqAIJ*)submats[is_no]->data;
2745: imat_ilen = mat->ilen;
2746: imat_j = mat->j;
2747: imat_i = mat->i;
2748: imat_a = mat->a;
2749: max1 = sbuf1_i[2*j];
2750: for (k=0; k<max1; k++,ct1++) {
2751: row = sbuf1_i[ct1];
2752: row = rmap_i[row];
2753: ilen = imat_ilen[row];
2754: mat_i = imat_i[row];
2755: mat_a = imat_a + mat_i;
2756: mat_j = imat_j + mat_i;
2757: max2 = rbuf2_i[ct1];
2758: for (l=0; l<max2; l++,ct2++) {
2759: if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2760: *mat_j++ = tcol - 1;
2761: *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2762: ilen++;
2763: }
2764: }
2765: imat_ilen[row] = ilen;
2766: }
2767: }
2768: }
2769: }
2770: PetscFree(r_status4);
2771: PetscFree(r_waits4);
2772: MPI_Waitall(nrqr,s_waits4,s_status4);
2773: PetscFree(s_waits4);
2774: PetscFree(s_status4);
2776: /* Restore the indices */
2777: for (i=0; i<ismax; i++) {
2778: ISRestoreIndices(isrow[i],irow+i);
2779: ISRestoreIndices(iscol[i],icol+i);
2780: }
2782: /* Destroy allocated memory */
2783: PetscFree(irow);
2784: PetscFree(w1);
2785: PetscFree(pa);
2787: PetscFree(sbuf1);
2788: PetscFree(rbuf2);
2789: for (i=0; i<nrqr; ++i) {
2790: PetscFree(sbuf2[i]);
2791: }
2792: for (i=0; i<nrqs; ++i) {
2793: PetscFree(rbuf3[i]);
2794: PetscFree(rbuf4[i]);
2795: }
2797: PetscFree(sbuf2);
2798: PetscFree(rbuf3);
2799: PetscFree(rbuf4);
2800: PetscFree(sbuf_aj[0]);
2801: PetscFree(sbuf_aj);
2802: PetscFree(sbuf_aa[0]);
2803: PetscFree(sbuf_aa);
2804:
2805: PetscFree(cmap);
2806: PetscFree(rmap);
2807: PetscFree(lens);
2809: for (i=0; i<ismax; i++) {
2810: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2811: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2812: }
2813: return(0);
2814: }
2816: /*
2817:    Can be optimized by sending only the nonzeros selected by the iscol IS,
2818:    i.e. prebuild the submatrix on the sending side, including the A,B partitioning
2819: */
2822: #include src/vec/is/impls/general/general.h
2823: int MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2824: {
2825: Mat_MPIRowbs *c = (Mat_MPIRowbs*)C->data;
2826: BSspmat *A = c->A;
2827: BSsprow *Arow;
2828:   Mat_SeqAIJ   *matA,*matB; /* on-proc and off-proc parts of submat */
2829: Mat_MPIAIJ *mat; /* submat->data */
2830: int *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2831: int *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2832: int i,j,k,l,ierr,len,jmax,proc,idx;
2833: int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2834: FLOAT **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */
2836: int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2837: int *req_source;
2838: int ncols_t;
2839:
2840:
2841: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2842: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2843:
2844: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2845: MPI_Status *r_status3,*r_status4,*s_status4;
2846: MPI_Comm comm;
2850: comm = C->comm;
2851: tag0 = C->tag;
2852: size = c->size;
2853: rank = c->rank;
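  /* In the uniprocessor case simply delegate to the sequential MatGetSubMatrices() path */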
2855: if (size==1) {
2856: if (scall == MAT_REUSE_MATRIX) {
2857: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2858: return(0);
2859: } else {
2860: Mat *newsubmat;
2861:
2862: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2863: *submat=*newsubmat;
2864: ierr=PetscFree(newsubmat);
2865: return(0);
2866: }
2867: }
2868:
2869: /* Get some new tags to keep the communication clean */
2870: PetscObjectGetNewTag((PetscObject)C,&tag1);
2871: PetscObjectGetNewTag((PetscObject)C,&tag2);
2872: PetscObjectGetNewTag((PetscObject)C,&tag3);
2874:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2875:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2876: 
2877:   /* Check that the index sets are sorted */
2878:   {PetscTruth sorted;
2879:   ISSorted(isrow,&sorted);
2880:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2881:   ISSorted(iscol,&sorted);
2882:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2883:   }
2884: 
2885:   ISGetIndices(isrow,&irow);
2886:   ISGetIndices(iscol,&icol);
2887:   ISGetLocalSize(isrow,&nrow);
2888:   ISGetLocalSize(iscol,&ncol);
2889:
2890:
2891: len = (C->M+1)*sizeof(int);
2892: PetscMalloc(len,&rtable);
2893: /* Create hash table for the mapping :row -> proc*/
2894: for (i=0,j=0; i<size; i++) {
2895: jmax = c->rowners[i+1];
2896: for (; j<jmax; j++) {
2897: rtable[j] = i;
2898: }
2899: }
2901: /* evaluate communication - mesg to who, length of mesg, and buffer space
2902: required. Based on this, buffers are allocated, and data copied into them*/
2903: PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2904: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2905: PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2906: for (j=0; j<nrow; j++) {
2907: row_t = irow[j];
2908: proc = rtable[row_t];
2909: w1[proc]++;
2910: }
2911: nrqs = 0; /* no of outgoing messages */
2912: msz = 0; /* total mesg length (for all procs) */
2913: w1[rank] = 0; /* no mesg sent to self */
2914: for (i=0; i<size; i++) {
2915: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2916: }
2917:
2918: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2919: for (i=0,j=0; i<size; i++) {
2920: if (w1[i]) {
2921: pa[j++] = i;
2922: w1[i]++; /* header for return data */
2923: msz+=w1[i];
2924: }
2925: }
2926:
2927: {int *onodes1,*olengths1;
2928:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2929: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2930: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2931: /* Now post the Irecvs corresponding to these messages */
2932: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2933: PetscFree(onodes1);
2934: PetscFree(olengths1);
2935: }
2936:
2937: { int **ptr,*iptr,*tmp;
2938: /* Allocate Memory for outgoing messages */
2939: len = 2*size*sizeof(int*) + msz*sizeof(int);
2940: PetscMalloc(len,&sbuf1);
2941: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2942: PetscMemzero(sbuf1,2*size*sizeof(int*));
2943: /* allocate memory for outgoing data + buf to receive the first reply */
2944: tmp = (int*)(ptr + size);
2946: for (i=0,iptr=tmp; i<nrqs; i++) {
2947: j = pa[i];
2948: sbuf1[j] = iptr;
2949: iptr += w1[j];
2950: }
2952: /* Form the outgoing messages */
2953: for (i=0; i<nrqs; i++) {
2954: j = pa[i];
2955: sbuf1[j][0] = 0; /*header */
2956: ptr[j] = sbuf1[j] + 1;
2957: }
2958:
2959: /* Parse the isrow and copy data into outbuf */
2960: for (j=0; j<nrow; j++) {
2961: row_t = irow[j];
2962: proc = rtable[row_t];
2963: if (proc != rank) { /* copy to the outgoing buf*/
2964: sbuf1[proc][0]++;
2965: *ptr[proc] = row_t;
2966: ptr[proc]++;
2967: }
2968: }
2969: } /* block */
2971: /* Now post the sends */
2972:
2973: /* structure of sbuf1[i]/rbuf1[i]: 1 header entry (the number of requested rows)
2974:  * followed by the global numbers of the requested (non-local) rows */
2976: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2977: for (i=0; i<nrqs; ++i) {
2978: j = pa[i];
2979: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2980: }
2982: /* Post Receives to capture the buffer size */
2983: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2984: PetscMalloc((nrqs+1)*sizeof(int *),&rbuf2);
2985: PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
2986: for (i=1; i<nrqs; ++i) {
2987: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2988: }
2989: for (i=0; i<nrqs; ++i) {
2990: j = pa[i];
2991: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2992: }
2994: /* Send to other procs the buf size they should allocate */
2995: /* structure of sbuf2[i]/rbuf2[i]: 1 header entry (total size to allocate)
2996:  * followed by the length of each requested row */
2998: /* Receive messages*/
2999: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3000: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3001: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3002: PetscMalloc(len,&sbuf2);
3003: req_size = (int*)(sbuf2 + nrqr);
3004: req_source = req_size + nrqr;
3005:
3006: {
3007: BSsprow **sAi = A->rows;
3008: int id,rstart = c->rstart;
3009: int *sbuf2_i,*rbuf1_i,end;
3011: for (i=0; i<nrqr; ++i) {
3012: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3013: req_size[idx] = 0;
3014: rbuf1_i = rbuf1[idx];
3015: MPI_Get_count(r_status1+i,MPI_INT,&end);
3016: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3017: sbuf2_i = sbuf2[idx];
3018: for (j=1; j<end; j++) {
3019: id = rbuf1_i[j] - rstart;
3020: ncols_t = (sAi[id])->length;
3021: sbuf2_i[j] = ncols_t;
3022: req_size[idx] += ncols_t;
3023: }
3024: req_source[idx] = r_status1[i].MPI_SOURCE;
3025: /* form the header */
3026: sbuf2_i[0] = req_size[idx];
3027: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3028: }
3029: }
3030: PetscFree(r_status1);
3031: PetscFree(r_waits1);
3033: /* recv buffer sizes */
3034: /* Receive messages*/
3035:
3036: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3037: PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3038: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3039: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3040: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
3042: for (i=0; i<nrqs; ++i) {
3043: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3044: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3045: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3046: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3047: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3048: }
3049: PetscFree(r_status2);
3050: PetscFree(r_waits2);
3051:
3052: /* Wait on sends1 and sends2 */
3053: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3054: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
3056: MPI_Waitall(nrqs,s_waits1,s_status1);
3057: MPI_Waitall(nrqr,s_waits2,s_status2);
3058: PetscFree(s_status1);
3059: PetscFree(s_status2);
3060: PetscFree(s_waits1);
3061: PetscFree(s_waits2);
3063: /* Now allocate buffers for a->j, and send them off */
3064: /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the column
3065:  * indices resp. values of all requested rows; the row sizes were sent in rbuf2; the values are of FLOAT type) */
3066:
3067: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3068: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3069: PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3070: for (i=1; i<nrqr; i++) sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3071:
3072: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3073: {
3074: int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3075: int rstart = c->rstart;
3077: for (i=0; i<nrqr; i++) {
3078: rbuf1_i = rbuf1[i];
3079: sbuf3_i = sbuf3[i];
3080: noutcols = 0;
3081: kmax = rbuf1_i[0]; /* num. of req. rows */
3082: for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3083: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3084: ncols = Arow->length;
3085: Acol = Arow->col;
3086: /* load the column indices for this row into cols*/
3087: cols = sbuf3_i + noutcols;
3088: PetscMemcpy(cols,Acol,ncols*sizeof(int));
3089:         /* for (l=0; l<ncols; l++) cols[l] = Acol[l]; */  /* TODO: is a column mapping needed here? */
3090: noutcols += ncols;
3091: }
3092: MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3093: }
3094: }
3095: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3096: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
3098: /* Allocate buffers for a->a, and send them off */
3099:   /* could be optimized by combining with the previous block */
3100: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3101: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3102: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3103: for (i=1; i<nrqr; i++) sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3104:
3105: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3106: {
3107: FLOAT *Aval,*vals,*sbuf4_i;
3108: int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3109:
3110:
3111: for (i=0; i<nrqr; i++) {
3112: rbuf1_i = rbuf1[i];
3113: sbuf4_i = sbuf4[i];
3114: rqrow = 1;
3115: noutvals = 0;
3116: kmax = rbuf1_i[0]; /* num of req. rows */
3117: for (k=0; k<kmax; k++,rqrow++) {
3118: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3119: ncols = Arow->length;
3120: Aval = Arow->nz;
3121: /* load the column values for this row into vals*/
3122: vals = sbuf4_i+noutvals;
3123: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3124: noutvals += ncols;
3125: }
3126: MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3127: }
3128: }
3129: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3130: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3131: PetscFree(rbuf1);
3133: /* Form the matrix */
3135: /* create col map */
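  /* cmap[c] = 1-based position of global column c within iscol, or 0 if c is not selected */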
3136: len = C->N*sizeof(int)+1;
3137: PetscMalloc(len,&cmap);
3138: PetscMemzero(cmap,C->N*sizeof(int));
3139: for (j=0; j<ncol; j++) {
3140: cmap[icol[j]] = j+1;
3141: }
3142:
3143:   /* Create row map (a global row map may eventually be needed; this one is local) */
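  /* rmap[r] = 0-based position of global row r within isrow; only entries for
     selected rows are meaningful */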
3144: len = C->M*sizeof(int)+1;
3145: PetscMalloc(len,&rmap);
3146: PetscMemzero(rmap,C->M*sizeof(int));
3147: for (j=0; j<nrow; j++) {
3148: rmap[irow[j]] = j;
3149: }
3151: /*
3152: Determine the number of non-zeros in the diagonal and off-diagonal
3153: portions of the matrix in order to do correct preallocation
3154: */
3156: /* first get start and end of "diagonal" columns */
3157: if (csize == PETSC_DECIDE) {
3158: nlocal = ncol/size + ((ncol % size) > rank);
3159: } else {
3160: nlocal = csize;
3161: }
3162: {
3163: int ncols,*cols,olen,dlen,thecol;
3164: int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3165:
3166: MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3167: cstart = cend - nlocal;
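    /* cstart..cend (in the 1-based numbering used by cmap) delimit the block of selected
       columns owned by the "diagonal" part of the new parallel matrix on this process */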
3168: if (rank == size - 1 && cend != ncol) {
3169: SETERRQ(1,"Local column sizes do not add up to total number of columns");
3170: }
3172: PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3173: o_nz = d_nz + nrow;
3174:
3175: /* Update lens from local data */
3176: for (j=0; j<nrow; j++) {
3177: row = irow[j];
3178: proc = rtable[row];
3179: if (proc == rank) {
3180: Arow=A->rows[row-c->rstart];
3181: ncols=Arow->length;
3182: cols=Arow->col;
3183: olen=dlen=0;
3184: for (k=0; k<ncols; k++) {
3185: if ((thecol=cmap[cols[k]])) {
3186: if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3187: else olen++;
3188: }
3189: }
3190: o_nz[j]=olen;
3191: d_nz[j]=dlen;
3192: } else d_nz[j]=o_nz[j]=0;
3193: }
3194: /* Update lens from offproc data and done waits */
3195:     /* this will be much simpler once only the appropriate columns are sent */
3196: for (j=0; j<nrqs;j++) {
3197: MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3198: proc = pa[i];
3199: sbuf1_i = sbuf1[proc];
3200: cidx = 0;
3201: rbuf2_i = rbuf2[i];
3202: rbuf3_i = rbuf3[i];
3203: kmax = sbuf1_i[0]; /*num of rq. rows*/
3204: for (k=1; k<=kmax; k++) {
3205: row = rmap[sbuf1_i[k]]; /* the val in the new matrix to be */
3206: for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3207: if ((thecol=cmap[rbuf3_i[cidx]])) {
3208:
3209: if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3210: else o_nz[row]++;
3211: }
3212: }
3213: }
3214: }
3215: }
3216: PetscFree(r_status3);
3217: PetscFree(r_waits3);
3218: MPI_Waitall(nrqr,s_waits3,s_status3);
3219: PetscFree(s_status3);
3220: PetscFree(s_waits3);
3222: if (scall == MAT_INITIAL_MATRIX) {
3223: MatCreate(comm,nrow,nlocal,PETSC_DECIDE,ncol,submat);
3224:     MatSetType(*submat,MATMPIAIJ);
3225: MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3226: mat=(Mat_MPIAIJ *)((*submat)->data);
3227: matA=(Mat_SeqAIJ *)(mat->A->data);
3228: matB=(Mat_SeqAIJ *)(mat->B->data);
3229:
3230: } else {
3231: PetscTruth same;
3232:     /* The following code could optionally be dropped for debugged versions of a user's
3233:      * program, but I am not aware of a PETSc option that can switch off such safety
3234:      * tests; in the same way the counting of o_nz,d_nz could be dropped for the REUSE
3235:      * case */
3236:
3237: PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3238: if (same == PETSC_FALSE) {
3239: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3240: }
3241: if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3242: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3243: }
3244: mat=(Mat_MPIAIJ *)((*submat)->data);
3245: matA=(Mat_SeqAIJ *)(mat->A->data);
3246: matB=(Mat_SeqAIJ *)(mat->B->data);
3247: PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3248: if (same == PETSC_FALSE) {
3249: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3250: }
3251: PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3252: if (same == PETSC_FALSE) {
3253: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3254: }
3255:     /* Initialize the matrix as if empty */
3256: PetscMemzero(matA->ilen,nrow*sizeof(int));
3257: PetscMemzero(matB->ilen,nrow*sizeof(int));
3258:     /* Perhaps MatZeroEntries() would be better - check exactly what it does; all
3259:      * possibly stale information must be deleted */
3260:     /* submats[i]->factor = C->factor; ??? if the factor is to stay the same then some
3261:      * factor information must be copied - but where is it kept? */
3262: (*submat)->was_assembled=PETSC_FALSE;
3263: (*submat)->assembled=PETSC_FALSE;
3264:
3265: }
3266: PetscFree(d_nz);
3268: /* Assemble the matrix */
3269: /* First assemble from local rows */
3270: {
3271: int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3272: FLOAT *vals;
3273: PetscScalar *matA_a,*matB_a;
3274:
3275: for (j=0; j<nrow; j++) {
3276: oldrow = irow[j];
3277: proc = rtable[oldrow];
3278: if (proc == rank) {
3279: row = rmap[oldrow];
3280:
3281: Arow = A->rows[oldrow-c->rstart];
3282: ncols = Arow->length;
3283: cols = Arow->col;
3284: vals = Arow->nz;
3285:
3286: i_row = matA->i[row];
3287: matA_a = matA->a + i_row;
3288: matA_j = matA->j + i_row;
3289: i_row = matB->i[row];
3290: matB_a = matB->a + i_row;
3291: matB_j = matB->j + i_row;
3292: for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3293: if ((tcol = cmap[cols[k]])) {
3294: if (tcol<=cstart) {
3295: *matB_j++ = tcol-1;
3296: *matB_a++ = vals[k];
3297: ilenB++;
3298: } else if (tcol<=cend) {
3299: *matA_j++ = (tcol-1)-cstart;
3300: *matA_a++ = (PetscScalar)(vals[k]);
3301: ilenA++;
3302: } else {
3303: *matB_j++ = tcol-1;
3304: *matB_a++ = vals[k];
3305: ilenB++;
3306: }
3307: }
3308: }
3309: matA->ilen[row]=ilenA;
3310: matB->ilen[row]=ilenB;
3311:
3312: }
3313: }
3314: }
3316: /* Now assemble the off proc rows*/
3317: {
3318: int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3319: int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3320: PetscScalar *matA_a,*matB_a;
3321: FLOAT *rbuf4_i;
3323: for (j=0; j<nrqs; j++) {
3324: MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3325: proc = pa[i];
3326: sbuf1_i = sbuf1[proc];
3327:
3328: cidx = 0;
3329: rbuf2_i = rbuf2[i];
3330: rbuf3_i = rbuf3[i];
3331: rbuf4_i = rbuf4[i];
3332: kmax = sbuf1_i[0];
3333: for (k=1; k<=kmax; k++) {
3334: row = rmap[sbuf1_i[k]];
3335:
3336: i_row = matA->i[row];
3337: matA_a = matA->a + i_row;
3338: matA_j = matA->j + i_row;
3339: i_row = matB->i[row];
3340: matB_a = matB->a + i_row;
3341: matB_j = matB->j + i_row;
3342:
3343: lmax = rbuf2_i[k];
3344: for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3345: if ((tcol = cmap[rbuf3_i[cidx]])) {
3346: if (tcol<=cstart) {
3347: *matB_j++ = tcol-1;
3348:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3349: ilenB++;
3350: } else if (tcol<=cend) {
3351: *matA_j++ = (tcol-1)-cstart;
3352: *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3353: ilenA++;
3354: } else {
3355: *matB_j++ = tcol-1;
3356: *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3357: ilenB++;
3358: }
3359: }
3360: }
3361: matA->ilen[row]=ilenA;
3362: matB->ilen[row]=ilenB;
3363: }
3364: }
3365: }
3367: PetscFree(r_status4);
3368: PetscFree(r_waits4);
3369: MPI_Waitall(nrqr,s_waits4,s_status4);
3370: PetscFree(s_waits4);
3371: PetscFree(s_status4);
3373: /* Restore the indices */
3374: ISRestoreIndices(isrow,&irow);
3375: ISRestoreIndices(iscol,&icol);
3377: /* Destroy allocated memory */
3378: PetscFree(rtable);
3379: PetscFree(w1);
3380: PetscFree(pa);
3382: PetscFree(sbuf1);
3383: PetscFree(rbuf2[0]);
3384: PetscFree(rbuf2);
3385: for (i=0; i<nrqr; ++i) {
3386: PetscFree(sbuf2[i]);
3387: }
3388: for (i=0; i<nrqs; ++i) {
3389: PetscFree(rbuf3[i]);
3390: PetscFree(rbuf4[i]);
3391: }
3393: PetscFree(sbuf2);
3394: PetscFree(rbuf3);
3395: PetscFree(rbuf4);
3396: PetscFree(sbuf3[0]);
3397: PetscFree(sbuf3);
3398: PetscFree(sbuf4[0]);
3399: PetscFree(sbuf4);
3400:
3401: PetscFree(cmap);
3402: PetscFree(rmap);
3405: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3406: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3409: return(0);
3410: }