Actual source code: mpirowbs.c

  1: /* $Id: mpirowbs.c,v 2.9 2001/08/10 03:30:53 bsmith Exp $*/

 3:  #include "src/mat/impls/rowbs/mpi/mpirowbs.h"

  5: #define CHUNCKSIZE_LOCAL   10

  9: static int MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
 10: {

 14:   if (v) {
 15: #if defined(PETSC_USE_LOG)
 16:     int len = -n*(sizeof(int)+sizeof(PetscScalar));  /* negative so the logged memory usage is decremented */
 17: #endif
 18:     PetscFree(v);
 19:     PetscLogObjectMemory(A,len);
 20:   }
 21:   return(0);
 22: }

 26: static int MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 27: {
 28:   int len,ierr;

 31:   if (!n) {
 32:     *i = 0; *v = 0;
 33:   } else {
 34:     len = n*(sizeof(int) + sizeof(PetscScalar));  /* one allocation holds both the values and the column indices */
 35:     PetscMalloc(len,v);
 36:     PetscLogObjectMemory(A,len);
 37:     *i = (int *)(*v + n);                         /* column indices start right after the n scalar values */
 38:   }
 39:   return(0);
 40: }

 44: int MatScale_MPIRowbs(const PetscScalar *alphain,Mat inA)
 45: {
 46:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
 47:   BSspmat      *A = a->A;
 48:   BSsprow      *vs;
 49:   PetscScalar  *ap,alpha = *alphain;
 50:   int          i,m = inA->m,nrow,j;

 53:   for (i=0; i<m; i++) {
 54:     vs   = A->rows[i];
 55:     nrow = vs->length;
 56:     ap   = vs->nz;
 57:     for (j=0; j<nrow; j++) {
 58:       ap[j] *= alpha;
 59:     }
 60:   }
 61:   PetscLogFlops(a->nz);
 62:   return(0);
 63: }

 65: /* ----------------------------------------------------------------- */
 68: static int MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
 69: {
 70:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 71:   int          ierr,i,len,m = A->m,*tnnz;
 72:   BSspmat      *bsmat;
 73:   BSsprow      *vs;

 76:   PetscMalloc((m+1)*sizeof(int),&tnnz);
 77:   if (!nnz) {
 78:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 79:     if (nz <= 0)             nz = 1;
 80:     for (i=0; i<m; i++) tnnz[i] = nz;
 81:     nz      = nz*m;
 82:   } else {
 83:     nz = 0;
 84:     for (i=0; i<m; i++) {
 85:       if (nnz[i] <= 0) tnnz[i] = 1;
 86:       else             tnnz[i] = nnz[i];
 87:       nz += tnnz[i];
 88:     }
 89:   }

 91:   /* Allocate BlockSolve matrix context */
 92:   PetscNew(BSspmat,&bsif->A);
 93:   bsmat = bsif->A;
 94:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 95:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 96:   len                    = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
 97:   PetscMalloc(len,&bsmat->rows);
 98:   bsmat->num_rows        = m;
 99:   bsmat->global_num_rows = A->M;
100:   bsmat->map             = bsif->bsmap;
101:   vs                     = (BSsprow*)(bsmat->rows + m);
102:   for (i=0; i<m; i++) {
103:     bsmat->rows[i]  = vs;
104:     bsif->imax[i]   = tnnz[i];
105:     vs->diag_ind    = -1;
106:     MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
107:     /* put zero on diagonal */
108:     /*vs->length            = 1;
109:     vs->col[0]      = i + bsif->rstart;
110:     vs->nz[0]       = 0.0;*/
111:     vs->length = 0;
112:     vs++;
113:   }
114:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
115:   bsif->nz               = 0;
116:   bsif->maxnz            = nz;
117:   bsif->sorted           = 0;
118:   bsif->roworiented      = PETSC_TRUE;
119:   bsif->nonew            = 0;
120:   bsif->bs_color_single  = 0;

122:   PetscFree(tnnz);
123:   return(0);
124: }

128: static int MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
129: {
130:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
131:   BSspmat      *A = mat->A;
132:   BSsprow      *vs;
133:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax,ierr;
134:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
135:   PetscScalar  *ap,value;

138:   for (k=0; k<m; k++) { /* loop over added rows */
139:     row = im[k];
140:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
141:     if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
142:     vs   = A->rows[row];
143:     ap   = vs->nz; rp = vs->col;
144:     rmax = imax[row]; nrow = vs->length;
145:     a    = 0;
146:     for (l=0; l<n; l++) { /* loop over added columns */
147:       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
148:       if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
149:       col = in[l]; value = *v++;
 150:       if (!sorted) a = 0; b = nrow;  /* b is reset every pass; a is kept only when the columns arrive sorted */
151:       while (b-a > 5) {
152:         t = (b+a)/2;
153:         if (rp[t] > col) b = t;
154:         else             a = t;
155:       }
156:       for (i=a; i<b; i++) {
157:         if (rp[i] > col) break;
158:         if (rp[i] == col) {
159:           if (addv == ADD_VALUES) ap[i] += value;
160:           else                    ap[i] = value;
161:           goto noinsert;
162:         }
163:       }
164:       if (nonew) goto noinsert;
165:       if (nrow >= rmax) {
166:         /* there is no extra room in row, therefore enlarge */
167:         int    *itemp,*iout,*iin = vs->col;
168:         PetscScalar *vout,*vin = vs->nz,*vtemp;

170:         /* malloc new storage space */
171:         imax[row] += CHUNCKSIZE_LOCAL;
172:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
173:         vout = vtemp; iout = itemp;
174:         for (ii=0; ii<i; ii++) {
175:           vout[ii] = vin[ii];
176:           iout[ii] = iin[ii];
177:         }
178:         vout[i] = value;
179:         iout[i] = col;
180:         for (ii=i+1; ii<=nrow; ii++) {
181:           vout[ii] = vin[ii-1];
182:           iout[ii] = iin[ii-1];
183:         }
184:         /* free old row storage */
185:         if (rmax > 0) {
186:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
187:         }
188:         vs->col           =  iout; vs->nz = vout;
189:         rmax              =  imax[row];
190:         mat->maxnz        += CHUNCKSIZE_LOCAL;
191:         mat->reallocs++;
192:       } else {
 193:         /* shift higher columns over to make room for the new entry */
194:         for (ii=nrow-1; ii>=i; ii--) {
195:           rp[ii+1] = rp[ii];
196:           ap[ii+1] = ap[ii];
197:         }
198:         rp[i] = col;
199:         ap[i] = value;
200:       }
201:       nrow++;
202:       mat->nz++;
203:       AA->same_nonzero = PETSC_FALSE;
204:       noinsert:;
205:       a = i + 1;
206:     }
207:     vs->length = nrow;
208:   }
209:   return(0);
210: }


215: static int MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
216: {
218:   return(0);
219: }

223: static int MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
224: {
225:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
226:   BSspmat      *A = a->A;
227:   BSsprow      *vs;
228:   int          i,j,rstart = a->rstart;

231:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

233:   /* Mark location of diagonal */
234:   for (i=0; i<AA->m; i++) {
235:     vs = A->rows[i];
236:     for (j=0; j<vs->length; j++) {
237:       if (vs->col[j] == i + rstart) {
238:         vs->diag_ind = j;
239:         break;
240:       }
241:     }
242:     if (vs->diag_ind == -1) {
243:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
244:     }
245:   }
246:   return(0);
247: }

251: static int MatZeroRows_MPIRowbs_local(Mat A,IS is,const PetscScalar *diag)
252: {
253:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
254:   BSspmat      *l = a->A;
255:   int          i,ierr,N,*rz,m = A->m - 1,col,base=a->rowners[a->rank];

258:   ISGetLocalSize(is,&N);
259:   ISGetIndices(is,&rz);
260:   if (a->keepzeroedrows) {
261:     for (i=0; i<N; i++) {
262:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
263:       PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
264:       if (diag) {
265:         col=rz[i]+base;
266:         MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
267:       }
268:     }
269:   } else {
270:     if (diag) {
271:       for (i=0; i<N; i++) {
272:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
273:         if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
274:           l->rows[rz[i]]->length = 1;
275:           l->rows[rz[i]]->nz[0]  = *diag;
276:           l->rows[rz[i]]->col[0] = a->rstart + rz[i];
277:         } else {
278:           col=rz[i]+base;
279:           MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,diag,INSERT_VALUES);
280:         }
281:       }
282:     } else {
283:       for (i=0; i<N; i++) {
284:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
285:         l->rows[rz[i]]->length = 0;
286:       }
287:     }
288:     A->same_nonzero = PETSC_FALSE;
289:   }
290:   ISRestoreIndices(is,&rz);
291:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
292:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
293:   return(0);
294: }

298: static int MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
299: {
300:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
301:   BSsprow      *vs,**rs;
302:   PetscScalar  *xv;
303:   PetscReal    sum = 0.0;
304:   int          *xi,nz,i,j,ierr;

307:   rs = mat->A->rows;
308:   if (type == NORM_FROBENIUS) {
309:     for (i=0; i<A->m; i++) {
310:       vs = *rs++;
311:       nz = vs->length;
312:       xv = vs->nz;
313:       while (nz--) {
314: #if defined(PETSC_USE_COMPLEX)
315:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
316: #else
317:         sum += (*xv)*(*xv); xv++;
318: #endif
319:       }
320:     }
321:     *norm = sqrt(sum);
322:   } else if (type == NORM_1) { /* max column norm */
323:     PetscReal *tmp;
324:     PetscMalloc(A->n*sizeof(PetscReal),&tmp);
325:     PetscMemzero(tmp,A->n*sizeof(PetscReal));
326:     *norm = 0.0;
327:     for (i=0; i<A->m; i++) {
328:       vs = *rs++;
329:       nz = vs->length;
330:       xi = vs->col;
331:       xv = vs->nz;
332:       while (nz--) {
333:         tmp[*xi] += PetscAbsScalar(*xv);
334:         xi++; xv++;
335:       }
336:     }
337:     for (j=0; j<A->n; j++) {
338:       if (tmp[j] > *norm) *norm = tmp[j];
339:     }
340:     PetscFree(tmp);
341:   } else if (type == NORM_INFINITY) { /* max row norm */
342:     *norm = 0.0;
343:     for (i=0; i<A->m; i++) {
344:       vs = *rs++;
345:       nz = vs->length;
346:       xv = vs->nz;
347:       sum = 0.0;
348:       while (nz--) {
349:         sum += PetscAbsScalar(*xv); xv++;
350:       }
351:       if (sum > *norm) *norm = sum;
352:     }
353:   } else {
354:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
355:   }
356:   return(0);
357: }

359: /* ----------------------------------------------------------------- */

363: int MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
364: {
365:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
366:   int          ierr,i,j,row,col,rstart = a->rstart,rend = a->rend;
367:   PetscTruth   roworiented = a->roworiented;

370:   /* Note:  There's no need to "unscale" the matrix, since scaling is
371:      confined to a->pA, and we're working with a->A here */
372:   for (i=0; i<m; i++) {
373:     if (im[i] < 0) continue;
374:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
375:     if (im[i] >= rstart && im[i] < rend) {
376:       row = im[i] - rstart;
377:       for (j=0; j<n; j++) {
378:         if (in[j] < 0) continue;
379:         if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
380:         if (in[j] >= 0 && in[j] < mat->N){
381:           col = in[j];
382:           if (roworiented) {
383:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
384:           } else {
385:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
386:           }
387:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
388:       }
389:     } else {
390:       if (!a->donotstash) {
391:         if (roworiented) {
392:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
393:         } else {
394:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
395:         }
396:       }
397:     }
398:   }
399:   return(0);
400: }

404: int MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
405: {
406:   Mat_MPIRowbs  *a = (Mat_MPIRowbs*)mat->data;
407:   MPI_Comm      comm = mat->comm;
408:   int           ierr,nstash,reallocs;
409:   InsertMode    addv;

412:   /* Note:  There's no need to "unscale" the matrix, since scaling is
413:             confined to a->pA, and we're working with a->A here */

415:   /* make sure all processors are either in INSERTMODE or ADDMODE */
416:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
417:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
418:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
419:   }
420:   mat->insertmode = addv; /* in case this processor had no cache */

422:   MatStashScatterBegin_Private(&mat->stash,a->rowners);
423:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
424:   PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
425:   return(0);
426: }

 428:  #include "petscviewer.h"

432: static int MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
433: {
434:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
435:   int               ierr,i,j;
436:   PetscTruth        isascii;
437:   BSspmat           *A = a->A;
438:   BSsprow           **rs = A->rows;
439:   PetscViewerFormat format;

442:   PetscViewerGetFormat(viewer,&format);
443:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);

445:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
446:     int ind_l,ind_g,clq_l,clq_g,color;
447:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
448:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
449:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
450:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
451:     color = BSnum_colors(a->pA);CHKERRBS(0);
452:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
453:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
454:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
455:     for (i=0; i<A->num_rows; i++) {
456:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
457:       for (j=0; j<rs[i]->length; j++) {
458:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
459:       }
460:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
461:     }
462:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
463:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
464:   } else {
465:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
466:     for (i=0; i<A->num_rows; i++) {
467:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
468:       for (j=0; j<rs[i]->length; j++) {
469:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
470:       }
471:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
472:     }
473:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
474:   }
475:   PetscViewerFlush(viewer);
476:   return(0);
477: }

481: static int MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
482: {
483:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
484:   int          ierr,i,M,m,rank,size,*sbuff,*rowlengths;
485:   int          *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
486:   BSspmat      *A = a->A;
487:   BSsprow      **rs = A->rows;
488:   MPI_Comm     comm = mat->comm;
489:   MPI_Status   status;
490:   PetscScalar  *vals;
491:   MatInfo      info;

494:   MPI_Comm_size(comm,&size);
495:   MPI_Comm_rank(comm,&rank);

497:   M = mat->M; m = mat->m;
498:   /* First gather together on the first processor the lengths of 
499:      each row, and write them out to the file */
500:   PetscMalloc(m*sizeof(int),&sbuff);
501:   for (i=0; i<A->num_rows; i++) {
502:     sbuff[i] = rs[i]->length;
503:   }
504:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
505:   if (!rank) {
506:     PetscViewerBinaryGetDescriptor(viewer,&fd);
507:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
508:     PetscMalloc(size*sizeof(int),&recvcts);
509:     recvdisp = a->rowners;
510:     for (i=0; i<size; i++) {
511:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
512:     }
513:     /* first four elements of rowlength are the header */
514:     rowlengths[0] = mat->cookie;
515:     rowlengths[1] = mat->M;
516:     rowlengths[2] = mat->N;
517:     rowlengths[3] = (int)info.nz_used;
518:     MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
519:     PetscFree(sbuff);
520:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,0);
521:     /* count the number of nonzeros on each processor */
522:     PetscMemzero(recvcts,size*sizeof(int));
523:     for (i=0; i<size; i++) {
524:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
 525:         recvcts[i] += rowlengths[j+4];  /* row lengths start after the 4-element header */
526:       }
527:     }
528:     /* allocate buffer long enough to hold largest one */
529:     maxnz = 0;
530:     for (i=0; i<size; i++) {
531:       maxnz = PetscMax(maxnz,recvcts[i]);
532:     }
533:     PetscFree(rowlengths);
534:     PetscFree(recvcts);
535:     PetscMalloc(maxnz*sizeof(int),&cols);

537:     /* binary store column indices for 0th processor */
538:     nz = 0;
539:     for (i=0; i<A->num_rows; i++) {
540:       for (j=0; j<rs[i]->length; j++) {
541:         cols[nz++] = rs[i]->col[j];
542:       }
543:     }
544:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);

546:     /* receive and store column indices for all other processors */
547:     for (i=1; i<size; i++) {
548:       /* should tell processor that I am now ready and to begin the send */
549:       MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
550:       MPI_Get_count(&status,MPI_INT,&nz);
551:       PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
552:     }
553:     PetscFree(cols);
554:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

556:     /* binary store values for 0th processor */
557:     nz = 0;
558:     for (i=0; i<A->num_rows; i++) {
559:       for (j=0; j<rs[i]->length; j++) {
560:         vals[nz++] = rs[i]->nz[j];
561:       }
562:     }
563:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);

565:     /* receive and store nonzeros for all other processors */
566:     for (i=1; i<size; i++) {
567:       /* should tell processor that I am now ready and to begin the send */
568:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
569:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
570:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
571:     }
572:     PetscFree(vals);
573:   } else {
574:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
575:     PetscFree(sbuff);

577:     /* count local nonzeros */
578:     nz = 0;
579:     for (i=0; i<A->num_rows; i++) {
580:       for (j=0; j<rs[i]->length; j++) {
581:         nz++;
582:       }
583:     }
584:     /* copy into buffer column indices */
585:     PetscMalloc(nz*sizeof(int),&cols);
586:     nz = 0;
587:     for (i=0; i<A->num_rows; i++) {
588:       for (j=0; j<rs[i]->length; j++) {
589:         cols[nz++] = rs[i]->col[j];
590:       }
591:     }
592:     /* send */  /* should wait until processor zero tells me to go */
593:     MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
594:     PetscFree(cols);

596:     /* copy into buffer column values */
597:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
598:     nz   = 0;
599:     for (i=0; i<A->num_rows; i++) {
600:       for (j=0; j<rs[i]->length; j++) {
601:         vals[nz++] = rs[i]->nz[j];
602:       }
603:     }
604:     /* send */  /* should wait until processor zero tells me to go */
605:     MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
606:     PetscFree(vals);
607:   }

609:   return(0);
610: }

614: int MatView_MPIRowbs(Mat mat,PetscViewer viewer)
615: {
616:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
617:   int          ierr;
618:   PetscTruth   isascii,isbinary;

621:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
622:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
623:   if (!bsif->blocksolveassembly) {
624:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
625:   }
626:   if (isascii) {
627:     MatView_MPIRowbs_ASCII(mat,viewer);
628:   } else if (isbinary) {
629:     MatView_MPIRowbs_Binary(mat,viewer);
630:   } else {
631:     SETERRQ1(1,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
632:   }
633:   return(0);
634: }
635: 
638: static int MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
639: {
640:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
641:   BSspmat      *A = a->A;
642:   BSsprow      *vs;
643:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
644:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
645:   int          ctr_j,*sbuf1_j,k,ierr;
646:   PetscScalar  val=0.0;
647:   MPI_Comm     comm;
648:   MPI_Request  *s_waits1,*r_waits1;
649:   MPI_Status   *s_status,*r_status;

652:   comm   = mat->comm;
653:   tag    = mat->tag;
654:   size   = a->size;
655:   rank   = a->rank;
656:   M      = mat->M;
657:   rstart = a->rstart;

659:   PetscMalloc(M*sizeof(int),&rtable);
660:   /* Create hash table for the mapping :row -> proc */
661:   for (i=0,j=0; i<size; i++) {
662:     len = a->rowners[i+1];
663:     for (; j<len; j++) {
664:       rtable[j] = i;
665:     }
666:   }

668:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
669:      required. Based on this, buffers are allocated, and data copied into them. */
670:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
671:   w3   = w1 + 2*size;       /* no of IS that needs to be sent to proc i */
672:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
673:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

675:   for (i=0;  i<mat->m; i++) {
676:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
677:     vs = A->rows[i];
678:     for (j=0; j<vs->length; j++) {
679:       proc = rtable[vs->col[j]];
680:       w4[proc]++;
681:     }
682:     for (j=0; j<size; j++) {
683:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
684:     }
685:   }
686: 
687:   nrqs       = 0;              /* number of outgoing messages */
 688:   msz        = 0;              /* total mesg length (for all procs) */
689:   w1[2*rank] = 0;              /* no mesg sent to itself */
690:   w3[rank]   = 0;
691:   for (i=0; i<size; i++) {
692:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
693:   }
694:   /* pa - is list of processors to communicate with */
695:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
696:   for (i=0,j=0; i<size; i++) {
697:     if (w1[2*i]) {pa[j] = i; j++;}
698:   }

700:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
701:   for (i=0; i<nrqs; i++) {
702:     j       = pa[i];
703:     w1[2*j] += w1[2*j+1] + 2*w3[j];
704:     msz     += w1[2*j];
705:   }
706: 
707:   /* Do a global reduction to determine how many messages to expect */
708:   PetscMaxSum(comm,w1,&bsz,&nrqr);

 710:   /* Allocate memory for recv buffers (possibly none if nrqr == 0) */
711:   len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
712:   PetscMalloc(len,&rbuf1);
713:   rbuf1[0] = (int*)(rbuf1 + nrqr);
714:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

716:   /* Post the receives */
717:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
718:   for (i=0; i<nrqr; ++i){
719:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
720:   }
721: 
722:   /* Allocate Memory for outgoing messages */
723:   len   = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
724:   PetscMalloc(len,&sbuf1);
725:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
726:   PetscMemzero(sbuf1,2*size*sizeof(int*));
727:   tmp   = (int*)(sbuf1 + 2*size);
728:   ctr   = tmp + msz;

730:   {
731:     int *iptr = tmp,ict  = 0;
732:     for (i=0; i<nrqs; i++) {
733:       j        = pa[i];
734:       iptr    += ict;
735:       sbuf1[j] = iptr;
736:       ict      = w1[2*j];
737:     }
738:   }

740:   /* Form the outgoing messages */
741:   /* Clean up the header space */
742:   for (i=0; i<nrqs; i++) {
743:     j           = pa[i];
744:     sbuf1[j][0] = 0;
745:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
746:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
747:   }

749:   /* Parse the matrix and copy the data into sbuf1 */
750:   for (i=0; i<mat->m; i++) {
751:     PetscMemzero(ctr,size*sizeof(int));
752:     vs = A->rows[i];
753:     for (j=0; j<vs->length; j++) {
754:       col  = vs->col[j];
755:       proc = rtable[col];
756:       if (proc != rank) { /* copy to the outgoing buffer */
757:         ctr[proc]++;
 758:         *ptr[proc] = col;
 759:         ptr[proc]++;
760:       } else {
761:         row = col - rstart;
762:         col = i + rstart;
763:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
764:       }
765:     }
766:     /* Update the headers for the current row */
767:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
768:       if ((ctr_j = ctr[j])) {
769:         sbuf1_j        = sbuf1[j];
770:         k               = ++sbuf1_j[0];
771:         sbuf1_j[2*k]   = ctr_j;
772:         sbuf1_j[2*k-1] = i + rstart;
773:       }
774:     }
775:   }
776:    /* Check Validity of the outgoing messages */
777:   {
778:     int sum;
779:     for (i=0 ; i<nrqs ; i++) {
780:       j = pa[i];
781:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
782:     }

784:     for (i=0 ; i<nrqs ; i++) {
785:       j = pa[i];
786:       sum = 1;
787:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
788:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
789:     }
790:   }
791: 
792:   /* Now post the sends */
793:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
794:   for (i=0; i<nrqs; ++i) {
795:     j    = pa[i];
796:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
797:   }
798: 
799:   /* Receive messages*/
800:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
801:   for (i=0; i<nrqr; ++i) {
802:     MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
803:     /* Process the Message */
804:     {
805:       int    *rbuf1_i,n_row,ct1;

807:       rbuf1_i = rbuf1[idx];
808:       n_row   = rbuf1_i[0];
809:       ct1     = 2*n_row+1;
810:       val     = 0.0;
811:       /* Optimise this later */
812:       for (j=1; j<=n_row; j++) {
813:         col = rbuf1_i[2*j-1];
814:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
815:           row = rbuf1_i[ct1] - rstart;
816:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
817:         }
818:       }
819:     }
820:   }

822:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
823:   MPI_Waitall(nrqs,s_waits1,s_status);

825:   PetscFree(rtable);
826:   PetscFree(w1);
827:   PetscFree(pa);
828:   PetscFree(rbuf1);
829:   PetscFree(sbuf1);
830:   PetscFree(r_waits1);
831:   PetscFree(s_waits1);
832:   PetscFree(r_status);
833:   PetscFree(s_status);
834:   return(0);
835: }

837: /*
838:      This does the BlockSolve portion of the matrix assembly.
 839:    It is provided in a separate routine so that users can
840:    operate on the matrix (using MatScale(), MatShift() etc.) after 
841:    the matrix has been assembled but before BlockSolve has sucked it
842:    in and devoured it.
843: */
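/*
   A minimal sketch of the calling pattern this enables, assuming the old-style
   PETSc 2.x argument order for MatScale()/MatShift() and placeholder scalars
   alpha and beta:

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatScale(&alpha,A);CHKERRQ(ierr);      (still operates on a->A)
      ierr = MatShift(&beta,A);CHKERRQ(ierr);

   The BlockSolve portion of the assembly is then performed lazily, e.g. by the
   first MatMult() or factorization, which checks the blocksolveassembly flag and
   calls MatAssemblyEnd_MPIRowbs_ForBlockSolve().
*/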
846: int MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
847: {
848:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
849:   int          ierr,ldim,low,high,i;
850:   PetscScalar  *diag;

853:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
854:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
855:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
856:   }

858:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
859:     /* Indicates bypassing cliques in coloring */
860:     if (a->bs_color_single) {
861:       BSctx_set_si(a->procinfo,100);
862:     }
863:     /* Form permuted matrix for efficient parallel execution */
864:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
865:     /* Set up the communication */
866:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
867:   } else {
868:     /* Repermute the matrix */
869:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
870:   }

872:   /* Symmetrically scale the matrix by the diagonal */
873:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

875:   /* Store inverse of square root of permuted diagonal scaling matrix */
876:   VecGetLocalSize(a->diag,&ldim);
877:   VecGetOwnershipRange(a->diag,&low,&high);
878:   VecGetArray(a->diag,&diag);
879:   for (i=0; i<ldim; i++) {
880:     if (a->pA->scale_diag[i] != 0.0) {
881:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
882:     } else {
883:       diag[i] = 1.0;
884:     }
885:   }
886:   VecRestoreArray(a->diag,&diag);
887:   a->assembled_icc_storage = a->A->icc_storage;
888:   a->blocksolveassembly = 1;
889:   mat->was_assembled    = PETSC_TRUE;
890:   mat->same_nonzero     = PETSC_TRUE;
891:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
892:   return(0);
893: }

897: int MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
898: {
899:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
900:   int          i,n,row,col,*rows,*cols,ierr,rstart,nzcount,flg,j,ncols;
901:   PetscScalar  *vals,val;
902:   InsertMode   addv = mat->insertmode;

905:   while (1) {
906:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
907:     if (!flg) break;
908: 
909:     for (i=0; i<n;) {
910:       /* Now identify the consecutive vals belonging to the same row */
911:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
912:       if (j < n) ncols = j-i;
913:       else       ncols = n-i;
914:       /* Now assemble all these values with a single function call */
915:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
916:       i = j;
917:     }
918:   }
919:   MatStashScatterEnd_Private(&mat->stash);

921:   rstart = a->rstart;
922:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
923:   /* BlockSolve requires that the matrix is structurally symmetric */
924:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
925:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
926:   }
927: 
928:   /* BlockSolve requires that all the diagonal elements are set */
929:   val  = 0.0;
930:   for (i=0; i<mat->m; i++) {
931:     row = i; col = i + rstart;
932:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
933:   }
934: 
935:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
936:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
937: 
938:   a->blocksolveassembly = 0;
939:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
940:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
941:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
942:   return(0);
943: }

947: int MatZeroEntries_MPIRowbs(Mat mat)
948: {
949:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
950:   BSspmat      *A = l->A;
951:   BSsprow      *vs;
952:   int          i,j;

955:   for (i=0; i <mat->m; i++) {
956:     vs = A->rows[i];
957:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
958:   }
959:   return(0);
960: }

962: /* the code does not do the diagonal entries correctly unless the 
 963:    matrix is square and the column and row ownerships are identical.
964:    This is a BUG.
965: */

969: int MatZeroRows_MPIRowbs(Mat A,IS is,const PetscScalar *diag)
970: {
971:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
972:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
973:   int            *nprocs,j,idx,nsends;
974:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
975:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
976:   int            *lens,imdex,*lrows,*values;
977:   MPI_Comm       comm = A->comm;
978:   MPI_Request    *send_waits,*recv_waits;
979:   MPI_Status     recv_status,*send_status;
980:   IS             istmp;
981:   PetscTruth     found;

984:   ISGetLocalSize(is,&N);
985:   ISGetIndices(is,&rows);

987:   /*  first count number of contributors to each processor */
988:   PetscMalloc(2*size*sizeof(int),&nprocs);
989:   PetscMemzero(nprocs,2*size*sizeof(int));
990:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
991:   for (i=0; i<N; i++) {
992:     idx   = rows[i];
993:     found = PETSC_FALSE;
994:     for (j=0; j<size; j++) {
995:       if (idx >= owners[j] && idx < owners[j+1]) {
996:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
997:       }
998:     }
999:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1000:   }
1001:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

1003:   /* inform other processors of number of messages and max length*/
1004:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

1006:   /* post receives:   */
1007:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1008:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1009:   for (i=0; i<nrecvs; i++) {
1010:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1011:   }

1013:   /* do sends:
1014:       1) starts[i] gives the starting index in svalues for stuff going to 
1015:          the ith processor
1016:   */
1017:   PetscMalloc((N+1)*sizeof(int),&svalues);
1018:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1019:   PetscMalloc((size+1)*sizeof(int),&starts);
1020:   starts[0] = 0;
1021:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1022:   for (i=0; i<N; i++) {
1023:     svalues[starts[owner[i]]++] = rows[i];
1024:   }
1025:   ISRestoreIndices(is,&rows);

1027:   starts[0] = 0;
1028:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1029:   count = 0;
1030:   for (i=0; i<size; i++) {
1031:     if (nprocs[2*i+1]) {
1032:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1033:     }
1034:   }
1035:   PetscFree(starts);

1037:   base = owners[rank];

1039:   /*  wait on receives */
1040:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1041:   source = lens + nrecvs;
1042:   count = nrecvs; slen = 0;
1043:   while (count) {
1044:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1045:     /* unpack receives into our local space */
1046:     MPI_Get_count(&recv_status,MPI_INT,&n);
1047:     source[imdex]  = recv_status.MPI_SOURCE;
1048:     lens[imdex]    = n;
1049:     slen           += n;
1050:     count--;
1051:   }
1052:   PetscFree(recv_waits);
1053: 
1054:   /* move the data into the send scatter */
1055:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1056:   count = 0;
1057:   for (i=0; i<nrecvs; i++) {
1058:     values = rvalues + i*nmax;
1059:     for (j=0; j<lens[i]; j++) {
1060:       lrows[count++] = values[j] - base;
1061:     }
1062:   }
1063:   PetscFree(rvalues);
1064:   PetscFree(lens);
1065:   PetscFree(owner);
1066:   PetscFree(nprocs);
1067: 
1068:   /* actually zap the local rows */
1069:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1070:   PetscLogObjectParent(A,istmp);
1071:   PetscFree(lrows);
1072:   MatZeroRows_MPIRowbs_local(A,istmp,diag);
1073:   ISDestroy(istmp);

1075:   /* wait on sends */
1076:   if (nsends) {
1077:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1078:     MPI_Waitall(nsends,send_waits,send_status);
1079:     PetscFree(send_status);
1080:   }
1081:   PetscFree(send_waits);
1082:   PetscFree(svalues);

1084:   return(0);
1085: }

1089: int MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1090: {
1091:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1092:   BSsprow      *vs,**rs;
1093:   PetscScalar  *xv;
1094:   PetscReal    sum = 0.0;
1095:   int          *xi,nz,i,j,ierr;

1098:   if (a->size == 1) {
1099:     MatNorm_MPIRowbs_local(mat,type,norm);
1100:   } else {
1101:     rs = a->A->rows;
1102:     if (type == NORM_FROBENIUS) {
1103:       for (i=0; i<mat->m; i++) {
1104:         vs = *rs++;
1105:         nz = vs->length;
1106:         xv = vs->nz;
1107:         while (nz--) {
1108: #if defined(PETSC_USE_COMPLEX)
1109:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1110: #else
1111:           sum += (*xv)*(*xv); xv++;
1112: #endif
1113:         }
1114:       }
1115:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1116:       *norm = sqrt(*norm);
1117:     } else if (type == NORM_1) { /* max column norm */
1118:       PetscReal *tmp,*tmp2;
1119:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1120:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1121:       PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1122:       *norm = 0.0;
1123:       for (i=0; i<mat->m; i++) {
1124:         vs = *rs++;
1125:         nz = vs->length;
1126:         xi = vs->col;
1127:         xv = vs->nz;
1128:         while (nz--) {
1129:           tmp[*xi] += PetscAbsScalar(*xv);
1130:           xi++; xv++;
1131:         }
1132:       }
1133:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1134:       for (j=0; j<mat->n; j++) {
1135:         if (tmp2[j] > *norm) *norm = tmp2[j];
1136:       }
1137:       PetscFree(tmp);
1138:       PetscFree(tmp2);
1139:     } else if (type == NORM_INFINITY) { /* max row norm */
1140:       PetscReal ntemp = 0.0;
1141:       for (i=0; i<mat->m; i++) {
1142:         vs = *rs++;
1143:         nz = vs->length;
1144:         xv = vs->nz;
1145:         sum = 0.0;
1146:         while (nz--) {
1147:           sum += PetscAbsScalar(*xv); xv++;
1148:         }
1149:         if (sum > ntemp) ntemp = sum;
1150:       }
1151:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1152:     } else {
1153:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1154:     }
1155:   }
1156:   return(0);
1157: }

1161: int MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1162: {
1163:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1164:   BSprocinfo   *bspinfo = bsif->procinfo;
1165:   PetscScalar  *xxa,*xworka,*yya;
1166:   int          ierr;

1169:   if (!bsif->blocksolveassembly) {
1170:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1171:   }

1173:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1174:   if (!bsif->vecs_permscale) {
1175:     VecGetArray(bsif->xwork,&xworka);
1176:     VecGetArray(xx,&xxa);
1177:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1178:     VecRestoreArray(bsif->xwork,&xworka);
1179:     VecRestoreArray(xx,&xxa);
1180:     VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1181:   }

1183:   VecGetArray(xx,&xxa);
1184:   VecGetArray(yy,&yya);
1185:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1186:   if (bspinfo->single) {
1187:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1188:   }  else {
1189:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190:   }
1191: 
1192:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1193:   if (mat->symmetric) {
1194:     if (bspinfo->single){
1195:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1196:     } else {
1197:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1198:     }
1199:   }
1200:   /* not needed for ILU version since forward does it all */
1201:   VecRestoreArray(xx,&xxa);
1202:   VecRestoreArray(yy,&yya);

1204:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1205:   if (!bsif->vecs_permscale) {
1206:     VecGetArray(bsif->xwork,&xworka);
1207:     VecGetArray(xx,&xxa);
1208:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1209:     VecRestoreArray(bsif->xwork,&xworka);
1210:     VecRestoreArray(xx,&xxa);
1211:     VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1212:     VecGetArray(bsif->xwork,&xworka);
1213:     VecGetArray(yy,&yya);
1214:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1215:     VecRestoreArray(bsif->xwork,&xworka);
1216:     VecRestoreArray(yy,&yya);
1217:   }
1218:   PetscLogFlops(2*bsif->nz - mat->m);

1220:   return(0);
1221: }

1225: int MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1226: {
1227:   int          ierr;
1228:   PetscScalar  one = 1.0;

1231:   (*mat->ops->mult)(mat,xx,zz);
1232:   VecAXPY(&one,yy,zz);
1233:   return(0);
1234: }

1238: int MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1239: {
1240:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1241:   PetscReal    isend[5],irecv[5];
1242:   int          ierr;

1245:   info->rows_global    = (double)A->M;
1246:   info->columns_global = (double)A->N;
1247:   info->rows_local     = (double)A->m;
1248:   info->columns_local  = (double)A->N;
1249:   info->block_size     = 1.0;
1250:   info->mallocs        = (double)mat->reallocs;
1251:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1252:   isend[3] = A->mem;  isend[4] = info->mallocs;

1254:   if (flag == MAT_LOCAL) {
1255:     info->nz_used      = isend[0];
1256:     info->nz_allocated = isend[1];
1257:     info->nz_unneeded  = isend[2];
1258:     info->memory       = isend[3];
1259:     info->mallocs      = isend[4];
1260:   } else if (flag == MAT_GLOBAL_MAX) {
 1261:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1262:     info->nz_used      = irecv[0];
1263:     info->nz_allocated = irecv[1];
1264:     info->nz_unneeded  = irecv[2];
1265:     info->memory       = irecv[3];
1266:     info->mallocs      = irecv[4];
1267:   } else if (flag == MAT_GLOBAL_SUM) {
 1268:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1269:     info->nz_used      = irecv[0];
1270:     info->nz_allocated = irecv[1];
1271:     info->nz_unneeded  = irecv[2];
1272:     info->memory       = irecv[3];
1273:     info->mallocs      = irecv[4];
1274:   }
1275:   return(0);
1276: }

1280: int MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1281: {
1282:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1283:   BSsprow      **rs = a->A->rows;
1284:   int          i,n,ierr;
1285:   PetscScalar  *x,zero = 0.0;

1288:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1289:   if (!a->blocksolveassembly) {
1290:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1291:   }

1293:   VecSet(&zero,v);
1294:   VecGetLocalSize(v,&n);
1295:   if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1296:   VecGetArray(v,&x);
1297:   for (i=0; i<mat->m; i++) {
1298:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1299:   }
1300:   VecRestoreArray(v,&x);
1301:   return(0);
1302: }

1306: int MatDestroy_MPIRowbs(Mat mat)
1307: {
1308:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1309:   BSspmat      *A = a->A;
1310:   BSsprow      *vs;
1311:   int          i,ierr;

1314: #if defined(PETSC_USE_LOG)
1315:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1316: #endif
1317:   PetscFree(a->rowners);
1318:   MatStashDestroy_Private(&mat->stash);
1319:   if (a->bsmap) {
1320:     if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1321:     if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1322:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1323:     PetscFree(a->bsmap);
1324:   }

1326:   if (A) {
1327:     for (i=0; i<mat->m; i++) {
1328:       vs = A->rows[i];
1329:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1330:     }
1331:     /* Note: A->map = a->bsmap is freed above */
1332:     PetscFree(A->rows);
1333:     PetscFree(A);
1334:   }
1335:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1336:   if (a->diag)     {VecDestroy(a->diag);}
1337:   if (a->xwork)    {VecDestroy(a->xwork);}
1338:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1339:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1340:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1341:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1342:   if (a->imax)     {PetscFree(a->imax);}
1343:   MPI_Comm_free(&(a->comm_mpirowbs));
1344:   PetscFree(a);
1345:   return(0);
1346: }

1350: int MatSetOption_MPIRowbs(Mat A,MatOption op)
1351: {
1352:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;

1355:   switch (op) {
1356:   case MAT_ROW_ORIENTED:
1357:     a->roworiented = PETSC_TRUE;
1358:     break;
1359:   case MAT_COLUMN_ORIENTED:
1360:     a->roworiented = PETSC_FALSE;
1361:     break;
1362:   case MAT_COLUMNS_SORTED:
1363:     a->sorted      = 1;
1364:     break;
1365:   case MAT_COLUMNS_UNSORTED:
1366:     a->sorted      = 0;
1367:     break;
1368:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1369:     a->nonew       = 1;
1370:     break;
1371:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1372:     a->nonew       = 0;
1373:     break;
1374:   case MAT_DO_NOT_USE_INODES:
1375:     a->bs_color_single = 1;
1376:     break;
1377:   case MAT_YES_NEW_DIAGONALS:
1378:   case MAT_ROWS_SORTED:
1379:   case MAT_NEW_NONZERO_LOCATION_ERR:
1380:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381:   case MAT_ROWS_UNSORTED:
1382:   case MAT_USE_HASH_TABLE:
1383:     PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1384:     break;
1385:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1386:     a->donotstash = PETSC_TRUE;
1387:     break;
1388:   case MAT_NO_NEW_DIAGONALS:
1389:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1390:     break;
1391:   case MAT_KEEP_ZEROED_ROWS:
1392:     a->keepzeroedrows    = PETSC_TRUE;
1393:     break;
1394:   case MAT_SYMMETRIC:
1395:     BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1396:     break;
1397:   case MAT_STRUCTURALLY_SYMMETRIC:
1398:   case MAT_NOT_SYMMETRIC:
1399:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1400:   case MAT_HERMITIAN:
1401:   case MAT_NOT_HERMITIAN:
1402:   case MAT_SYMMETRY_ETERNAL:
1403:   case MAT_NOT_SYMMETRY_ETERNAL:
1404:     break;
1405:   default:
1406:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1407:     break;
1408:   }
1409:   return(0);
1410: }

1414: int MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1415: {
1416:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1417:   BSspmat      *A = mat->A;
1418:   BSsprow      *rs;
1419: 
1421:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1423:   rs  = A->rows[row - mat->rstart];
1424:   *nz = rs->length;
1425:   if (v)   *v   = rs->nz;
1426:   if (idx) *idx = rs->col;
1427:   return(0);
1428: }

1432: int MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1433: {
1435:   return(0);
1436: }

1438: /* ------------------------------------------------------------------ */

1442: int MatPrintHelp_MPIRowbs(Mat A)
1443: {
1444:   static PetscTruth called = PETSC_FALSE;
1445:   MPI_Comm          comm = A->comm;
1446:   int               ierr;

1449:   if (called) {return(0);} else called = PETSC_TRUE;
1450:   (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1451:   (*PetscHelpPrintf)(comm,"  -mat_rowbs_no_inode  - Do not use inodes\n");
1452:   return(0);
1453: }

1457: int MatSetUpPreallocation_MPIRowbs(Mat A)
1458: {
1459:   int        ierr;

1462:    MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1463:   return(0);
1464: }

1466: /* -------------------------------------------------------------------*/
1467: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1468:        MatGetRow_MPIRowbs,
1469:        MatRestoreRow_MPIRowbs,
1470:        MatMult_MPIRowbs,
1471: /* 4*/ MatMultAdd_MPIRowbs,
1472:        MatMult_MPIRowbs,
1473:        MatMultAdd_MPIRowbs,
1474:        MatSolve_MPIRowbs,
1475:        0,
1476:        0,
1477: /*10*/ 0,
1478:        0,
1479:        0,
1480:        0,
1481:        0,
1482: /*15*/ MatGetInfo_MPIRowbs,
1483:        0,
1484:        MatGetDiagonal_MPIRowbs,
1485:        0,
1486:        MatNorm_MPIRowbs,
1487: /*20*/ MatAssemblyBegin_MPIRowbs,
1488:        MatAssemblyEnd_MPIRowbs,
1489:        0,
1490:        MatSetOption_MPIRowbs,
1491:        MatZeroEntries_MPIRowbs,
1492: /*25*/ MatZeroRows_MPIRowbs,
1493:        0,
1494:        MatLUFactorNumeric_MPIRowbs,
1495:        0,
1496:        MatCholeskyFactorNumeric_MPIRowbs,
1497: /*30*/ MatSetUpPreallocation_MPIRowbs,
1498:        MatILUFactorSymbolic_MPIRowbs,
1499:        MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1500:        0,
1501:        0,
1502: /*35*/ 0,
1503:        MatForwardSolve_MPIRowbs,
1504:        MatBackwardSolve_MPIRowbs,
1505:        0,
1506:        0,
1507: /*40*/ 0,
1508:        MatGetSubMatrices_MPIRowbs,
1509:        0,
1510:        0,
1511:        0,
1512: /*45*/ MatPrintHelp_MPIRowbs,
1513:        MatScale_MPIRowbs,
1514:        0,
1515:        0,
1516:        0,
1517: /*50*/ 0,
1518:        0,
1519:        0,
1520:        0,
1521:        0,
1522: /*55*/ 0,
1523:        0,
1524:        0,
1525:        0,
1526:        0,
1527: /*60*/ MatGetSubMatrix_MPIRowbs,
1528:        MatDestroy_MPIRowbs,
1529:        MatView_MPIRowbs,
1530:        MatGetPetscMaps_Petsc,
1531:        MatUseScaledForm_MPIRowbs,
1532: /*65*/ MatScaleSystem_MPIRowbs,
1533:        MatUnScaleSystem_MPIRowbs,
1534:        0,
1535:        0,
1536:        0,
1537: /*70*/ 0,
1538:        0,
1539:        0,
1540:        0,
1541:        0,
1542: /*75*/ 0,
1543:        0,
1544:        0,
1545:        0,
1546:        0,
1547: /*80*/ 0,
1548:        0,
1549:        0,
1550:        0,
1551: /*85*/ MatLoad_MPIRowbs
1552: };

1554: /* ------------------------------------------------------------------- */

1556: EXTERN_C_BEGIN
1559: int MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1560: {
1561:   int        ierr;

1564:   mat->preallocated = PETSC_TRUE;
1565:   MatCreateMPIRowbs_local(mat,nz,nnz);
1566:   return(0);
1567: }
1568: EXTERN_C_END

1570: /*MC
1571:    MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1572:    with the external package BlockSolve95.  If BlockSolve95 is installed (see the manual for instructions
1573:    on how to declare the existence of external packages), a matrix type can be constructed which invokes
1574:    BlockSolve95 preconditioners and solvers. 

1576:    Options Database Keys:
1577: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()

1579:   Level: beginner

1581: .seealso: MatCreateMPIRowbs
1582: M*/
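/*
   A minimal usage sketch, assuming BlockSolve95 is installed and following the
   PETSc 2.x calling sequence used elsewhere in this file; the local/global sizes
   m and M are placeholders:

      Mat A;
      int ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,m,m,M,M,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIROWBS);CHKERRQ(ierr);
      ierr = MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);

   after which values are set with MatSetValues() and the matrix is assembled and
   used with the PETSc solvers as usual.
*/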

1584: EXTERN_C_BEGIN
1587: int MatCreate_MPIRowbs(Mat A)
1588: {
1589:   Mat_MPIRowbs *a;
1590:   BSmapping    *bsmap;
1591:   BSoff_map    *bsoff;
1592:   int          i,ierr,*offset,m,M;
1593:   PetscTruth   flg1,flg2,flg3;
1594:   BSprocinfo   *bspinfo;
1595:   MPI_Comm     comm;
1596: 
1598:   comm = A->comm;
1599:   m    = A->m;
1600:   M    = A->M;

1602:   PetscNew(Mat_MPIRowbs,&a);
1603:   A->data               = (void*)a;
1604:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1605:   A->factor             = 0;
1606:   A->mapping            = 0;
1607:   a->vecs_permscale     = PETSC_FALSE;
1608:   A->insertmode         = NOT_SET_VALUES;
1609:   a->blocksolveassembly = 0;
1610:   a->keepzeroedrows     = PETSC_FALSE;

1612:   MPI_Comm_rank(comm,&a->rank);
1613:   MPI_Comm_size(comm,&a->size);

1615:   PetscSplitOwnership(comm,&m,&M);

1617:   A->N = M;
1618:   A->M = M;
1619:   A->m = m;
1620:   A->n = A->N;  /* each row stores all columns */
1621:   PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1622:   a->reallocs                      = 0;

1624:   /* the information in the maps duplicates the information computed below, eventually 
1625:      we should remove the duplicate information that is not contained in the maps */
1626:   PetscMapCreateMPI(comm,m,M,&A->rmap);
1627:   PetscMapCreateMPI(comm,m,M,&A->cmap);

1629:   /* build local table of row ownerships */
1630:   PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1631:   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1632:   a->rowners[0] = 0;
1633:   for (i=2; i<=a->size; i++) {
1634:     a->rowners[i] += a->rowners[i-1];
1635:   }
1636:   a->rstart = a->rowners[a->rank];
1637:   a->rend   = a->rowners[a->rank+1];
1638:   PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));

1640:   /* build cache for off array entries formed */
1641:   MatStashCreate_Private(A->comm,1,&A->stash);
1642:   a->donotstash = PETSC_FALSE;

1644:   /* Initialize BlockSolve information */
1645:   a->A              = 0;
1646:   a->pA              = 0;
1647:   a->comm_pA  = 0;
1648:   a->fpA      = 0;
1649:   a->comm_fpA = 0;
1650:   a->alpha    = 1.0;
 1651:   a->ierr     = 0;
1652:   a->failures = 0;
1653:   MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1654:   VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1655:   VecDuplicate(a->diag,&(a->xwork));
1656:   PetscLogObjectParent(A,a->diag);  PetscLogObjectParent(A,a->xwork);
1657:   PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1658:   bspinfo = BScreate_ctx();CHKERRBS(0);
1659:   a->procinfo = bspinfo;
1660:   BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1661:   BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1662:   BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1663:   BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1664:   BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1665:   BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1666: #if defined(PETSC_USE_DEBUG)
1667:   BSctx_set_err(bspinfo,1);CHKERRBS(0);  /* BS error checking */
1668: #endif
1669:   BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1670:   PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1671:   if (flg1) {
1672:     BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1673:   }
1674:   PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1675:   PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1676:   PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1677:   if (flg1 || flg2 || flg3) {
1678:     BSctx_set_si(bspinfo,1);CHKERRBS(0);
1679:   } else {
1680:     BSctx_set_si(bspinfo,0);CHKERRBS(0);
1681:   }
1682: #if defined(PETSC_USE_LOG)
1683:   MLOG_INIT();  /* Initialize logging */
1684: #endif

1686:   /* Compute global offsets */
1687:   offset = &a->rstart;

1689:   PetscNew(BSmapping,&a->bsmap);
1690:   PetscLogObjectMemory(A,sizeof(BSmapping));
1691:   bsmap = a->bsmap;
1692:   PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1693:   *((int *)bsmap->vlocal2global) = (*offset);
1694:   bsmap->flocal2global                 = BSloc2glob;
1695:   bsmap->free_l2g                = 0;
1696:   PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1697:   *((int *)bsmap->vglobal2local) = (*offset);
1698:   bsmap->fglobal2local                 = BSglob2loc;
1699:   bsmap->free_g2l                 = 0;
1700:   bsoff                          = BSmake_off_map(*offset,bspinfo,A->M);
1701:   bsmap->vglobal2proc                 = (void *)bsoff;
1702:   bsmap->fglobal2proc                 = BSglob2proc;
1703:   bsmap->free_g2p                = (void(*)(void*)) BSfree_off_map;
1704:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1705:                                     "MatMPIRowbsSetPreallocation_MPIRowbs",
1706:                                      MatMPIRowbsSetPreallocation_MPIRowbs);
1707:   return(0);
1708: }
1709: EXTERN_C_END

1713: /* @
1714:   MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros 
1715:   per row in the matrix.

1717:   Input Parameter:
1718: +  mat - matrix
 1719: .  nz - number of nonzeros expected per row (used for every row when nnz is not given)
 1720: -  nnz - number of nonzeros expected in each row, or PETSC_NULL

1722:   Note:
1723:   This routine is valid only for matrices stored in the MATMPIROWBS
1724:   format.
1725: @ */
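/*
   A short sketch of the two preallocation modes, assuming "mat" already has type
   MATMPIROWBS; the constant 5 and the array lens[] are placeholders:

      ierr = MatMPIRowbsSetPreallocation(mat,5,PETSC_NULL);CHKERRQ(ierr);    (same estimate for every row)
      ierr = MatMPIRowbsSetPreallocation(mat,0,lens);CHKERRQ(ierr);          (per-row estimates in lens[])

   This mirrors the call made in MatLoad_MPIRowbs() below, which passes the row
   lengths read from the binary file as the nnz argument.
*/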
1726: int MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1727: {
1728:   int ierr,(*f)(Mat,int,const int[]);

1731:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1732:   if (f) {
1733:     (*f)(mat,nz,nnz);
1734:   }
1735:   return(0);
1736: }

1738: /* --------------- extra BlockSolve-specific routines -------------- */
1741: /* @
1742:   MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1743:   user can then manipulate to alter the default parameters.

1745:   Input Parameter:
1746:   mat - matrix

1748:   Output Parameter:
1749:   procinfo - processor information context

1751:   Note:
1752:   This routine is valid only for matrices stored in the MATMPIROWBS
1753:   format.
1754: @ */
1755: int MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1756: {
1757:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1758:   PetscTruth   ismpirowbs;
1759:   int          ierr;

1762:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1763:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1764:   procinfo = a->procinfo;
1765:   return(0);
1766: }

1770: int MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1771: {
1772:   Mat_MPIRowbs *a;
1773:   BSspmat      *A;
1774:   BSsprow      **rs;
1775:   Mat          mat;
1776:   int          i,nz,ierr,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1777:   int          header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1778:   PetscScalar  *vals;
1779:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1780:   MPI_Status   status;

1783:   MPI_Comm_size(comm,&size);
1784:   MPI_Comm_rank(comm,&rank);
1785:   if (!rank) {
1786:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1787:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
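    /* header[0] is the file cookie and header[1] the global row count; a negative
       header[3] flags a special storage format that cannot be loaded here */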
1788:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1789:     if (header[3] < 0) {
1790:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1791:     }
1792:   }

1794:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1795:   M = header[1];
1796:   /* determine ownership of all rows */
1797:   m          = M/size + ((M % size) > rank);
1798:   PetscMalloc((size+2)*sizeof(int),&rowners);
1799:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1800:   rowners[0] = 0;
1801:   for (i=2; i<=size; i++) {
1802:     rowners[i] += rowners[i-1];
1803:   }
1804:   rstart = rowners[rank];
1805:   rend   = rowners[rank+1];

1807:   /* distribute row lengths to all processors */
1808:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1809:   if (!rank) {
1810:     PetscMalloc(M*sizeof(int),&rowlengths);
1811:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1812:     PetscMalloc(size*sizeof(int),&sndcounts);
1813:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1814:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1815:     PetscFree(sndcounts);
1816:   } else {
1817:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1818:   }

1820:   /* create our matrix */
1821:   MatCreate(comm,m,m,M,M,newmat);
1822:   MatSetType(*newmat,type);
1823:   MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1824:   mat = *newmat;
1825:   PetscFree(ourlens);

1827:   a = (Mat_MPIRowbs*)mat->data;
1828:   A = a->A;
1829:   rs = A->rows;

1831:   if (!rank) {
1832:     /* calculate the number of nonzeros on each processor */
1833:     PetscMalloc(size*sizeof(int),&procsnz);
1834:     PetscMemzero(procsnz,size*sizeof(int));
1835:     for (i=0; i<size; i++) {
1836:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1837:         procsnz[i] += rowlengths[j];
1838:       }
1839:     }
1840:     PetscFree(rowlengths);

1842:     /* determine max buffer needed and allocate it */
1843:     maxnz = 0;
1844:     for (i=0; i<size; i++) {
1845:       maxnz = PetscMax(maxnz,procsnz[i]);
1846:     }
1847:     PetscMalloc(maxnz*sizeof(int),&cols);

1849:     /* read in my part of the matrix column indices  */
1850:     nz = procsnz[0];
1851:     PetscBinaryRead(fd,cols,nz,PETSC_INT);
1852: 
1853:     /* insert it into my part of matrix */
1854:     nz = 0;
1855:     for (i=0; i<A->num_rows; i++) {
1856:       for (j=0; j<a->imax[i]; j++) {
1857:         rs[i]->col[j] = cols[nz++];
1858:       }
1859:       rs[i]->length = a->imax[i];
1860:     }
1861:     /* read in parts for all other processors */
1862:     for (i=1; i<size; i++) {
1863:       nz   = procsnz[i];
1864:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1865:       MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1866:     }
1867:     PetscFree(cols);
1868:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1870:     /* read in my part of the matrix numerical values  */
1871:     nz   = procsnz[0];
1872:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1873: 
1874:     /* insert it into my part of matrix */
1875:     nz = 0;
1876:     for (i=0; i<A->num_rows; i++) {
1877:       for (j=0; j<a->imax[i]; j++) {
1878:         rs[i]->nz[j] = vals[nz++];
1879:       }
1880:     }
1881:     /* read in parts for all other processors */
1882:     for (i=1; i<size; i++) {
1883:       nz   = procsnz[i];
1884:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1885:       MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1886:     }
1887:     PetscFree(vals);
1888:     PetscFree(procsnz);
1889:   } else {
1890:     /* determine buffer space needed for message */
1891:     nz = 0;
1892:     for (i=0; i<A->num_rows; i++) {
1893:       nz += a->imax[i];
1894:     }
1895:     PetscMalloc(nz*sizeof(int),&cols);

1897:     /* receive message of column indices*/
1898:     MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1899:     MPI_Get_count(&status,MPI_INT,&maxnz);
1900:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");

1902:     /* insert it into my part of matrix */
1903:     nz = 0;
1904:     for (i=0; i<A->num_rows; i++) {
1905:       for (j=0; j<a->imax[i]; j++) {
1906:         rs[i]->col[j] = cols[nz++];
1907:       }
1908:       rs[i]->length = a->imax[i];
1909:     }
1910:     PetscFree(cols);
1911:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1913:     /* receive message of values*/
1914:     MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1915:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1916:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");

1918:     /* insert it into my part of matrix */
1919:     nz = 0;
1920:     for (i=0; i<A->num_rows; i++) {
1921:       for (j=0; j<a->imax[i]; j++) {
1922:         rs[i]->nz[j] = vals[nz++];
1923:       }
1924:       rs[i]->length = a->imax[i];
1925:     }
1926:     PetscFree(vals);
1927:   }
1928:   PetscFree(rowners);
1929:   a->nz = a->maxnz;
1930:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1931:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1932:   return(0);
1933: }

1935: /* 
1936:     Special destroy and view routines for factored matrices 
1937: */
1940: static int MatDestroy_MPIRowbs_Factored(Mat mat)
1941: {
1943: #if defined(PETSC_USE_LOG)
1944:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1945: #endif
1946:   return(0);
1947: }

1951: static int MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1952: {

1956:   MatView((Mat) mat->data,viewer);
1957:   return(0);
1958: }

1962: int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1963: {
1964:   /* Note:  the fill parameter in info is not currently used by BlockSolve */
1965:   Mat          newmat;
1966:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1967:   int          ierr;
1968:   PetscTruth   idn;

1971:   if (isrow) {
1972:     ISIdentity(isrow,&idn);
1973:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1974:   }

1976:   if (!mat->symmetric) {
1977:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1978:         preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1979:         symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
1980:   }

1982:   /* If the icc_storage flag wasn't set before the last blocksolveassembly,          */
1983:   /* we must completely redo the assembly as a different storage format is required. */
1984:   if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
1985:     mat->same_nonzero       = PETSC_FALSE;
1986:     mbs->blocksolveassembly = 0;
1987:   }

1989:   if (!mbs->blocksolveassembly) {
1990:     BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1991:     BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1992:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1993:   }

1995:   /* Copy permuted matrix */
1996:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1997:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

1999:   /* Set up the communication for factorization */
2000:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2001:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2003:   /* 
2004:       Create a new Mat structure to hold the "factored" matrix;
2005:     note that this merely contains a pointer to the original matrix, since
2006:     the original matrix contains the factor information.
2007:   */
2008:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2009:   PetscLogObjectCreate(newmat);
2010:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2012:   newmat->data         = (void*)mat;
2013:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2014:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2015:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2016:   newmat->factor       = 1;
2017:   newmat->preallocated = PETSC_TRUE;
2018:   newmat->M            = mat->M;
2019:   newmat->N            = mat->N;
2020:   newmat->m            = mat->m;
2021:   newmat->n            = mat->n;
2022:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2024:   *newfact = newmat;
2025:   return(0);
2026: }

2030: int MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2031: {
2032:   Mat          newmat;
2033:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2034:   int          ierr;
2035:   PetscTruth   idn;

2038:   if (info->levels != 0) SETERRQ(1,"Blocksolve ILU only supports 0 fill");
2039:   if (isrow) {
2040:     ISIdentity(isrow,&idn);
2041:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2042:   }
2043:   if (iscol) {
2044:     ISIdentity(iscol,&idn);
2045:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2046:   }

2048:   if (!mbs->blocksolveassembly) {
2049:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2050:   }
2051: 
2052: /*   if (mat->symmetric) { */
2053: /*     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2054: /*         MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2055: /*         using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2056: /*   } */

2058:   /* Copy permuted matrix */
2059:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2060:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2062:   /* Set up the communication for factorization */
2063:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2064:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2066:   /* 
2067:       Create a new Mat structure to hold the "factored" matrix;
2068:     note that this merely contains a pointer to the original matrix, since
2069:     the original matrix contains the factor information.
2070:   */
2071:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2072:   PetscLogObjectCreate(newmat);
2073:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2075:   newmat->data         = (void*)mat;
2076:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2077:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2078:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2079:   newmat->factor       = 1;
2080:   newmat->preallocated = PETSC_TRUE;
2081:   newmat->M            = mat->M;
2082:   newmat->N            = mat->N;
2083:   newmat->m            = mat->m;
2084:   newmat->n            = mat->n;
2085:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2087:   *newfact = newmat;
2088:   return(0);
2089: }

2093: int MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2094: {
2095:   int          ierr;

2100:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2101:   ISColoringCreate(mat->comm,mat->m,0,coloring);

2103:   return(0);
2104: }

2108: /*@C
2109:    MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2110:    format.  This format is intended primarily as an interface for BlockSolve95.

2112:    Collective on MPI_Comm

2114:    Input Parameters:
2115: +  comm - MPI communicator
2116: .  m - number of local rows (or PETSC_DECIDE to have it calculated)
2117: .  M - number of global rows (or PETSC_DECIDE to have it calculated)
2118: .  nz - number of nonzeros per row (same for all local rows)
2119: -  nnz - number of nonzeros per row (possibly different for each row).

2121:    Output Parameter:
2122: .  newA - the matrix 

2124:    Notes:
2125:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
2126:    then it must be used on all processors that share the object for that argument.

2128:    The user MUST specify either the local or global matrix dimensions
2129:    (possibly both).

2131:    Specify the preallocated storage with either nz or nnz (not both).  Set 
2132:    nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2133:    allocation.

2135:    Notes:
2136:    By default, the matrix is assumed to be nonsymmetric; the user can
2137:    take advantage of special optimizations for symmetric matrices by calling
2138: $     MatSetOption(mat,MAT_SYMMETRIC)
2139: $     MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2140:    BEFORE calling the routine MatAssemblyBegin().

2142:    Internally, the MATMPIROWBS format inserts zero elements to the
2143:    matrix if necessary, so that nonsymmetric matrices are considered
2144:    to be symmetric in terms of their sparsity structure; this format
2145:    is required for use of the parallel communication routines within
2146:    BlockSolve95. In particular, if the matrix element A[i,j] exists,
2147:    then PETSc will internally allocate a 0 value for the element
2148:    A[j,i] during MatAssemblyEnd() if the user has not already set
2149:    a value for the matrix element A[j,i].

2151:    Options Database Keys:
2152: .  -mat_rowbs_no_inode - Do not use inodes.

2154:    Level: intermediate
2155:   
2156: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve

2158: .seealso: MatCreate(), MatSetValues()
2159: @*/
2160: int MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2161: {
2163: 
2165:   MatCreate(comm,m,m,M,M,newA);
2166:   MatSetType(*newA,MATMPIROWBS);
2167:   MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2168:   return(0);
2169: }
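
/*
   A usage sketch (not part of the original source), assuming a square system of
   global size M with about 5 nonzeros per row.  As described in the notes above,
   the symmetry options are set before assembly so that ICC preconditioning can
   be used later.

     Mat A;
     MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,M,5,PETSC_NULL,&A);
     MatSetOption(A,MAT_SYMMETRIC);
     MatSetOption(A,MAT_SYMMETRY_ETERNAL);
     ... calls to MatSetValues(A,...) ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/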


2172: /* -------------------------------------------------------------------------*/

2174:  #include src/mat/impls/aij/seq/aij.h
2175:  #include src/mat/impls/aij/mpi/mpiaij.h

2179: int MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2180: {
2181:   int         nmax,nstages_local,nstages,i,pos,max_no,ierr;


2185:   /* Allocate memory to hold all the submatrices */
2186:   if (scall != MAT_REUSE_MATRIX) {
2187:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
2188:   }
2189: 
2190:   /* Determine the number of stages through which submatrices are done */
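  /* nmax = how many index sets fit in roughly 20 MB of integer workspace (C->N ints per IS) */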
2191:   nmax          = 20*1000000 / (C->N * sizeof(int));
2192:   if (!nmax) nmax = 1;
2193:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

2195:   /* Make sure every processor loops through the nstages */
2196:   MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);

2198:   for (i=0,pos=0; i<nstages; i++) {
2199:     if (pos+nmax <= ismax) max_no = nmax;
2200:     else if (pos == ismax) max_no = 0;
2201:     else                   max_no = ismax-pos;
2202:     MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2203:     pos += max_no;
2204:   }
2205:   return(0);
2206: }
2207: /* -------------------------------------------------------------------------*/
2208: /* For now MatGetSubMatrices_MPIRowbs_Local creates SeqAIJ submatrices of the input
2209:    matrix and preserves the explicit zeros that come from structural symmetry.
2210:  */
2213: int MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2214: {
2215:   Mat_MPIRowbs  *c = (Mat_MPIRowbs *)(C->data);
2216:   BSspmat       *A = c->A;
2217:   Mat_SeqAIJ    *mat;
2218:   int         **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2219:   int         **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,ierr,**rbuf1,row,proc;
2220:   int         nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2221:   int         **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2222:   int         **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2223:   int         len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2224:   int         *rmap_i,tag0,tag1,tag2,tag3;
2225:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2226:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2227:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2228:   MPI_Status  *r_status3,*r_status4,*s_status4;
2229:   MPI_Comm    comm;
2230:   FLOAT       **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2231:   PetscScalar *mat_a;
2232:   PetscTruth  sorted;
2233:   int         *onodes1,*olengths1;

2236:   comm   = C->comm;
2237:   tag0   = C->tag;
2238:   size   = c->size;
2239:   rank   = c->rank;
2240:   m      = C->M;
2241: 
2242:   /* Get some new tags to keep the communication clean */
2243:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2244:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2245:   PetscObjectGetNewTag((PetscObject)C,&tag3);
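  /* Communication plan: tag0 carries the requested row numbers, tag1 the reply with the
     row lengths, tag2 the column indices, and tag3 the numerical values */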

2247:   /* Check if the row and column index sets are sorted */
2248:   for (i=0; i<ismax; i++) {
2249:     ISSorted(isrow[i],&sorted);
2250:     if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2251:     ISSorted(iscol[i],&sorted);
2252:     /*    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2253:   }

2255:   len    = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2256:   PetscMalloc(len,&irow);
2257:   icol   = irow + ismax;
2258:   nrow   = (int*)(icol + ismax);
2259:   ncol   = nrow + ismax;
2260:   rtable = ncol + ismax;

2262:   for (i=0; i<ismax; i++) {
2263:     ISGetIndices(isrow[i],&irow[i]);
2264:     ISGetIndices(iscol[i],&icol[i]);
2265:     ISGetLocalSize(isrow[i],&nrow[i]);
2266:     ISGetLocalSize(iscol[i],&ncol[i]);
2267:   }

2269:   /* Create hash table for the mapping :row -> proc*/
2270:   for (i=0,j=0; i<size; i++) {
2271:     jmax = c->rowners[i+1];
2272:     for (; j<jmax; j++) {
2273:       rtable[j] = i;
2274:     }
2275:   }

2277:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2278:      required. Based on this, buffers are allocated, and data copied into them*/
2279:   PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2280:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2281:   w3     = w2 + size;      /* no of IS that needs to be sent to proc i */
2282:   w4     = w3 + size;      /* temp work space used in determining w1, w2, w3 */
2283:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2284:   for (i=0; i<ismax; i++) {
2285:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2286:     jmax   = nrow[i];
2287:     irow_i = irow[i];
2288:     for (j=0; j<jmax; j++) {
2289:       row  = irow_i[j];
2290:       proc = rtable[row];
2291:       w4[proc]++;
2292:     }
2293:     for (j=0; j<size; j++) {
2294:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
2295:     }
2296:   }
2297: 
2298:   nrqs     = 0;              /* no of outgoing messages */
2299:   msz      = 0;              /* total mesg length (for all procs) */
2300:   w1[rank] = 0;              /* no mesg sent to self */
2301:   w3[rank] = 0;
2302:   for (i=0; i<size; i++) {
2303:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2304:   }
2305:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2306:   for (i=0,j=0; i<size; i++) {
2307:     if (w1[i]) { pa[j] = i; j++; }
2308:   }

2310:   /* Each message would have a header = 1 + 2*(no of IS) + data */
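  /* i.e. sbuf1[j] = [ number of index sets, (IS index, row count) pairs, requested global row numbers ] */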
2311:   for (i=0; i<nrqs; i++) {
2312:     j     = pa[i];
2313:     w1[j] += w2[j] + 2* w3[j];
2314:     msz   += w1[j];
2315:   }

2317:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2318:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2319:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

2321:   /* Now post the Irecvs corresponding to these messages */
2322:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2323: 
2324:   PetscFree(onodes1);
2325:   PetscFree(olengths1);
2326: 
2327:   /* Allocate Memory for outgoing messages */
2328:   len      = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2329:   PetscMalloc(len,&sbuf1);
2330:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2331:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2332:   /* allocate memory for outgoing data + buf to receive the first reply */
2333:   tmp      = (int*)(ptr + size);
2334:   ctr      = tmp + 2*msz;

2336:   {
2337:     int *iptr = tmp,ict = 0;
2338:     for (i=0; i<nrqs; i++) {
2339:       j         = pa[i];
2340:       iptr     += ict;
2341:       sbuf1[j]  = iptr;
2342:       ict       = w1[j];
2343:     }
2344:   }

2346:   /* Form the outgoing messages */
2347:   /* Initialize the header space */
2348:   for (i=0; i<nrqs; i++) {
2349:     j           = pa[i];
2350:     sbuf1[j][0] = 0;
2351:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2352:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
2353:   }
2354: 
2355:   /* Parse the isrow and copy data into outbuf */
2356:   for (i=0; i<ismax; i++) {
2357:     PetscMemzero(ctr,size*sizeof(int));
2358:     irow_i = irow[i];
2359:     jmax   = nrow[i];
2360:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
2361:       row  = irow_i[j];
2362:       proc = rtable[row];
2363:       if (proc != rank) { /* copy to the outgoing buf*/
2364:         ctr[proc]++;
2365:         *ptr[proc] = row;
2366:         ptr[proc]++;
2367:       }
2368:     }
2369:     /* Update the headers for the current IS */
2370:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
2371:       if ((ctr_j = ctr[j])) {
2372:         sbuf1_j        = sbuf1[j];
2373:         k              = ++sbuf1_j[0];
2374:         sbuf1_j[2*k]   = ctr_j;
2375:         sbuf1_j[2*k-1] = i;
2376:       }
2377:     }
2378:   }

2380:   /*  Now  post the sends */
2381:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2382:   for (i=0; i<nrqs; ++i) {
2383:     j    = pa[i];
2384:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2385:   }

2387:   /* Post Receives to capture the buffer size */
2388:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2389:   PetscMalloc((nrqs+1)*sizeof(int *),&rbuf2);
2390:   rbuf2[0] = tmp + msz;
2391:   for (i=1; i<nrqs; ++i) {
2392:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2393:   }
2394:   for (i=0; i<nrqs; ++i) {
2395:     j    = pa[i];
2396:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2397:   }

2399:   /* Send to other procs the buf size they should allocate */
2400: 

2402:   /* Receive messages*/
2403:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2404:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2405:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2406:   PetscMalloc(len,&sbuf2);
2407:   req_size    = (int*)(sbuf2 + nrqr);
2408:   req_source  = req_size + nrqr;
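  /* req_size[i] is the total number of matrix entries to return for request i, and
     req_source[i] the rank of the process that made the request */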
2409: 
2410:   {
2411:     BSsprow    **sAi = A->rows;
2412:     int        id,rstart = c->rstart;
2413:     int        *sbuf2_i;

2415:     for (i=0; i<nrqr; ++i) {
2416:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2417:       req_size[idx]   = 0;
2418:       rbuf1_i         = rbuf1[idx];
2419:       start           = 2*rbuf1_i[0] + 1;
2420:       MPI_Get_count(r_status1+i,MPI_INT,&end);
2421:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2422:       sbuf2_i         = sbuf2[idx];
2423:       for (j=start; j<end; j++) {
2424:         id               = rbuf1_i[j] - rstart;
2425:         ncols            = (sAi[id])->length;
2426:         sbuf2_i[j]       = ncols;
2427:         req_size[idx]   += ncols;
2428:       }
2429:       req_source[idx] = r_status1[i].MPI_SOURCE;
2430:       /* form the header */
2431:       sbuf2_i[0]   = req_size[idx];
2432:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2433:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2434:     }
2435:   }
2436:   PetscFree(r_status1);
2437:   PetscFree(r_waits1);

2439:   /*  recv buffer sizes */
2440:   /* Receive messages*/
2441: 
2442:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2443:   PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2444:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2445:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2446:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

2448:   for (i=0; i<nrqs; ++i) {
2449:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2450:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2451:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2452:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2453:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2454:   }
2455:   PetscFree(r_status2);
2456:   PetscFree(r_waits2);
2457: 
2458:   /* Wait on sends1 and sends2 */
2459:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2460:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

2462:   MPI_Waitall(nrqs,s_waits1,s_status1);
2463:   MPI_Waitall(nrqr,s_waits2,s_status2);
2464:   PetscFree(s_status1);
2465:   PetscFree(s_status2);
2466:   PetscFree(s_waits1);
2467:   PetscFree(s_waits2);

2469:   /* Now allocate buffers for a->j, and send them off */
2470:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2471:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2472:   PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2473:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2474: 
2475:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2476:   {
2477:     BSsprow *brow;
2478:     int *Acol;
2479:     int rstart = c->rstart;

2481:     for (i=0; i<nrqr; i++) {
2482:       rbuf1_i   = rbuf1[i];
2483:       sbuf_aj_i = sbuf_aj[i];
2484:       ct1       = 2*rbuf1_i[0] + 1;
2485:       ct2       = 0;
2486:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2487:         kmax = rbuf1[i][2*j];
2488:         for (k=0; k<kmax; k++,ct1++) {
2489:           brow   = A->rows[rbuf1_i[ct1] - rstart];
2490:           ncols  = brow->length;
2491:           Acol   = brow->col;
2492:           /* load the column indices for this row into cols*/
2493:           cols  = sbuf_aj_i + ct2;
2494:           PetscMemcpy(cols,Acol,ncols*sizeof(int));
2495:           /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with
2496:                                                           mappings?? */
2497:           ct2 += ncols;
2498:         }
2499:       }
2500:       MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2501:     }
2502:   }
2503:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2504:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

2506:   /* Allocate buffers for a->a, and send them off */
2507:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2508:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2509:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2510:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2511: 
2512:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2513:   {
2514:     BSsprow *brow;
2515:     FLOAT *Aval;
2516:     int rstart = c->rstart;
2517: 
2518:     for (i=0; i<nrqr; i++) {
2519:       rbuf1_i   = rbuf1[i];
2520:       sbuf_aa_i = sbuf_aa[i];
2521:       ct1       = 2*rbuf1_i[0]+1;
2522:       ct2       = 0;
2523:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2524:         kmax = rbuf1_i[2*j];
2525:         for (k=0; k<kmax; k++,ct1++) {
2526:           brow  = A->rows[rbuf1_i[ct1] - rstart];
2527:           ncols = brow->length;
2528:           Aval  = brow->nz;
2529:           /* load the column values for this row into vals*/
2530:           vals  = sbuf_aa_i+ct2;
2531:           PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2532:           ct2 += ncols;
2533:         }
2534:       }
2535:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2536:     }
2537:   }
2538:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2539:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2540:   PetscFree(rbuf1);

2542:   /* Form the matrix */
2543:   /* create col map */
2544:   {
2545:     int *icol_i;
2546: 
2547:     len     = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2548:     PetscMalloc(len,&cmap);
2549:     cmap[0] = (int *)(cmap + ismax);
2550:     PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2551:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2552:     for (i=0; i<ismax; i++) {
2553:       jmax   = ncol[i];
2554:       icol_i = icol[i];
2555:       cmap_i = cmap[i];
2556:       for (j=0; j<jmax; j++) {
2557:         cmap_i[icol_i[j]] = j+1;
2558:       }
2559:     }
2560:   }

2562:   /* Create lens which is required for MatCreate... */
2563:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2564:   len     = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2565:   PetscMalloc(len,&lens);
2566:   lens[0] = (int *)(lens + ismax);
2567:   PetscMemzero(lens[0],j*sizeof(int));
2568:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2569: 
2570:   /* Update lens from local data */
2571:   { BSsprow *Arow;
2572:     for (i=0; i<ismax; i++) {
2573:       jmax   = nrow[i];
2574:       cmap_i = cmap[i];
2575:       irow_i = irow[i];
2576:       lens_i = lens[i];
2577:       for (j=0; j<jmax; j++) {
2578:         row  = irow_i[j];
2579:         proc = rtable[row];
2580:         if (proc == rank) {
2581:           Arow=A->rows[row-c->rstart];
2582:           ncols=Arow->length;
2583:           cols=Arow->col;
2584:           for (k=0; k<ncols; k++) {
2585:             if (cmap_i[cols[k]]) { lens_i[j]++;}
2586:           }
2587:         }
2588:       }
2589:     }
2590:   }
2591: 
2592:   /* Create row map*/
2593:   len     = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2594:   PetscMalloc(len,&rmap);
2595:   rmap[0] = (int *)(rmap + ismax);
2596:   PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2597:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2598:   for (i=0; i<ismax; i++) {
2599:     rmap_i = rmap[i];
2600:     irow_i = irow[i];
2601:     jmax   = nrow[i];
2602:     for (j=0; j<jmax; j++) {
2603:       rmap_i[irow_i[j]] = j;
2604:     }
2605:   }
2606: 
2607:   /* Update lens from offproc data */
2608:   {
2609:     int *rbuf2_i,*rbuf3_i,*sbuf1_i;

2611:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2612:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2613:       idx     = pa[i];
2614:       sbuf1_i = sbuf1[idx];
2615:       jmax    = sbuf1_i[0];
2616:       ct1     = 2*jmax+1;
2617:       ct2     = 0;
2618:       rbuf2_i = rbuf2[i];
2619:       rbuf3_i = rbuf3[i];
2620:       for (j=1; j<=jmax; j++) {
2621:         is_no   = sbuf1_i[2*j-1];
2622:         max1    = sbuf1_i[2*j];
2623:         lens_i  = lens[is_no];
2624:         cmap_i  = cmap[is_no];
2625:         rmap_i  = rmap[is_no];
2626:         for (k=0; k<max1; k++,ct1++) {
2627:           row  = rmap_i[sbuf1_i[ct1]]; /* row index in the new submatrix */
2628:           max2 = rbuf2_i[ct1];
2629:           for (l=0; l<max2; l++,ct2++) {
2630:             if (cmap_i[rbuf3_i[ct2]]) {
2631:               lens_i[row]++;
2632:             }
2633:           }
2634:         }
2635:       }
2636:     }
2637:   }
2638:   PetscFree(r_status3);
2639:   PetscFree(r_waits3);
2640:   MPI_Waitall(nrqr,s_waits3,s_status3);
2641:   PetscFree(s_status3);
2642:   PetscFree(s_waits3);

2644:   /* Create the submatrices */
2645:   if (scall == MAT_REUSE_MATRIX) {
2646:     PetscTruth same;
2647: 
2648:     /*
2649:         Assumes the new rows are the same length as the old rows - hence a bug!
2650:     */
2651:     for (i=0; i<ismax; i++) {
2652:       PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2653:       if (same == PETSC_FALSE) {
2654:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2655:       }
2656:       mat = (Mat_SeqAIJ*)(submats[i]->data);
2657:       if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2658:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2659:       }
2660:       PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2661:       if (same == PETSC_FALSE) {
2662:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2663:       }
2664:       /* Initialize the matrix as if empty */
2665:       PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2666:       submats[i]->factor = C->factor;
2667:     }
2668:   } else {
2669:     for (i=0; i<ismax; i++) {
2670:       /* Here we want to explicitly generate SeqAIJ matrices */
2671:       MatCreate(PETSC_COMM_SELF,nrow[i],ncol[i],nrow[i],ncol[i],submats+i);
2672:       MatSetType(submats[i],MATSEQAIJ);
2673:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2674:     }
2675:   }

2677:   /* Assemble the matrices */
2678:   /* First assemble the local rows */
2679:   {
2680:     int    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2681:     PetscScalar *imat_a;
2682:     BSsprow *Arow;
2683: 
2684:     for (i=0; i<ismax; i++) {
2685:       mat       = (Mat_SeqAIJ*)submats[i]->data;
2686:       imat_ilen = mat->ilen;
2687:       imat_j    = mat->j;
2688:       imat_i    = mat->i;
2689:       imat_a    = mat->a;
2690:       cmap_i    = cmap[i];
2691:       rmap_i    = rmap[i];
2692:       irow_i    = irow[i];
2693:       jmax      = nrow[i];
2694:       for (j=0; j<jmax; j++) {
2695:         row      = irow_i[j];
2696:         proc     = rtable[row];
2697:         if (proc == rank) {
2698:           old_row  = row;
2699:           row      = rmap_i[row];
2700:           ilen_row = imat_ilen[row];
2701: 
2702:           Arow=A->rows[old_row-c->rstart];
2703:           ncols=Arow->length;
2704:           cols=Arow->col;
2705:           vals=Arow->nz;
2706: 
2707:           mat_i    = imat_i[row];
2708:           mat_a    = imat_a + mat_i;
2709:           mat_j    = imat_j + mat_i;
2710:           for (k=0; k<ncols; k++) {
2711:             if ((tcol = cmap_i[cols[k]])) {
2712:               *mat_j++ = tcol - 1;
2713:               *mat_a++ = (PetscScalar)vals[k];
2714:               ilen_row++;
2715:             }
2716:           }
2717:           imat_ilen[row] = ilen_row;
2718:         }
2719:       }
2720:     }
2721:   }

2723:   /*   Now assemble the off proc rows*/
2724:   {
2725:     int    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2726:     int    *imat_j,*imat_i;
2727:     PetscScalar *imat_a;
2728:     FLOAT *rbuf4_i;
2729: 
2730:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2731:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2732:       idx     = pa[i];
2733:       sbuf1_i = sbuf1[idx];
2734:       jmax    = sbuf1_i[0];
2735:       ct1     = 2*jmax + 1;
2736:       ct2     = 0;
2737:       rbuf2_i = rbuf2[i];
2738:       rbuf3_i = rbuf3[i];
2739:       rbuf4_i = rbuf4[i];
2740:       for (j=1; j<=jmax; j++) {
2741:         is_no     = sbuf1_i[2*j-1];
2742:         rmap_i    = rmap[is_no];
2743:         cmap_i    = cmap[is_no];
2744:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
2745:         imat_ilen = mat->ilen;
2746:         imat_j    = mat->j;
2747:         imat_i    = mat->i;
2748:         imat_a    = mat->a;
2749:         max1      = sbuf1_i[2*j];
2750:         for (k=0; k<max1; k++,ct1++) {
2751:           row   = sbuf1_i[ct1];
2752:           row   = rmap_i[row];
2753:           ilen  = imat_ilen[row];
2754:           mat_i = imat_i[row];
2755:           mat_a = imat_a + mat_i;
2756:           mat_j = imat_j + mat_i;
2757:           max2 = rbuf2_i[ct1];
2758:           for (l=0; l<max2; l++,ct2++) {
2759:             if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2760:               *mat_j++ = tcol - 1;
2761:               *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2762:               ilen++;
2763:             }
2764:           }
2765:           imat_ilen[row] = ilen;
2766:         }
2767:       }
2768:     }
2769:   }
2770:   PetscFree(r_status4);
2771:   PetscFree(r_waits4);
2772:   MPI_Waitall(nrqr,s_waits4,s_status4);
2773:   PetscFree(s_waits4);
2774:   PetscFree(s_status4);

2776:   /* Restore the indices */
2777:   for (i=0; i<ismax; i++) {
2778:     ISRestoreIndices(isrow[i],irow+i);
2779:     ISRestoreIndices(iscol[i],icol+i);
2780:   }

2782:   /* Destroy allocated memory */
2783:   PetscFree(irow);
2784:   PetscFree(w1);
2785:   PetscFree(pa);

2787:   PetscFree(sbuf1);
2788:   PetscFree(rbuf2);
2789:   for (i=0; i<nrqr; ++i) {
2790:     PetscFree(sbuf2[i]);
2791:   }
2792:   for (i=0; i<nrqs; ++i) {
2793:     PetscFree(rbuf3[i]);
2794:     PetscFree(rbuf4[i]);
2795:   }

2797:   PetscFree(sbuf2);
2798:   PetscFree(rbuf3);
2799:   PetscFree(rbuf4);
2800:   PetscFree(sbuf_aj[0]);
2801:   PetscFree(sbuf_aj);
2802:   PetscFree(sbuf_aa[0]);
2803:   PetscFree(sbuf_aa);
2804: 
2805:   PetscFree(cmap);
2806:   PetscFree(rmap);
2807:   PetscFree(lens);

2809:   for (i=0; i<ismax; i++) {
2810:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2811:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2812:   }
2813:   return(0);
2814: }

2816: /*
2817:   This can be optimized by sending only the nonzeros selected by the iscol IS -
2818:   i.e. prebuild the submatrix on the sending side, including the A,B partitioning.
2819:   */
2822:  #include src/vec/is/impls/general/general.h
2823: int MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2824: {
2825:   Mat_MPIRowbs  *c = (Mat_MPIRowbs*)C->data;
2826:   BSspmat       *A = c->A;
2827:   BSsprow *Arow;
2828:   Mat_SeqAIJ    *matA,*matB; /* on-proc and off-proc parts of submat */
2829:   Mat_MPIAIJ    *mat;  /* submat->data */
2830:   int    *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2831:   int    *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2832:   int    i,j,k,l,ierr,len,jmax,proc,idx;
2833:   int    **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2834:   FLOAT  **rbuf4,**sbuf4; /* FLOAT is from the BlockSolve95 library */

2836:   int    *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2837:   int    *req_source;
2838:   int    ncols_t;
2839: 
2840: 
2841:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2842:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2843: 
2844:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2845:   MPI_Status  *r_status3,*r_status4,*s_status4;
2846:   MPI_Comm    comm;


2850:   comm   = C->comm;
2851:   tag0   = C->tag;
2852:   size   = c->size;
2853:   rank   = c->rank;

2855:   if (size==1) {
2856:     if (scall == MAT_REUSE_MATRIX) {
2857:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2858:       return(0);
2859:     } else {
2860:       Mat *newsubmat;
2861: 
2862:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2863:       *submat=*newsubmat;
2864:       ierr=PetscFree(newsubmat);
2865:       return(0);
2866:     }
2867:   }
2868: 
2869:   /* Get some new tags to keep the communication clean */
2870:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2871:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2872:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2874:   /* Check if the col indices are sorted */
2875:   {PetscTruth sorted;
2876:   ISSorted(isrow,&sorted);
2877:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2878:   ISSorted(iscol,&sorted);
2879:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2880:   }
2881: 
2882:   ISGetIndices(isrow,&irow);
2883:   ISGetIndices(iscol,&icol);
2884:   ISGetLocalSize(isrow,&nrow);
2885:   ISGetLocalSize(iscol,&ncol);
2886: 
2887:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2888:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2889: 
2890: 
2891:   len    = (C->M+1)*sizeof(int);
2892:   PetscMalloc(len,&rtable);
2893:   /* Create hash table for the mapping :row -> proc*/
2894:   for (i=0,j=0; i<size; i++) {
2895:     jmax = c->rowners[i+1];
2896:     for (; j<jmax; j++) {
2897:       rtable[j] = i;
2898:     }
2899:   }

2901:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2902:      required. Based on this, buffers are allocated, and data copied into them*/
2903:   PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2904:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2905:   PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2906:   for (j=0; j<nrow; j++) {
2907:     row_t  = irow[j];
2908:     proc   = rtable[row_t];
2909:     w1[proc]++;
2910:   }
2911:   nrqs     = 0;              /* no of outgoing messages */
2912:   msz      = 0;              /* total mesg length (for all procs) */
2913:   w1[rank] = 0;              /* no mesg sent to self */
2914:   for (i=0; i<size; i++) {
2915:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2916:   }
2917: 
2918:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2919:   for (i=0,j=0; i<size; i++) {
2920:     if (w1[i]) {
2921:       pa[j++] = i;
2922:       w1[i]++;  /* header for return data */
2923:       msz+=w1[i];
2924:     }
2925:   }
2926: 
2927:   {int  *onodes1,*olengths1;
2928:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2929:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2930:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2931:   /* Now post the Irecvs corresponding to these messages */
2932:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2933:   PetscFree(onodes1);
2934:   PetscFree(olengths1);
2935:   }
2936: 
2937: { int **ptr,*iptr,*tmp;
2938:   /* Allocate Memory for outgoing messages */
2939:   len      = 2*size*sizeof(int*) + msz*sizeof(int);
2940:   PetscMalloc(len,&sbuf1);
2941:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2942:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2943:   /* allocate memory for outgoing data + buf to receive the first reply */
2944:   tmp      = (int*)(ptr + size);

2946:   for (i=0,iptr=tmp; i<nrqs; i++) {
2947:     j         = pa[i];
2948:     sbuf1[j]  = iptr;
2949:     iptr     += w1[j];
2950:   }

2952:   /* Form the outgoing messages */
2953:   for (i=0; i<nrqs; i++) {
2954:     j           = pa[i];
2955:     sbuf1[j][0] = 0;   /*header */
2956:     ptr[j]      = sbuf1[j] + 1;
2957:   }
2958: 
2959:   /* Parse the isrow and copy data into outbuf */
2960:   for (j=0; j<nrow; j++) {
2961:     row_t  = irow[j];
2962:     proc = rtable[row_t];
2963:     if (proc != rank) { /* copy to the outgoing buf*/
2964:       sbuf1[proc][0]++;
2965:       *ptr[proc] = row_t;
2966:       ptr[proc]++;
2967:     }
2968:   }
2969: } /* block */

2971:   /*  Now  post the sends */
2972: 
2973:   /* structure of sbuf1[i]/rbuf1[i]: 1 entry (number of requested rows) followed by
2974:    * the global numbers of the requested (non-local) rows */

2976:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2977:   for (i=0; i<nrqs; ++i) {
2978:     j    = pa[i];
2979:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2980:   }

2982:   /* Post Receives to capture the buffer size */
2983:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2984:   PetscMalloc((nrqs+1)*sizeof(int *),&rbuf2);
2985:   PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
2986:   for (i=1; i<nrqs; ++i) {
2987:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2988:   }
2989:   for (i=0; i<nrqs; ++i) {
2990:     j    = pa[i];
2991:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2992:   }

2994:   /* Send to other procs the buf size they should allocate */
2995:   /* structure of sbuf2[i]/rbuf2[i]: 1 entry (total size to allocate) followed by
2996:    * the individual row sizes */

2998:   /* Receive messages*/
2999:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3000:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3001:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3002:   PetscMalloc(len,&sbuf2);
3003:   req_size    = (int*)(sbuf2 + nrqr);
3004:   req_source  = req_size + nrqr;
3005: 
3006:   {
3007:     BSsprow    **sAi = A->rows;
3008:     int        id,rstart = c->rstart;
3009:     int        *sbuf2_i,*rbuf1_i,end;

3011:     for (i=0; i<nrqr; ++i) {
3012:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3013:       req_size[idx]   = 0;
3014:       rbuf1_i         = rbuf1[idx];
3015:       MPI_Get_count(r_status1+i,MPI_INT,&end);
3016:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3017:       sbuf2_i         = sbuf2[idx];
3018:       for (j=1; j<end; j++) {
3019:         id               = rbuf1_i[j] - rstart;
3020:         ncols_t          = (sAi[id])->length;
3021:         sbuf2_i[j]       = ncols_t;
3022:         req_size[idx]   += ncols_t;
3023:       }
3024:       req_source[idx] = r_status1[i].MPI_SOURCE;
3025:       /* form the header */
3026:       sbuf2_i[0]   = req_size[idx];
3027:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3028:     }
3029:   }
3030:   PetscFree(r_status1);
3031:   PetscFree(r_waits1);

3033:   /*  recv buffer sizes */
3034:   /* Receive messages*/
3035: 
3036:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3037:   PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3038:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3039:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3040:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

3042:   for (i=0; i<nrqs; ++i) {
3043:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3044:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3045:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3046:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3047:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3048:   }
3049:   PetscFree(r_status2);
3050:   PetscFree(r_waits2);
3051: 
3052:   /* Wait on sends1 and sends2 */
3053:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3054:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

3056:   MPI_Waitall(nrqs,s_waits1,s_status1);
3057:   MPI_Waitall(nrqr,s_waits2,s_status2);
3058:   PetscFree(s_status1);
3059:   PetscFree(s_status2);
3060:   PetscFree(s_waits1);
3061:   PetscFree(s_waits2);

3063:   /* Now allocate buffers for a->j, and send them off */
3064:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the columns,
3065:    * resp. values, of all requested rows); the row sizes were sent in rbuf2; values are of FLOAT type */
3066: 
3067:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3068:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3069:   PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3070:   for (i=1; i<nrqr; i++)  sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3071: 
3072:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3073:   {
3074:     int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3075:     int rstart = c->rstart;

3077:     for (i=0; i<nrqr; i++) {
3078:       rbuf1_i   = rbuf1[i];
3079:       sbuf3_i   = sbuf3[i];
3080:       noutcols  = 0;
3081:       kmax = rbuf1_i[0];  /* num. of req. rows */
3082:       for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3083:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3084:         ncols  = Arow->length;
3085:         Acol   = Arow->col;
3086:         /* load the column indices for this row into cols*/
3087:         cols  = sbuf3_i + noutcols;
3088:         PetscMemcpy(cols,Acol,ncols*sizeof(int));
3089:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How is it with mappings?? */
3090:         noutcols += ncols;
3091:       }
3092:       MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3093:     }
3094:   }
3095:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3096:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

3098:   /* Allocate buffers for a->a, and send them off */
3099:   /* can be optimized by connecting this with the previous block */
3100:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3101:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3102:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3103:   for (i=1; i<nrqr; i++)  sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3104: 
3105:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3106:   {
3107:     FLOAT *Aval,*vals,*sbuf4_i;
3108:     int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3109: 
3110: 
3111:     for (i=0; i<nrqr; i++) {
3112:       rbuf1_i   = rbuf1[i];
3113:       sbuf4_i   = sbuf4[i];
3114:       rqrow     = 1;
3115:       noutvals  = 0;
3116:       kmax      = rbuf1_i[0];  /* num of req. rows */
3117:       for (k=0; k<kmax; k++,rqrow++) {
3118:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3119:         ncols  = Arow->length;
3120:         Aval = Arow->nz;
3121:         /* load the column values for this row into vals*/
3122:         vals  = sbuf4_i+noutvals;
3123:         PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3124:         noutvals += ncols;
3125:       }
3126:       MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3127:     }
3128:   }
3129:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3130:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3131:   PetscFree(rbuf1);

3133:   /* Form the matrix */

3135:   /* create col map */
3136:   len     = C->N*sizeof(int)+1;
3137:   PetscMalloc(len,&cmap);
3138:   PetscMemzero(cmap,C->N*sizeof(int));
3139:   for (j=0; j<ncol; j++) {
3140:       cmap[icol[j]] = j+1;
3141:   }
3142: 
3143:   /* Create row map; a global row map may eventually be needed, but a local one suffices here */
3144:   len     = C->M*sizeof(int)+1;
3145:   PetscMalloc(len,&rmap);
3146:   PetscMemzero(rmap,C->M*sizeof(int));
3147:   for (j=0; j<nrow; j++) {
3148:     rmap[irow[j]] = j;
3149:   }

3151:   /*
3152:      Determine the number of non-zeros in the diagonal and off-diagonal 
3153:      portions of the matrix in order to do correct preallocation
3154:    */

3156:   /* first get start and end of "diagonal" columns */
3157:   if (csize == PETSC_DECIDE) {
3158:     nlocal = ncol/size + ((ncol % size) > rank);
3159:   } else {
3160:     nlocal = csize;
3161:   }
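  /* nlocal is the number of submatrix columns owned by this process; the MPI_Scan below
     converts these counts into the column ownership range (cstart,cend] used to split the
     diagonal (matA) and off-diagonal (matB) parts of the new MPIAIJ matrix */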
3162:   {
3163:     int ncols,*cols,olen,dlen,thecol;
3164:     int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3165: 
3166:     MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3167:     cstart = cend - nlocal;
3168:     if (rank == size - 1 && cend != ncol) {
3169:       SETERRQ(1,"Local column sizes do not add up to total number of columns");
3170:     }

3172:     PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3173:     o_nz = d_nz + nrow;
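    /* d_nz[j] / o_nz[j] count the nonzeros of submatrix row j that fall inside / outside
       the local column range; they feed MatMPIAIJSetPreallocation below */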
3174: 
3175:     /* Update lens from local data */
3176:     for (j=0; j<nrow; j++) {
3177:       row  = irow[j];
3178:       proc = rtable[row];
3179:       if (proc == rank) {
3180:         Arow=A->rows[row-c->rstart];
3181:         ncols=Arow->length;
3182:         cols=Arow->col;
3183:         olen=dlen=0;
3184:         for (k=0; k<ncols; k++) {
3185:           if ((thecol=cmap[cols[k]])) {
3186:             if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3187:             else olen++;
3188:           }
3189:         }
3190:         o_nz[j]=olen;
3191:         d_nz[j]=dlen;
3192:       } else d_nz[j]=o_nz[j]=0;
3193:     }
3194:     /* Update lens from off-proc data and complete the waits */
3195:     /* this will be much simpler once only the appropriate columns are sent */
3196:     for (j=0; j<nrqs;j++) {
3197:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3198:       proc   = pa[i];
3199:       sbuf1_i = sbuf1[proc];
3200:       cidx    = 0;
3201:       rbuf2_i = rbuf2[i];
3202:       rbuf3_i = rbuf3[i];
3203:       kmax    = sbuf1_i[0]; /*num of rq. rows*/
3204:       for (k=1; k<=kmax; k++) {
3205:         row  = rmap[sbuf1_i[k]]; /* row index in the new submatrix */
3206:         for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3207:           if ((thecol=cmap[rbuf3_i[cidx]])) {
3208: 
3209:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3210:             else o_nz[row]++;
3211:           }
3212:         }
3213:       }
3214:     }
3215:   }
3216:   PetscFree(r_status3);
3217:   PetscFree(r_waits3);
3218:   MPI_Waitall(nrqr,s_waits3,s_status3);
3219:   PetscFree(s_status3);
3220:   PetscFree(s_waits3);

3222:   if (scall ==  MAT_INITIAL_MATRIX) {
3223:     MatCreate(comm,nrow,nlocal,PETSC_DECIDE,ncol,submat);
3224:     MatSetType(*submat,C->type_name);
3225:     MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3226:     mat=(Mat_MPIAIJ *)((*submat)->data);
3227:     matA=(Mat_SeqAIJ *)(mat->A->data);
3228:     matB=(Mat_SeqAIJ *)(mat->B->data);
3229: 
3230:   } else {
3231:     PetscTruth same;
3232:     /* The following checks could optionally be dropped for debugged versions of a user's
3233:      * program, but no PETSc option is known that switches off such safety
3234:      * tests - in the same way, the counting of o_nz,d_nz could be dropped for the REUSE
3235:      * matrix case */
3236: 
3237:     PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3238:     if (same == PETSC_FALSE) {
3239:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3240:     }
3241:     if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3242:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3243:     }
3244:     mat=(Mat_MPIAIJ *)((*submat)->data);
3245:     matA=(Mat_SeqAIJ *)(mat->A->data);
3246:     matB=(Mat_SeqAIJ *)(mat->B->data);
3247:     PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3248:     if (same == PETSC_FALSE) {
3249:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3250:     }
3251:     PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3252:     if (same == PETSC_FALSE) {
3253:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3254:     }
3255:     /* Initialize the matrix as if empty */
3256:     PetscMemzero(matA->ilen,nrow*sizeof(int));
3257:     PetscMemzero(matB->ilen,nrow*sizeof(int));
3258:     /* Perhaps MatZeroEntries would be better - check exactly what it does - all
3259:      * possibly stale information must be deleted */
3260:     /*submats[i]->factor = C->factor; !!! ??? if the factor is to be the same then some
3261:      * factor information must be copied - where is it? */
3262:     (*submat)->was_assembled=PETSC_FALSE;
3263:     (*submat)->assembled=PETSC_FALSE;
3264: 
3265:   }
3266:   PetscFree(d_nz);

3268:   /* Assemble the matrix */
3269:   /* First assemble from local rows */
3270:   {
3271:     int    i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3272:     FLOAT  *vals;
3273:     PetscScalar *matA_a,*matB_a;
3274: 
3275:     for (j=0; j<nrow; j++) {
3276:       oldrow = irow[j];
3277:       proc   = rtable[oldrow];
3278:       if (proc == rank) {
3279:         row  = rmap[oldrow];
3280: 
3281:         Arow  = A->rows[oldrow-c->rstart];
3282:         ncols = Arow->length;
3283:         cols  = Arow->col;
3284:         vals  = Arow->nz;
3285: 
3286:         i_row   = matA->i[row];
3287:         matA_a = matA->a + i_row;
3288:         matA_j = matA->j + i_row;
3289:         i_row   = matB->i[row];
3290:         matB_a = matB->a + i_row;
3291:         matB_j = matB->j + i_row;
3292:         for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3293:           if ((tcol = cmap[cols[k]])) {
3294:             if (tcol<=cstart) {
3295:               *matB_j++ = tcol-1;
3296:               *matB_a++ = vals[k];
3297:               ilenB++;
3298:             } else if (tcol<=cend) {
3299:               *matA_j++ = (tcol-1)-cstart;
3300:               *matA_a++ = (PetscScalar)(vals[k]);
3301:               ilenA++;
3302:             } else {
3303:               *matB_j++ = tcol-1;
3304:               *matB_a++ = vals[k];
3305:               ilenB++;
3306:             }
3307:           }
3308:         }
3309:         matA->ilen[row]=ilenA;
3310:         matB->ilen[row]=ilenB;
3311: 
3312:       }
3313:     }
3314:   }

3316:   /*   Now assemble the off proc rows*/
3317:   {
3318:     int  *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3319:     int  *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3320:     PetscScalar *matA_a,*matB_a;
3321:     FLOAT *rbuf4_i;

3323:     for (j=0; j<nrqs; j++) {
3324:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3325:       proc   = pa[i];
3326:       sbuf1_i = sbuf1[proc];
3327: 
3328:       cidx    = 0;
3329:       rbuf2_i = rbuf2[i];
3330:       rbuf3_i = rbuf3[i];
3331:       rbuf4_i = rbuf4[i];
3332:       kmax    = sbuf1_i[0];
3333:       for (k=1; k<=kmax; k++) {
3334:         row = rmap[sbuf1_i[k]];
3335: 
3336:         i_row  = matA->i[row];
3337:         matA_a = matA->a + i_row;
3338:         matA_j = matA->j + i_row;
3339:         i_row  = matB->i[row];
3340:         matB_a = matB->a + i_row;
3341:         matB_j = matB->j + i_row;
3342: 
3343:         lmax = rbuf2_i[k];
3344:         for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3345:           if ((tcol = cmap[rbuf3_i[cidx]])) {
3346:             if (tcol<=cstart) {
3347:               *matB_j++ = tcol-1;
3348:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3349:               ilenB++;
3350:             } else if (tcol<=cend) {
3351:               *matA_j++ = (tcol-1)-cstart;
3352:               *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3353:               ilenA++;
3354:             } else {
3355:               *matB_j++ = tcol-1;
3356:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3357:               ilenB++;
3358:             }
3359:           }
3360:         }
3361:         matA->ilen[row]=ilenA;
3362:         matB->ilen[row]=ilenB;
3363:       }
3364:     }
3365:   }

3367:   PetscFree(r_status4);
3368:   PetscFree(r_waits4);
3369:   MPI_Waitall(nrqr,s_waits4,s_status4);
3370:   PetscFree(s_waits4);
3371:   PetscFree(s_status4);

3373:   /* Restore the indices */
3374:   ISRestoreIndices(isrow,&irow);
3375:   ISRestoreIndices(iscol,&icol);

3377:   /* Destroy allocated memory */
3378:   PetscFree(rtable);
3379:   PetscFree(w1);
3380:   PetscFree(pa);

3382:   PetscFree(sbuf1);
3383:   PetscFree(rbuf2[0]);
3384:   PetscFree(rbuf2);
3385:   for (i=0; i<nrqr; ++i) {
3386:     PetscFree(sbuf2[i]);
3387:   }
3388:   for (i=0; i<nrqs; ++i) {
3389:     PetscFree(rbuf3[i]);
3390:     PetscFree(rbuf4[i]);
3391:   }

3393:   PetscFree(sbuf2);
3394:   PetscFree(rbuf3);
3395:   PetscFree(rbuf4);
3396:   PetscFree(sbuf3[0]);
3397:   PetscFree(sbuf3);
3398:   PetscFree(sbuf4[0]);
3399:   PetscFree(sbuf4);
3400: 
3401:   PetscFree(cmap);
3402:   PetscFree(rmap);


3405:   MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3406:   MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);


3409:   return(0);
3410: }