Actual source code: mpispooles.c

  1: /*$Id: mpispooles.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
  2: /* 
  3:    Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
  4: */

 6:  #include src/mat/impls/aij/seq/aij.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h
 8:  #include src/mat/impls/baij/seq/baij.h
 9:  #include src/mat/impls/aij/mpi/mpiaij.h
 10:  #include src/mat/impls/sbaij/mpi/mpisbaij.h
 11:  #include src/mat/impls/aij/seq/spooles/spooles.h

 13: extern int SetSpoolesOptions(Mat, Spooles_options *);

 17: int MatDestroy_MPIAIJSpooles(Mat A)
 18: {
 19:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 20:   int           ierr;
 21: 
 23:   if (lu->CleanUpSpooles) {
 24:     FrontMtx_free(lu->frontmtx) ;
 25:     IV_free(lu->newToOldIV) ;
 26:     IV_free(lu->oldToNewIV) ;
 27:     IV_free(lu->vtxmapIV) ;
 28:     InpMtx_free(lu->mtxA) ;
 29:     ETree_free(lu->frontETree) ;
 30:     IVL_free(lu->symbfacIVL) ;
 31:     SubMtxManager_free(lu->mtxmanager) ;
 32:     DenseMtx_free(lu->mtxX) ;
 33:     DenseMtx_free(lu->mtxY) ;
 34:     MPI_Comm_free(&(lu->comm_spooles));
 35:     if ( lu->scat ){
 36:       VecDestroy(lu->vec_spooles);
 37:       ISDestroy(lu->iden);
 38:       ISDestroy(lu->is_petsc);
 39:       VecScatterDestroy(lu->scat);
 40:     }
 41:   }
 42:   MatConvert_Spooles_Base(A,lu->basetype,&A);
 43:   (*A->ops->destroy)(A);

 45:   return(0);
 46: }

 50: int MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
 51: {
 52:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 53:   int           ierr,size,rank,m=A->m,irow,*rowindY;
 54:   PetscScalar   *array;
 55:   DenseMtx      *newY ;
 56:   SubMtxManager *solvemanager ;
 57: #if defined(PETSC_USE_COMPLEX)
 58:   double x_real,x_imag;
 59: #endif

 62:   MPI_Comm_size(A->comm,&size);
 63:   MPI_Comm_rank(A->comm,&rank);
 64: 
 65:   /* copy b into spooles' rhs mtxY */
 66:   DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m) ;
 67:   VecGetArray(b,&array);

 69:   DenseMtx_rowIndices(lu->mtxY, &m, &rowindY) ;  /* get m, rowind */
 70:   for ( irow = 0 ; irow < m ; irow++ ) {
 71:     rowindY[irow] = irow + lu->rstart;           /* global rowind */
 72: #if !defined(PETSC_USE_COMPLEX)
 73:     DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++) ;
 74: #else
 75:     DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
 76:     array++;
 77: #endif
 78:   }
 79:   VecRestoreArray(b,&array);
 80: 
 81:   if ( lu->options.msglvl > 2 ) {
 82:     fprintf(lu->options.msgFile, "\n\n 1 matrix in original ordering") ;
 83:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
 84:     fflush(lu->options.msgFile) ;
 85:   }
 86: 
 87:   /* permute and redistribute Y if necessary */
 88:   DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV) ;
 89:   if ( lu->options.msglvl > 2 ) {
 90:     fprintf(lu->options.msgFile, "\n\n rhs matrix in new ordering") ;
 91:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
 92:    fflush(lu->options.msgFile) ;
 93:   }

 95:   MPI_Barrier(A->comm) ; /* for initializing firsttag, because the num. of tags used
 96:                                    by FrontMtx_MPI_split() is unknown */
 97:   lu->firsttag = 0;
 98:   newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
 99:                                 lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
100:   DenseMtx_free(lu->mtxY) ;
101:   lu->mtxY = newY ;
102:   lu->firsttag += size ;
103:   if ( lu->options.msglvl > 2 ) {
104:     fprintf(lu->options.msgFile, "\n\n split DenseMtx Y") ;
105:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
106:     fflush(lu->options.msgFile) ;
107:   }

109:   if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
110:     /*   pivoting has taken place, redistribute the right hand side
111:          to match the final rows and columns in the fronts             */
112:     IV *rowmapIV ;
113:     rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
114:                                     lu->options.msgFile, lu->comm_spooles) ;
115:     newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
116:                                    lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
117:     DenseMtx_free(lu->mtxY) ;
118:     lu->mtxY = newY ;
119:     IV_free(rowmapIV) ;
120:     lu->firsttag += size;
121:   }
122:   if ( lu->options.msglvl > 2 ) {
123:     fprintf(lu->options.msgFile, "\n\n rhs matrix after split") ;
124:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
125:     fflush(lu->options.msgFile) ;
126:   }

128:   if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
129: 
130:   /* solve the linear system */
131:   solvemanager = SubMtxManager_new() ;
132:   SubMtxManager_init(solvemanager, NO_LOCK, 0) ;
133:   FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
134:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
135:   SubMtxManager_free(solvemanager) ;
136:   if ( lu->options.msglvl > 2 ) {
137:     fprintf(lu->options.msgFile, "\n solution in new ordering") ;
138:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile) ;
139:   }

141:   /* permute the solution into the original ordering */
142:   DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV) ;
143:   if ( lu->options.msglvl > 2 ) {
144:     fprintf(lu->options.msgFile, "\n\n solution in old ordering") ;
145:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile) ;
146:     fflush(lu->options.msgFile) ;
147:   }
148: 
149:   /* scatter local solution mtxX into mpi vector x */
150:   if( !lu->scat ){ /* create followings once for each numfactorization */
151:     /* vec_spooles <- mtxX */
152: #if !defined(PETSC_USE_COMPLEX) 
153:     VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
154: #else    
155:     VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
156:     VecGetArray(lu->vec_spooles,&array);
157:     for (irow = 0; irow < lu->nmycol; irow++){
158:       DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
159:       array[irow] = x_real+x_imag*PETSC_i;
160:     }
161:     VecRestoreArray(lu->vec_spooles,&array);
162: #endif 
163:     ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
164:     ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
165:     VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
166:   }

168:   VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
169:   VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
170: 
171:   return(0);
172: }

176: int MatFactorNumeric_MPIAIJSpooles(Mat A,Mat *F)
177: {
178:   Mat_Spooles     *lu = (Mat_Spooles*)(*F)->spptr;
179:   int             rank,size,ierr,lookahead=0;
180:   ChvManager      *chvmanager ;
181:   Chv             *rootchv ;
182:   Graph           *graph ;
183:   IVL             *adjIVL;
184:   DV              *cumopsDV ;
185:   double          droptol=0.0,*opcounts,minops,cutoff;
186: #if !defined(PETSC_USE_COMPLEX)
187:   double          *val;
188: #endif
189:   InpMtx          *newA ;
190:   PetscScalar     *av, *bv;
191:   int             *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
192:                   i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
193:   int             M=A->M,m=A->m,root,nedges,tagbound,lasttag;
194: 
196:   MPI_Comm_size(A->comm,&size);
197:   MPI_Comm_rank(A->comm,&rank);

199:   if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
200:     /* get input parameters */
201:     SetSpoolesOptions(A, &lu->options);

203:     (*F)->ops->solve   = MatSolve_MPIAIJSpooles;
204:     (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
205:     (*F)->assembled    = PETSC_TRUE;

207:     /* to be used by MatSolve() */
208:     lu->mtxY = DenseMtx_new() ;
209:     lu->mtxX = DenseMtx_new() ;
210:     lu->scat = PETSC_NULL;

212:     IVzero(20, lu->stats) ;
213:     DVzero(20, lu->cpus) ;

215:     lu->mtxA = InpMtx_new() ;
216:   }
217: 
218:   /* copy A to Spooles' InpMtx object */
219:   if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
220:     Mat_MPIAIJ  *mat =  (Mat_MPIAIJ*)A->data;
221:     Mat_SeqAIJ  *aa=(Mat_SeqAIJ*)(mat->A)->data;
222:     Mat_SeqAIJ  *bb=(Mat_SeqAIJ*)(mat->B)->data;
223:     ai=aa->i; aj=aa->j; av=aa->a;
224:     bi=bb->i; bj=bb->j; bv=bb->a;
225:     lu->rstart = mat->rstart;
226:     nz         = aa->nz + bb->nz;
227:     garray     = mat->garray;
228:   } else {         /* SPOOLES_SYMMETRIC  */
229:     Mat_MPISBAIJ  *mat = (Mat_MPISBAIJ*)A->data;
230:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
231:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
232:     ai=aa->i; aj=aa->j; av=aa->a;
233:     bi=bb->i; bj=bb->j; bv=bb->a;
234:     lu->rstart = mat->rstart;
235:     nz         = aa->nz + bb->nz;
236:     garray     = mat->garray;
237:   }
238: 
239:   InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0) ;
240:   row   = InpMtx_ivec1(lu->mtxA);
241:   col   = InpMtx_ivec2(lu->mtxA);
242: #if !defined(PETSC_USE_COMPLEX)
243:   val   = InpMtx_dvec(lu->mtxA);
244: #endif

246:   jj = 0; irow = lu->rstart;
247:   for ( i=0; i<m; i++ ) {
248:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
249:     countA = ai[i+1] - ai[i];
250:     countB = bi[i+1] - bi[i];
251:     bjj = bj + bi[i];
252:     jB = 0;
253: 
254:     if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
255:       /* B part, smaller col index */
256:       colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
257:       for (j=0; j<countB; j++){
258:         jcol = garray[bjj[j]];
259:         if (jcol > colA_start) {
260:           jB = j;
261:           break;
262:         }
263:         row[jj] = irow; col[jj] = jcol;
264: #if !defined(PETSC_USE_COMPLEX)
265:         val[jj++] = *bv++;
266: #else
267:         InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv)) ;
268:         bv++; jj++;
269: #endif
270:         if (j==countB-1) jB = countB;
271:       }
272:     }
273:     /* A part */
274:     for (j=0; j<countA; j++){
275:       row[jj] = irow; col[jj] = lu->rstart + ajj[j];
276: #if !defined(PETSC_USE_COMPLEX)
277:       val[jj++] = *av++;
278: #else
279:       InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av)) ;
280:       av++; jj++;
281: #endif
282:     }
283:     /* B part, larger col index */
284:     for (j=jB; j<countB; j++){
285:       row[jj] = irow; col[jj] = garray[bjj[j]];
286: #if !defined(PETSC_USE_COMPLEX)
287:       val[jj++] = *bv++;
288: #else
289:      InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv)) ;
290:      bv++; jj++;
291: #endif
292:     }
293:     irow++;
294:   }
295: #if !defined(PETSC_USE_COMPLEX)
296:   InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
297: #endif
298:   InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
299:   if ( lu->options.msglvl > 0 ) {
300:     printf("[%d] input matrix\n",rank);
301:     fprintf(lu->options.msgFile, "\n\n [%d] input matrix\n",rank) ;
302:     InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
303:     fflush(lu->options.msgFile) ;
304:   }

306:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
307:     /*
308:       find a low-fill ordering
309:       (1) create the Graph object
310:       (2) order the graph using multiple minimum degree
311:       (3) find out who has the best ordering w.r.t. op count,
312:           and broadcast that front tree object
313:     */
314:     graph = Graph_new() ;
315:     adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
316:               lu->options.msglvl, lu->options.msgFile, lu->comm_spooles) ;
317:     nedges = IVL_tsize(adjIVL) ;
318:     Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL) ;
319:     if ( lu->options.msglvl > 2 ) {
320:       fprintf(lu->options.msgFile, "\n\n graph of the input matrix") ;
321:       Graph_writeForHumanEye(graph, lu->options.msgFile) ;
322:       fflush(lu->options.msgFile) ;
323:     }

325:     switch (lu->options.ordering) {
326:     case 0:
327:       lu->frontETree = orderViaBestOfNDandMS(graph,
328:                      lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
329:                      lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
330:     case 1:
331:       lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
332:     case 2:
333:       lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
334:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
335:     case 3:
336:       lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
337:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
338:     default:
339:       SETERRQ(1,"Unknown Spooles's ordering");
340:     }

342:     Graph_free(graph) ;
343:     if ( lu->options.msglvl > 2 ) {
344:       fprintf(lu->options.msgFile, "\n\n front tree from ordering") ;
345:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
346:       fflush(lu->options.msgFile) ;
347:     }

349:     opcounts = DVinit(size, 0.0) ;
350:     opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag) ;
351:     MPI_Allgather((void *) &opcounts[rank], 1, MPI_DOUBLE,
352:               (void *) opcounts, 1, MPI_DOUBLE, A->comm) ;
353:     minops = DVmin(size, opcounts, &root) ;
354:     DVfree(opcounts) ;
355: 
356:     lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
357:                              lu->options.msglvl, lu->options.msgFile, lu->comm_spooles) ;
358:     if ( lu->options.msglvl > 2 ) {
359:       fprintf(lu->options.msgFile, "\n\n best front tree") ;
360:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
361:       fflush(lu->options.msgFile) ;
362:     }
363: 
364:     /* get the permutations, permute the front tree, permute the matrix */
365:     lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree) ;
366:     lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree) ;

368:     ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;

370:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV)) ;
371: 
372:     if (  lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA) ;

374:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
375:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;

377:     /* generate the owners map IV object and the map from vertices to owners */
378:     cutoff   = 1./(2*size) ;
379:     cumopsDV = DV_new() ;
380:     DV_init(cumopsDV, size, NULL) ;
381:     lu->ownersIV = ETree_ddMap(lu->frontETree,
382:                        lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff) ;
383:     DV_free(cumopsDV) ;
384:     lu->vtxmapIV = IV_new() ;
385:     IV_init(lu->vtxmapIV, M, NULL) ;
386:     IVgather(M, IV_entries(lu->vtxmapIV),
387:              IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree)) ;
388:     if ( lu->options.msglvl > 2 ) {
389:       fprintf(lu->options.msgFile, "\n\n map from fronts to owning processes") ;
390:       IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile) ;
391:       fprintf(lu->options.msgFile, "\n\n map from vertices to owning processes") ;
392:       IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile) ;
393:       fflush(lu->options.msgFile) ;
394:     }

396:     /* redistribute the matrix */
397:     lu->firsttag = 0 ;
398:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
399:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
400:     lu->firsttag += size ;

402:     InpMtx_free(lu->mtxA) ;
403:     lu->mtxA = newA ;
404:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
405:     if ( lu->options.msglvl > 2 ) {
406:       fprintf(lu->options.msgFile, "\n\n split InpMtx") ;
407:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
408:       fflush(lu->options.msgFile) ;
409:     }
410: 
411:     /* compute the symbolic factorization */
412:     lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
413:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
414:     lu->firsttag += lu->frontETree->nfront ;
415:     if ( lu->options.msglvl > 2 ) {
416:       fprintf(lu->options.msgFile, "\n\n local symbolic factorization") ;
417:       IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile) ;
418:       fflush(lu->options.msgFile) ;
419:     }

421:     lu->mtxmanager = SubMtxManager_new() ;
422:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;
423:     lu->frontmtx = FrontMtx_new() ;

425:   } else { /* new num factorization using previously computed symbolic factor */
426:     if (lu->options.pivotingflag) {                  /* different FrontMtx is required */
427:       FrontMtx_free(lu->frontmtx) ;
428:       lu->frontmtx   = FrontMtx_new() ;
429:     }

431:     SubMtxManager_free(lu->mtxmanager) ;
432:     lu->mtxmanager = SubMtxManager_new() ;
433:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;

435:     /* permute mtxA */
436:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV)) ;
437:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA) ;
438: 
439:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
440:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;

442:     /* redistribute the matrix */
443:     MPI_Barrier(A->comm) ;
444:     lu->firsttag = 0;
445:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
446:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles) ;
447:     lu->firsttag += size ;

449:     InpMtx_free(lu->mtxA) ;
450:     lu->mtxA = newA ;
451:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
452:     if ( lu->options.msglvl > 2 ) {
453:       fprintf(lu->options.msgFile, "\n\n split InpMtx") ;
454:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
455:       fflush(lu->options.msgFile) ;
456:     }
457:   } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */

459:   FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
460:               FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
461:               lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;

463:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
464:     if ( lu->options.patchAndGoFlag == 1 ) {
465:       lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
466:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
467:                        lu->options.storeids, lu->options.storevalues) ;
468:     } else if ( lu->options.patchAndGoFlag == 2 ) {
469:       lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
470:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
471:                        lu->options.storeids, lu->options.storevalues) ;
472:     }
473:   }

475:   /* numerical factorization */
476:   chvmanager = ChvManager_new() ;
477:   ChvManager_init(chvmanager, NO_LOCK, 0) ;

479:   tagbound = maxTagMPI(lu->comm_spooles) ;
480:   lasttag  = lu->firsttag + 3*lu->frontETree->nfront + 2;
481:   /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
482:   if ( lasttag > tagbound ) {
483:       SETERRQ3(1,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
484:                lu->firsttag, lasttag, tagbound) ;
485:   }
486:   rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
487:                      chvmanager, lu->ownersIV, lookahead, &ierr, lu->cpus,
488:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles) ;
489:   ChvManager_free(chvmanager) ;
490:   lu->firsttag = lasttag;
491:   if ( lu->options.msglvl > 2 ) {
492:     fprintf(lu->options.msgFile, "\n\n numeric factorization") ;
493:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
494:     fflush(lu->options.msgFile) ;
495:   }

497:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
498:     if ( lu->options.patchAndGoFlag == 1 ) {
499:       if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
500:         if (lu->options.msglvl > 0 ){
501:           fprintf(lu->options.msgFile, "\n small pivots found at these locations") ;
502:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
503:         }
504:       }
505:       PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
506:     } else if ( lu->options.patchAndGoFlag == 2 ) {
507:       if (lu->options.msglvl > 0 ){
508:         if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
509:           fprintf(lu->options.msgFile, "\n small pivots found at these locations") ;
510:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
511:         }
512:         if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
513:           fprintf(lu->options.msgFile, "\n perturbations") ;
514:           DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile) ;
515:         }
516:       }
517:       PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
518:     }
519:   }
520:   if ( ierr >= 0 ) SETERRQ2(1,"\n proc %d : factorization error at front %d", rank, ierr) ;
521: 
522:   /*  post-process the factorization and split 
523:       the factor matrices into submatrices */
524:   lasttag  = lu->firsttag + 5*size;
525:   if ( lasttag > tagbound ) {
526:       SETERRQ3(1,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
527:                lu->firsttag, lasttag, tagbound) ;
528:   }
529:   FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
530:                          lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
531:   lu->firsttag += 5*size ;
532:   if ( lu->options.msglvl > 2 ) {
533:     fprintf(lu->options.msgFile, "\n\n numeric factorization after post-processing");
534:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
535:     fflush(lu->options.msgFile) ;
536:   }
537: 
538:   /* create the solve map object */
539:   lu->solvemap = SolveMap_new() ;
540:   SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
541:                FrontMtx_upperBlockIVL(lu->frontmtx),
542:                FrontMtx_lowerBlockIVL(lu->frontmtx),
543:                size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
544:                lu->options.seed, lu->options.msglvl, lu->options.msgFile);
545:   if ( lu->options.msglvl > 2 ) {
546:     SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile) ;
547:     fflush(lu->options.msgFile) ;
548:   }

550:   /* redistribute the submatrices of the factors */
551:   FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
552:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
553:   if ( lu->options.msglvl > 2 ) {
554:     fprintf(lu->options.msgFile, "\n\n numeric factorization after split") ;
555:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
556:     fflush(lu->options.msgFile) ;
557:   }

559:   /* create a solution DenseMtx object */
560:   lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
561:                                          lu->options.msglvl, lu->options.msgFile) ;
562:   lu->nmycol = IV_size(lu->ownedColumnsIV) ;
563:   if ( lu->nmycol > 0) {
564:     DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol) ;
565:     /* get pointers rowindX and entX */
566:     DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
567:     lu->entX = DenseMtx_entries(lu->mtxX) ;
568:   } else { /* lu->nmycol == 0 */
569:     lu->entX    = 0;
570:     lu->rowindX = 0;
571:   }

573:   if ( lu->scat ){
574:     VecDestroy(lu->vec_spooles);
575:     ISDestroy(lu->iden);
576:     ISDestroy(lu->is_petsc);
577:     VecScatterDestroy(lu->scat);
578:   }
579:   lu->scat = PETSC_NULL;
580:   lu->flg = SAME_NONZERO_PATTERN;

582:   lu->CleanUpSpooles = PETSC_TRUE;
583:   return(0);
584: }

586: EXTERN_C_BEGIN
589: int MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,const MatType type,Mat *newmat) {
590:   /* This routine is only called to convert a MATMPIAIJ matrix */
591:   /* to a MATMPIAIJSPOOLES matrix, so we will ignore 'MatType type'. */
592:   int         ierr;
593:   Mat         B=*newmat;
594:   Mat_Spooles *lu;

597:   if (B != A) {
598:     /* This routine is inherited, so we know the type is correct. */
599:     MatDuplicate(A,MAT_COPY_VALUES,&B);
600:   }

602:   PetscNew(Mat_Spooles,&lu);
603:   B->spptr = (void*)lu;

605:   lu->basetype                  = MATMPIAIJ;
606:   lu->CleanUpSpooles            = PETSC_FALSE;
607:   lu->MatDuplicate              = A->ops->duplicate;
608:   lu->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
609:   lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
610:   lu->MatView                   = A->ops->view;
611:   lu->MatAssemblyEnd            = A->ops->assemblyend;
612:   lu->MatDestroy                = A->ops->destroy;
613:   B->ops->duplicate             = MatDuplicate_MPIAIJSpooles;
614:   B->ops->lufactorsymbolic      = MatLUFactorSymbolic_MPIAIJSpooles;
615:   B->ops->view                  = MatView_SeqAIJSpooles;
616:   B->ops->assemblyend           = MatAssemblyEnd_MPIAIJSpooles;
617:   B->ops->destroy               = MatDestroy_MPIAIJSpooles;

619:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
620:                                            "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
621:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
622:                                            "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
623:   PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
624:   *newmat = B;
625:   return(0);
626: }
627: EXTERN_C_END

631: int MatDuplicate_MPIAIJSpooles(Mat A, MatDuplicateOption op, Mat *M) {
632:   int         ierr;
633:   Mat_Spooles *lu=(Mat_Spooles *)A->spptr;

636:   (*lu->MatDuplicate)(A,op,M);
637:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_Spooles));
638:   return(0);
639: }

641: /*MC
642:   MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices 
643:   via the external package Spooles.

645:   If MPIAIJSPOOLES is installed (see the manual for
646:   instructions on how to declare the existence of external packages),
647:   a matrix type can be constructed which invokes SPOOLES solvers.
648:   After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).
649:   This matrix type is only supported for double precision real.

651:   This matrix inherits from MATMPIAIJ.  As a result, MatMPIAIJSetPreallocation is 
652:   supported for this matrix type.  One can also call MatConvert for an inplace conversion to or from 
653:   the MATMPIAIJ type without data copy.

655:   Consult Spooles documentation for more information about the options database keys below.

657:   Options Database Keys:
658: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
659: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
660: . -mat_spooles_seed <seed> - random number seed used for ordering
661: . -mat_spooles_msglvl <msglvl> - message output level
662: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
663: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
664: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
665: . -mat_spooles_maxsize <n> - maximum size of a supernode
666: . -mat_spooles_FrontMtxInfo <true,false> - print Spooles information about the computed factorization
667: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
668: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
669: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
670: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
671: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
672: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object

674:    Level: beginner

676: .seealso: PCLU
677: M*/

679: EXTERN_C_BEGIN
682: int MatCreate_MPIAIJSpooles(Mat A) {
684:   Mat A_diag;

687:   /* Change type name before calling MatSetType to force proper construction of MPIAIJ and MPIAIJSpooles types */
688:   PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJSPOOLES);
689:   MatSetType(A,MATMPIAIJ);
690:   A_diag = ((Mat_MPIAIJ *)A->data)->A;
691:   MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,&A_diag);
692:   MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,&A);
693:   return(0);
694: }
695: EXTERN_C_END