Actual source code: mpispooles.c
1: /*$Id: mpispooles.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/impls/sbaij/seq/sbaij.h
8: #include src/mat/impls/baij/seq/baij.h
9: #include src/mat/impls/aij/mpi/mpiaij.h
10: #include src/mat/impls/sbaij/mpi/mpisbaij.h
11: #include src/mat/impls/aij/seq/spooles/spooles.h
/* Defined outside this file. MatFactorNumeric_MPIAIJSpooles() calls it on the
   first numeric factorization to fill lu->options. */
13: extern int SetSpoolesOptions(Mat, Spooles_options *);
17: int MatDestroy_MPIAIJSpooles(Mat A)
18: {
19: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
20: int ierr;
21:
23: if (lu->CleanUpSpooles) {
24: FrontMtx_free(lu->frontmtx) ;
25: IV_free(lu->newToOldIV) ;
26: IV_free(lu->oldToNewIV) ;
27: IV_free(lu->vtxmapIV) ;
28: InpMtx_free(lu->mtxA) ;
29: ETree_free(lu->frontETree) ;
30: IVL_free(lu->symbfacIVL) ;
31: SubMtxManager_free(lu->mtxmanager) ;
32: DenseMtx_free(lu->mtxX) ;
33: DenseMtx_free(lu->mtxY) ;
34: MPI_Comm_free(&(lu->comm_spooles));
35: if ( lu->scat ){
36: VecDestroy(lu->vec_spooles);
37: ISDestroy(lu->iden);
38: ISDestroy(lu->is_petsc);
39: VecScatterDestroy(lu->scat);
40: }
41: }
42: MatConvert_Spooles_Base(A,lu->basetype,&A);
43: (*A->ops->destroy)(A);
45: return(0);
46: }
/*
   MatSolve_MPIAIJSpooles - Solves with the SPOOLES factorization stored in A->spptr.

   Input Parameters:
+  A - matrix whose spptr is the Mat_Spooles filled by the numeric factorization
-  b - right-hand side vector

   Output Parameter:
.  x - solution vector

   Steps, as coded below: copy the local part of b into the dense matrix mtxY,
   permute it with oldToNewIV, redistribute it with DenseMtx_MPI_splitByRows()
   (a second time when pivoting took place), call FrontMtx_MPI_solve(), permute
   the solution mtxX back with newToOldIV and scatter it into x.

   Always returns 0; the return codes of the PETSc and MPI calls are not examined.
*/
50: int MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
51: {
52: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
53: int ierr,size,rank,m=A->m,irow,*rowindY;
54: PetscScalar *array;
55: DenseMtx *newY ;
56: SubMtxManager *solvemanager ;
57: #if defined(PETSC_USE_COMPLEX)
58: double x_real,x_imag;
59: #endif
62: MPI_Comm_size(A->comm,&size);
63: MPI_Comm_rank(A->comm,&rank);
64:
65: /* copy b into spooles' rhs mtxY */
66: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m) ;
67: VecGetArray(b,&array);
69: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY) ; /* get m, rowind */
/* each local row irow is labelled with its global index irow + lu->rstart */
70: for ( irow = 0 ; irow < m ; irow++ ) {
71: rowindY[irow] = irow + lu->rstart; /* global rowind */
72: #if !defined(PETSC_USE_COMPLEX)
73: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++) ;
74: #else
75: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
76: array++;
77: #endif
78: }
/* NOTE(review): array was advanced inside the loop, so the pointer handed back
   here is no longer the one obtained from VecGetArray() - confirm this is acceptable */
79: VecRestoreArray(b,&array);
80:
81: if ( lu->options.msglvl > 2 ) {
82: fprintf(lu->options.msgFile, "\n\n 1 matrix in original ordering") ;
83: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
84: fflush(lu->options.msgFile) ;
85: }
86:
87: /* permute and redistribute Y if necessary */
88: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV) ;
89: if ( lu->options.msglvl > 2 ) {
90: fprintf(lu->options.msgFile, "\n\n rhs matrix in new ordering") ;
91: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
92: fflush(lu->options.msgFile) ;
93: }
95: MPI_Barrier(A->comm) ; /* for initializing firsttag, because the num. of tags used
96: by FrontMtx_MPI_split() is unknown */
/* the tag counter restarts at 0 for every solve and is advanced by `size` after each split */
97: lu->firsttag = 0;
98: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
99: lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
/* the split returns a new DenseMtx; the old one is freed and replaced */
100: DenseMtx_free(lu->mtxY) ;
101: lu->mtxY = newY ;
102: lu->firsttag += size ;
103: if ( lu->options.msglvl > 2 ) {
104: fprintf(lu->options.msgFile, "\n\n split DenseMtx Y") ;
105: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
106: fflush(lu->options.msgFile) ;
107: }
109: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
110: /* pivoting has taken place, redistribute the right hand side
111: to match the final rows and columns in the fronts */
112: IV *rowmapIV ;
113: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
114: lu->options.msgFile, lu->comm_spooles) ;
115: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
116: lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
117: DenseMtx_free(lu->mtxY) ;
118: lu->mtxY = newY ;
119: IV_free(rowmapIV) ;
120: lu->firsttag += size;
121: }
122: if ( lu->options.msglvl > 2 ) {
123: fprintf(lu->options.msgFile, "\n\n rhs matrix after split") ;
124: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile) ;
125: fflush(lu->options.msgFile) ;
126: }
/* reload the row indices of mtxX from ownedColumnsIV before every solve */
128: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
129:
130: /* solve the linear system */
/* a temporary SubMtxManager is created for this solve only and freed right after it */
131: solvemanager = SubMtxManager_new() ;
132: SubMtxManager_init(solvemanager, NO_LOCK, 0) ;
133: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
134: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
135: SubMtxManager_free(solvemanager) ;
136: if ( lu->options.msglvl > 2 ) {
137: fprintf(lu->options.msgFile, "\n solution in new ordering") ;
138: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile) ;
139: }
141: /* permute the solution into the original ordering */
142: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV) ;
143: if ( lu->options.msglvl > 2 ) {
144: fprintf(lu->options.msgFile, "\n\n solution in old ordering") ;
145: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile) ;
146: fflush(lu->options.msgFile) ;
147: }
148:
149: /* scatter local solution mtxX into mpi vector x */
/* lu->scat is reset to PETSC_NULL by the numeric factorization, so this block
   runs on the first solve after each factorization */
150: if( !lu->scat ){ /* create followings once for each numfactorization */
151: /* vec_spooles <- mtxX */
152: #if !defined(PETSC_USE_COMPLEX)
/* real case: vec_spooles is created on top of the entX array itself */
153: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
154: #else
/* NOTE(review): in the complex case the entries are copied into vec_spooles only
   inside this create-once block; no copy is visible for later solves that reuse
   the same scatter - confirm the result is refreshed on subsequent solves */
155: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
156: VecGetArray(lu->vec_spooles,&array);
157: for (irow = 0; irow < lu->nmycol; irow++){
158: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
159: array[irow] = x_real+x_imag*PETSC_i;
160: }
161: VecRestoreArray(lu->vec_spooles,&array);
162: #endif
/* source indices 0..nmycol-1 of vec_spooles go to the destination indices stored in rowindX */
163: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
164: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
165: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
166: }
168: VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
169: VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
170:
171: return(0);
172: }
/*
   MatFactorNumeric_MPIAIJSpooles - Numeric factorization of A with MPI SPOOLES.

   Input Parameter:
.  A - matrix to factor; read as Mat_MPIAIJ when lu->options.symflag is
       SPOOLES_NONSYMMETRIC and as Mat_MPISBAIJ otherwise

   Input/Output Parameter:
.  F - factor matrix; (*F)->spptr is the Mat_Spooles that receives the result

   Steps, as coded below:
   1. On the first call (lu->flg == DIFFERENT_NONZERO_PATTERN) read the options,
      install the solve/destroy routines on *F and create mtxX, mtxY, mtxA.
   2. Copy the local rows of A into the InpMtx lu->mtxA.
   3. First call: build the graph, compute an ordering, broadcast the front tree
      of the process with the smallest operation count, permute, build the
      owners maps, redistribute the matrix and compute the symbolic factorization.
      Later calls: re-permute and redistribute with the stored maps.
   4. Factor with FrontMtx_MPI_factorInpMtx(), post-process, build the solve map,
      split the factors and set up the solution matrix mtxX.

   Returns 0 on success; SETERRQ is raised for an unknown ordering, an exhausted
   MPI tag range, or a factorization error reported by SPOOLES.
*/
176: int MatFactorNumeric_MPIAIJSpooles(Mat A,Mat *F)
177: {
178: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
179: int rank,size,ierr,lookahead=0;
180: ChvManager *chvmanager ;
181: Chv *rootchv ;
182: Graph *graph ;
183: IVL *adjIVL;
184: DV *cumopsDV ;
185: double droptol=0.0,*opcounts,minops,cutoff;
186: #if !defined(PETSC_USE_COMPLEX)
187: double *val;
188: #endif
189: InpMtx *newA ;
190: PetscScalar *av, *bv;
191: int *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
192: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
193: int M=A->M,m=A->m,root,nedges,tagbound,lasttag;
194:
196: MPI_Comm_size(A->comm,&size);
197: MPI_Comm_rank(A->comm,&rank);
199: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
200: /* get input parameters */
201: SetSpoolesOptions(A, &lu->options);
/* route later solve/destroy calls on the factor to the routines in this file */
203: (*F)->ops->solve = MatSolve_MPIAIJSpooles;
204: (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
205: (*F)->assembled = PETSC_TRUE;
207: /* to be used by MatSolve() */
208: lu->mtxY = DenseMtx_new() ;
209: lu->mtxX = DenseMtx_new() ;
210: lu->scat = PETSC_NULL;
212: IVzero(20, lu->stats) ;
213: DVzero(20, lu->cpus) ;
215: lu->mtxA = InpMtx_new() ;
216: }
217:
218: /* copy A to Spooles' InpMtx object */
/* Fetch the row-pointer (i), column-index (j) and value (a) arrays of the two local
   blocks mat->A and mat->B; column indices of B are translated through garray below */
219: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
220: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
221: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
222: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
223: ai=aa->i; aj=aa->j; av=aa->a;
224: bi=bb->i; bj=bb->j; bv=bb->a;
225: lu->rstart = mat->rstart;
226: nz = aa->nz + bb->nz;
227: garray = mat->garray;
228: } else { /* SPOOLES_SYMMETRIC */
229: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
230: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
231: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
232: ai=aa->i; aj=aa->j; av=aa->a;
233: bi=bb->i; bj=bb->j; bv=bb->a;
234: lu->rstart = mat->rstart;
235: nz = aa->nz + bb->nz;
236: garray = mat->garray;
237: }
238:
239: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0) ;
240: row = InpMtx_ivec1(lu->mtxA);
241: col = InpMtx_ivec2(lu->mtxA);
242: #if !defined(PETSC_USE_COMPLEX)
243: val = InpMtx_dvec(lu->mtxA);
244: #endif
/* jj counts the triples written so far; irow is the global row number of local row i.
   Per row the order is: B entries up to the first A column (nonsymmetric case only),
   then the A entries shifted by rstart, then the remaining B entries. */
246: jj = 0; irow = lu->rstart;
247: for ( i=0; i<m; i++ ) {
248: ajj = aj + ai[i]; /* ptr to the beginning of this row */
249: countA = ai[i+1] - ai[i];
250: countB = bi[i+1] - bi[i];
251: bjj = bj + bi[i];
252: jB = 0;
253:
254: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
255: /* B part, smaller col index */
/* NOTE(review): ajj[0] is read even when countA is 0 - confirm every local row
   has at least one entry in the A block */
256: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
257: for (j=0; j<countB; j++){
258: jcol = garray[bjj[j]];
259: if (jcol > colA_start) {
260: jB = j;
261: break;
262: }
263: row[jj] = irow; col[jj] = jcol;
264: #if !defined(PETSC_USE_COMPLEX)
265: val[jj++] = *bv++;
266: #else
267: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv)) ;
268: bv++; jj++;
269: #endif
/* every B entry of this row was consumed: nothing is left for the second B loop */
270: if (j==countB-1) jB = countB;
271: }
272: }
273: /* A part */
274: for (j=0; j<countA; j++){
275: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
276: #if !defined(PETSC_USE_COMPLEX)
277: val[jj++] = *av++;
278: #else
279: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av)) ;
280: av++; jj++;
281: #endif
282: }
283: /* B part, larger col index */
284: for (j=jB; j<countB; j++){
285: row[jj] = irow; col[jj] = garray[bjj[j]];
286: #if !defined(PETSC_USE_COMPLEX)
287: val[jj++] = *bv++;
288: #else
289: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv)) ;
290: bv++; jj++;
291: #endif
292: }
293: irow++;
294: }
/* real case: the filled row/col/val arrays are passed to the InpMtx in one call */
295: #if !defined(PETSC_USE_COMPLEX)
296: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
297: #endif
298: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
299: if ( lu->options.msglvl > 0 ) {
300: printf("[%d] input matrix\n",rank);
301: fprintf(lu->options.msgFile, "\n\n [%d] input matrix\n",rank) ;
302: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
303: fflush(lu->options.msgFile) ;
304: }
306: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
307: /*
308: find a low-fill ordering
309: (1) create the Graph object
310: (2) order the graph using multiple minimum degree
311: (3) find out who has the best ordering w.r.t. op count,
312: and broadcast that front tree object
313: */
314: graph = Graph_new() ;
315: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
316: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles) ;
317: nedges = IVL_tsize(adjIVL) ;
318: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL) ;
319: if ( lu->options.msglvl > 2 ) {
320: fprintf(lu->options.msgFile, "\n\n graph of the input matrix") ;
321: Graph_writeForHumanEye(graph, lu->options.msgFile) ;
322: fflush(lu->options.msgFile) ;
323: }
/* every process orders the same graph with its own seed (options.seed + rank) */
325: switch (lu->options.ordering) {
326: case 0:
327: lu->frontETree = orderViaBestOfNDandMS(graph,
328: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
329: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
330: case 1:
331: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
332: case 2:
333: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
334: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
335: case 3:
336: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
337: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
338: default:
339: SETERRQ(1,"Unknown Spooles's ordering");
340: }
342: Graph_free(graph) ;
343: if ( lu->options.msglvl > 2 ) {
344: fprintf(lu->options.msgFile, "\n\n front tree from ordering") ;
345: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
346: fflush(lu->options.msgFile) ;
347: }
/* gather each process's operation count; `root` becomes the index of the smallest one.
   NOTE(review): the send buffer &opcounts[rank] lies inside the receive buffer
   opcounts - confirm this is permitted by the MPI implementation in use.
   minops is assigned but not read again in this function. */
349: opcounts = DVinit(size, 0.0) ;
350: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag) ;
351: MPI_Allgather((void *) &opcounts[rank], 1, MPI_DOUBLE,
352: (void *) opcounts, 1, MPI_DOUBLE, A->comm) ;
353: minops = DVmin(size, opcounts, &root) ;
354: DVfree(opcounts) ;
355:
356: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
357: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles) ;
358: if ( lu->options.msglvl > 2 ) {
359: fprintf(lu->options.msgFile, "\n\n best front tree") ;
360: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
361: fflush(lu->options.msgFile) ;
362: }
363:
364: /* get the permutations, permute the front tree, permute the matrix */
365: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree) ;
366: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree) ;
368: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;
370: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV)) ;
371:
372: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA) ;
374: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
375: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
377: /* generate the owners map IV object and the map from vertices to owners */
378: cutoff = 1./(2*size) ;
379: cumopsDV = DV_new() ;
380: DV_init(cumopsDV, size, NULL) ;
381: lu->ownersIV = ETree_ddMap(lu->frontETree,
382: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff) ;
383: DV_free(cumopsDV) ;
/* vtxmapIV[v] = ownersIV[ front of v ] for each of the M vertices */
384: lu->vtxmapIV = IV_new() ;
385: IV_init(lu->vtxmapIV, M, NULL) ;
386: IVgather(M, IV_entries(lu->vtxmapIV),
387: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree)) ;
388: if ( lu->options.msglvl > 2 ) {
389: fprintf(lu->options.msgFile, "\n\n map from fronts to owning processes") ;
390: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile) ;
391: fprintf(lu->options.msgFile, "\n\n map from vertices to owning processes") ;
392: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile) ;
393: fflush(lu->options.msgFile) ;
394: }
396: /* redistribute the matrix */
397: lu->firsttag = 0 ;
398: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
399: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
400: lu->firsttag += size ;
/* the split returns a new InpMtx; the old one is freed and replaced */
402: InpMtx_free(lu->mtxA) ;
403: lu->mtxA = newA ;
404: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
405: if ( lu->options.msglvl > 2 ) {
406: fprintf(lu->options.msgFile, "\n\n split InpMtx") ;
407: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
408: fflush(lu->options.msgFile) ;
409: }
410:
411: /* compute the symbolic factorization */
412: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
413: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
414: lu->firsttag += lu->frontETree->nfront ;
415: if ( lu->options.msglvl > 2 ) {
416: fprintf(lu->options.msgFile, "\n\n local symbolic factorization") ;
417: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile) ;
418: fflush(lu->options.msgFile) ;
419: }
421: lu->mtxmanager = SubMtxManager_new() ;
422: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;
423: lu->frontmtx = FrontMtx_new() ;
425: } else { /* new num factorization using previously computed symbolic factor */
426: if (lu->options.pivotingflag) { /* different FrontMtx is required */
427: FrontMtx_free(lu->frontmtx) ;
428: lu->frontmtx = FrontMtx_new() ;
429: }
/* the submatrix manager is always replaced by a fresh one */
431: SubMtxManager_free(lu->mtxmanager) ;
432: lu->mtxmanager = SubMtxManager_new() ;
433: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;
435: /* permute mtxA */
436: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV)) ;
437: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA) ;
438:
439: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
440: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
442: /* redistribute the matrix */
443: MPI_Barrier(A->comm) ;
444: lu->firsttag = 0;
445: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
446: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles) ;
447: lu->firsttag += size ;
449: InpMtx_free(lu->mtxA) ;
450: lu->mtxA = newA ;
451: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
452: if ( lu->options.msglvl > 2 ) {
453: fprintf(lu->options.msgFile, "\n\n split InpMtx") ;
454: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
455: fflush(lu->options.msgFile) ;
456: }
457: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
/* from here on the code is common to first and repeated factorizations */
459: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
460: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
461: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;
/* PatchAndGo information is attached only for symmetric factorizations, strategies 1 and 2 */
463: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
464: if ( lu->options.patchAndGoFlag == 1 ) {
465: lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
466: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
467: lu->options.storeids, lu->options.storevalues) ;
468: } else if ( lu->options.patchAndGoFlag == 2 ) {
469: lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
470: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
471: lu->options.storeids, lu->options.storevalues) ;
472: }
473: }
475: /* numerical factorization */
476: chvmanager = ChvManager_new() ;
477: ChvManager_init(chvmanager, NO_LOCK, 0) ;
/* refuse to start if firsttag + 3*nfront + 2 would exceed the tag limit reported by maxTagMPI() */
479: tagbound = maxTagMPI(lu->comm_spooles) ;
480: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
481: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
482: if ( lasttag > tagbound ) {
483: SETERRQ3(1,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
484: lu->firsttag, lasttag, tagbound) ;
485: }
/* ierr is written by the factorization through &ierr and examined further below;
   droptol (0.0) and lookahead (0) are passed with their initial values.
   rootchv is assigned but not used afterwards in this function. */
486: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
487: chvmanager, lu->ownersIV, lookahead, &ierr, lu->cpus,
488: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles) ;
489: ChvManager_free(chvmanager) ;
490: lu->firsttag = lasttag;
491: if ( lu->options.msglvl > 2 ) {
492: fprintf(lu->options.msgFile, "\n\n numeric factorization") ;
493: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
494: fflush(lu->options.msgFile) ;
495: }
/* report what PatchAndGo recorded (when msglvl > 0) and release the info object */
497: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
498: if ( lu->options.patchAndGoFlag == 1 ) {
499: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
500: if (lu->options.msglvl > 0 ){
501: fprintf(lu->options.msgFile, "\n small pivots found at these locations") ;
502: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
503: }
504: }
505: PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
506: } else if ( lu->options.patchAndGoFlag == 2 ) {
507: if (lu->options.msglvl > 0 ){
508: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
509: fprintf(lu->options.msgFile, "\n small pivots found at these locations") ;
510: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
511: }
512: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
513: fprintf(lu->options.msgFile, "\n perturbations") ;
514: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile) ;
515: }
516: }
517: PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
518: }
519: }
/* a non-negative value left in ierr by the factorization is reported as the failing front */
520: if ( ierr >= 0 ) SETERRQ2(1,"\n proc %d : factorization error at front %d", rank, ierr) ;
521:
522: /* post-process the factorization and split
523: the factor matrices into submatrices */
524: lasttag = lu->firsttag + 5*size;
525: if ( lasttag > tagbound ) {
526: SETERRQ3(1,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
527: lu->firsttag, lasttag, tagbound) ;
528: }
529: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
530: lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
531: lu->firsttag += 5*size ;
532: if ( lu->options.msglvl > 2 ) {
533: fprintf(lu->options.msgFile, "\n\n numeric factorization after post-processing");
534: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
535: fflush(lu->options.msgFile) ;
536: }
537:
538: /* create the solve map object */
/* NOTE(review): a new SolveMap is created on every call and no free of a previous
   one is visible in this file - confirm repeated factorizations do not leak it */
539: lu->solvemap = SolveMap_new() ;
540: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
541: FrontMtx_upperBlockIVL(lu->frontmtx),
542: FrontMtx_lowerBlockIVL(lu->frontmtx),
543: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
544: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
545: if ( lu->options.msglvl > 2 ) {
546: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile) ;
547: fflush(lu->options.msgFile) ;
548: }
550: /* redistribute the submatrices of the factors */
551: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
552: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles) ;
553: if ( lu->options.msglvl > 2 ) {
554: fprintf(lu->options.msgFile, "\n\n numeric factorization after split") ;
555: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
556: fflush(lu->options.msgFile) ;
557: }
559: /* create a solution DenseMtx object */
/* NOTE(review): ownedColumnsIV is likewise re-created on every call with no
   visible free of the previous object */
560: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
561: lu->options.msglvl, lu->options.msgFile) ;
562: lu->nmycol = IV_size(lu->ownedColumnsIV) ;
563: if ( lu->nmycol > 0) {
564: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol) ;
565: /* get pointers rowindX and entX */
566: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
567: lu->entX = DenseMtx_entries(lu->mtxX) ;
568: } else { /* lu->nmycol == 0 */
569: lu->entX = 0;
570: lu->rowindX = 0;
571: }
/* drop the scatter built by an earlier MatSolve(); the next solve rebuilds it
   because it tests !lu->scat */
573: if ( lu->scat ){
574: VecDestroy(lu->vec_spooles);
575: ISDestroy(lu->iden);
576: ISDestroy(lu->is_petsc);
577: VecScatterDestroy(lu->scat);
578: }
579: lu->scat = PETSC_NULL;
/* later calls take the "previously computed symbolic factor" branch above */
580: lu->flg = SAME_NONZERO_PATTERN;
/* tells MatDestroy_MPIAIJSpooles() that there are SPOOLES objects to free */
582: lu->CleanUpSpooles = PETSC_TRUE;
583: return(0);
584: }
586: EXTERN_C_BEGIN
589: int MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,const MatType type,Mat *newmat) {
590: /* This routine is only called to convert a MATMPIAIJ matrix */
591: /* to a MATMPIAIJSPOOLES matrix, so we will ignore 'MatType type'. */
592: int ierr;
593: Mat B=*newmat;
594: Mat_Spooles *lu;
597: if (B != A) {
598: /* This routine is inherited, so we know the type is correct. */
599: MatDuplicate(A,MAT_COPY_VALUES,&B);
600: }
602: PetscNew(Mat_Spooles,&lu);
603: B->spptr = (void*)lu;
605: lu->basetype = MATMPIAIJ;
606: lu->CleanUpSpooles = PETSC_FALSE;
607: lu->MatDuplicate = A->ops->duplicate;
608: lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
609: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
610: lu->MatView = A->ops->view;
611: lu->MatAssemblyEnd = A->ops->assemblyend;
612: lu->MatDestroy = A->ops->destroy;
613: B->ops->duplicate = MatDuplicate_MPIAIJSpooles;
614: B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJSpooles;
615: B->ops->view = MatView_SeqAIJSpooles;
616: B->ops->assemblyend = MatAssemblyEnd_MPIAIJSpooles;
617: B->ops->destroy = MatDestroy_MPIAIJSpooles;
619: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
620: "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
621: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
622: "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
623: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
624: *newmat = B;
625: return(0);
626: }
627: EXTERN_C_END
631: int MatDuplicate_MPIAIJSpooles(Mat A, MatDuplicateOption op, Mat *M) {
632: int ierr;
633: Mat_Spooles *lu=(Mat_Spooles *)A->spptr;
636: (*lu->MatDuplicate)(A,op,M);
637: PetscMemcpy((*M)->spptr,lu,sizeof(Mat_Spooles));
638: return(0);
639: }
641: /*MC
642: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
643: via the external package Spooles.
645: If Spooles is installed (see the manual for
646: instructions on how to declare the existence of external packages),
647: a matrix type can be constructed which invokes SPOOLES solvers.
648: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).
649: This matrix type is only supported for double precision real.
651: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation is
652: supported for this matrix type. One can also call MatConvert for an inplace conversion to or from
653: the MATMPIAIJ type without data copy.
655: Consult Spooles documentation for more information about the options database keys below.
657: Options Database Keys:
658: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
659: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
660: . -mat_spooles_seed <seed> - random number seed used for ordering
661: . -mat_spooles_msglvl <msglvl> - message output level
662: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
663: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
664: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
665: . -mat_spooles_maxsize <n> - maximum size of a supernode
666: . -mat_spooles_FrontMtxInfo <true,false> - print Spooles information about the computed factorization
667: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
668: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
669: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
670: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
671: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
672: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
674: Level: beginner
676: .seealso: PCLU
677: M*/
679: EXTERN_C_BEGIN
682: int MatCreate_MPIAIJSpooles(Mat A) {
684: Mat A_diag;
687: /* Change type name before calling MatSetType to force proper construction of MPIAIJ and MPIAIJSpooles types */
688: PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJSPOOLES);
689: MatSetType(A,MATMPIAIJ);
690: A_diag = ((Mat_MPIAIJ *)A->data)->A;
691: MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,&A_diag);
692: MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,&A);
693: return(0);
694: }
695: EXTERN_C_END