Actual source code: dscpack.c
1: /*$Id: dscpack.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
4: */
6: #include src/mat/impls/baij/seq/baij.h
7: #include src/mat/impls/baij/mpi/mpibaij.h
9: EXTERN_C_BEGIN
10: #include "dscmain.h"
11: EXTERN_C_END
13: typedef struct {
14: DSC_Solver My_DSC_Solver;
15: int num_local_strucs, *local_struc_old_num,
16: num_local_cols, num_local_nonz,
17: *global_struc_new_col_num,
18: *global_struc_new_num, *global_struc_owner,
19: dsc_id,bs,*local_cols_old_num,*replication;
20: int order_code,scheme_code,factor_type, stat,
21: LBLASLevel,DBLASLevel,max_mem_allowed;
22: MatStructure flg;
23: IS my_cols,iden,iden_dsc;
24: Vec vec_dsc;
25: VecScatter scat;
26: MPI_Comm comm_dsc;
28: /* A few inheritance details */
29: int size;
30: int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
31: int (*MatView)(Mat,PetscViewer);
32: int (*MatAssemblyEnd)(Mat,MatAssemblyType);
33: int (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
34: int (*MatDestroy)(Mat);
35: int (*MatPreallocate)(Mat,int,int,int*,int,int*);
37: /* Clean up flag for destructor */
38: PetscTruth CleanUpDSCPACK;
39: } Mat_DSC;
41: EXTERN int MatDuplicate_DSCPACK(Mat,MatDuplicateOption,Mat*);
42: EXTERN_C_BEGIN
43: EXTERN int MatConvert_Base_DSCPACK(Mat,const MatType,Mat*);
44: EXTERN_C_END
46: /* DSC function */
49: void isort2(int size, int *list, int *idx_dsc) {
50: /* in increasing order */
51: /* idx_dsc will contain indices such that */
52: /* list can be accessed in sorted order */
53: int i, j, x, y;
54:
55: for (i=0; i<size; i++) idx_dsc[i] =i;
57: for (i=1; i<size; i++){
58: y= idx_dsc[i];
59: x=list[idx_dsc[i]];
60: for (j=i-1; ((j>=0) && (x<list[idx_dsc[j]])); j--)
61: idx_dsc[j+1]=idx_dsc[j];
62: idx_dsc[j+1]=y;
63: }
64: }/*end isort2*/
68: int BAIJtoMyANonz( int *AIndex, int *AStruct, int bs,
69: RealNumberType *ANonz, int NumLocalStructs,
70: int NumLocalNonz, int *GlobalStructNewColNum,
71: int *LocalStructOldNum,
72: int *LocalStructLocalNum,
73: RealNumberType **adr_MyANonz)
74: /*
75: Extract non-zero values of lower triangular part
76: of the permuted matrix that belong to this processor.
78: Only output parameter is adr_MyANonz -- is malloced and changed.
79: Rest are input parameters left unchanged.
81: When LocalStructLocalNum == PETSC_NULL,
82: AIndex, AStruct, and ANonz contain entire original matrix A
83: in PETSc SeqBAIJ format,
84: otherwise,
85: AIndex, AStruct, and ANonz are indeces for the submatrix
86: of A whose colomns (in increasing order) belong to this processor.
88: Other variables supply information on ownership of columns
89: and the new numbering in a fill-reducing permutation
91: This information is used to setup lower half of A nonzeroes
92: for columns owned by this processor
93: */
94: {
95: int i, j, k, iold,inew, jj, kk,ierr, bs2=bs*bs,
96: *idx, *NewColNum,
97: MyANonz_last, max_struct=0, struct_size;
98: RealNumberType *MyANonz;
102: /* loop: to find maximum number of subscripts over columns
103: assigned to this processor */
104: for (i=0; i <NumLocalStructs; i++) {
105: /* for each struct i (local) assigned to this processor */
106: if (LocalStructLocalNum){
107: iold = LocalStructLocalNum[i];
108: } else {
109: iold = LocalStructOldNum[i];
110: }
111:
112: struct_size = AIndex[iold+1] - AIndex[iold];
113: if ( max_struct <= struct_size) max_struct = struct_size;
114: }
116: /* allocate tmp arrays large enough to hold densest struct */
117: PetscMalloc((2*max_struct+1)*sizeof(int),&NewColNum);
118: idx = NewColNum + max_struct;
119:
120: PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
121: *adr_MyANonz = MyANonz;
123: /* loop to set up nonzeroes in MyANonz */
124: MyANonz_last = 0 ; /* points to first empty space in MyANonz */
125: for (i=0; i <NumLocalStructs; i++) {
127: /* for each struct i (local) assigned to this processor */
128: if (LocalStructLocalNum){
129: iold = LocalStructLocalNum[i];
130: } else {
131: iold = LocalStructOldNum[i];
132: }
134: struct_size = AIndex[iold+1] - AIndex[iold];
135: for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
136: NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
137: k++;
138: }
139: isort2(struct_size, NewColNum, idx);
140:
141: kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
142: inew = GlobalStructNewColNum[LocalStructOldNum[i]];
144: for (jj = 0; jj < bs; jj++) {
145: for (j=0; j<struct_size; j++){
146: for ( k = 0; k<bs; k++){
147: if (NewColNum[idx[j]] + k >= inew)
148: MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
149: }
150: }
151: inew++;
152: }
153: } /* end outer loop for i */
155: PetscFree(NewColNum);
156: if (MyANonz_last != NumLocalNonz)
157: SETERRQ2(1,"MyANonz_last %d != NumLocalNonz %d\n",MyANonz_last, NumLocalNonz);
158: return(0);
159: }
161: EXTERN_C_BEGIN
164: int MatConvert_DSCPACK_Base(Mat A,const MatType type,Mat *newmat) {
165: int ierr;
166: Mat B=*newmat;
167: Mat_DSC *lu=(Mat_DSC*)A->spptr;
168:
170: if (B != A) {
171: MatDuplicate(A,MAT_COPY_VALUES,&B);
172: }
173: /* Reset the original function pointers */
174: B->ops->duplicate = lu->MatDuplicate;
175: B->ops->view = lu->MatView;
176: B->ops->assemblyend = lu->MatAssemblyEnd;
177: B->ops->choleskyfactorsymbolic = lu->MatCholeskyFactorSymbolic;
178: B->ops->destroy = lu->MatDestroy;
180: PetscObjectChangeTypeName((PetscObject)B,type);
181: PetscFree(lu);
182: *newmat = B;
184: return(0);
185: }
186: EXTERN_C_END
190: int MatDestroy_DSCPACK(Mat A) {
191: Mat_DSC *lu=(Mat_DSC*)A->spptr;
192: int ierr;
193:
195: if (lu->CleanUpDSCPACK) {
196: if (lu->dsc_id != -1) {
197: if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
198: DSC_FreeAll(lu->My_DSC_Solver);
199: DSC_Close0(lu->My_DSC_Solver);
200:
201: PetscFree(lu->local_cols_old_num);
202: }
203: DSC_End(lu->My_DSC_Solver);
204:
205: MPI_Comm_free(&(lu->comm_dsc));
206: ISDestroy(lu->my_cols);
207: PetscFree(lu->replication);
208: VecDestroy(lu->vec_dsc);
209: ISDestroy(lu->iden_dsc);
210: VecScatterDestroy(lu->scat);
211:
212: if (lu->size >1) ISDestroy(lu->iden);
213: }
214: if (lu->size == 1) {
215: MatConvert_DSCPACK_Base(A,MATSEQBAIJ,&A);
216: } else {
217: MatConvert_DSCPACK_Base(A,MATMPIBAIJ,&A);
218: }
219: (*A->ops->destroy)(A);
220: return(0);
221: }
225: int MatSolve_DSCPACK(Mat A,Vec b,Vec x) {
226: Mat_DSC *lu= (Mat_DSC*)A->spptr;
227: int ierr;
228: RealNumberType *solution_vec,*rhs_vec;
231: /* scatter b into seq vec_dsc */
232: if ( !lu->scat ) {
233: VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
234: }
235: VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
236: VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
238: if (lu->dsc_id != -1){
239: VecGetArray(lu->vec_dsc,&rhs_vec);
240: DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
241: VecRestoreArray(lu->vec_dsc,&rhs_vec);
242:
243: DSC_Solve(lu->My_DSC_Solver);
244: if (ierr != DSC_NO_ERROR) {
245: DSC_ErrorDisplay(lu->My_DSC_Solver);
246: SETERRQ(1,"Error in calling DSC_Solve");
247: }
249: /* get the permuted local solution */
250: VecGetArray(lu->vec_dsc,&solution_vec);
251: DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
252: VecRestoreArray(lu->vec_dsc,&solution_vec);
254: } /* end of if (lu->dsc_id != -1) */
256: /* put permuted local solution solution_vec into x in the original order */
257: VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
258: VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
260: return(0);
261: }
265: int MatCholeskyFactorNumeric_DSCPACK(Mat A,Mat *F) {
266: Mat_SeqBAIJ *a_seq;
267: Mat_DSC *lu=(Mat_DSC*)(*F)->spptr;
268: Mat *tseq,A_seq=0;
269: RealNumberType *my_a_nonz;
270: int ierr, M=A->M, Mbs=M/lu->bs, size,
271: max_mem_estimate, max_single_malloc_blk,
272: number_of_procs,i,j,next,iold,
273: *idx,*iidx=0,*itmp;
274: IS my_cols_sorted;
275:
277: MPI_Comm_size(A->comm,&size);
278:
279: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
281: /* convert A to A_seq */
282: if (size > 1) {
283: ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
284: MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
285:
286: A_seq = *tseq;
287: PetscFree(tseq);
288: a_seq = (Mat_SeqBAIJ*)A_seq->data;
289: } else {
290: a_seq = (Mat_SeqBAIJ*)A->data;
291: }
292:
293: PetscMalloc(Mbs*sizeof(int),&lu->replication);
294: for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;
296: number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
297:
298: i = size;
299: if ( number_of_procs < i ) i = number_of_procs;
300: number_of_procs = 1;
301: while ( i > 1 ){
302: number_of_procs *= 2; i /= 2;
303: }
305: /* DSC_Solver starts */
306: DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, lu->comm_dsc );
308: if (lu->dsc_id != -1) {
309: DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
310: &M,&lu->num_local_strucs,
311: &lu->num_local_cols, &lu->num_local_nonz, &lu->global_struc_new_col_num,
312: &lu->global_struc_new_num, &lu->global_struc_owner,
313: &lu->local_struc_old_num);
314: if (ierr != DSC_NO_ERROR) {
315: DSC_ErrorDisplay(lu->My_DSC_Solver);
316: SETERRQ(1,"Error when use DSC_Order()");
317: }
319: DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
320: lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
321: if (ierr != DSC_NO_ERROR) {
322: DSC_ErrorDisplay(lu->My_DSC_Solver);
323: SETERRQ(1,"Error when use DSC_Order");
324: }
326: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
327: lu->num_local_strucs, lu->num_local_nonz,
328: lu->global_struc_new_col_num,
329: lu->local_struc_old_num,
330: PETSC_NULL,
331: &my_a_nonz);
332: if (ierr <0) {
333: DSC_ErrorDisplay(lu->My_DSC_Solver);
334: SETERRQ1(1,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
335: }
337: /* get local_cols_old_num and IS my_cols to be used later */
338: PetscMalloc(lu->num_local_cols*sizeof(int),&lu->local_cols_old_num);
339: for (next = 0, i=0; i<lu->num_local_strucs; i++){
340: iold = lu->bs*lu->local_struc_old_num[i];
341: for (j=0; j<lu->bs; j++)
342: lu->local_cols_old_num[next++] = iold++;
343: }
344: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
345:
346: } else { /* lu->dsc_id == -1 */
347: lu->num_local_cols = 0;
348: lu->local_cols_old_num = 0;
349: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
350: }
351: /* generate vec_dsc and iden_dsc to be used later */
352: VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
353: ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
354: lu->scat = PETSC_NULL;
356: if ( size>1 ) {MatDestroy(A_seq); }
358: } else { /* use previously computed symbolic factor */
359: /* convert A to my A_seq */
360: if (size > 1) {
361: if (lu->dsc_id == -1) {
362: itmp = 0;
363: } else {
364: PetscMalloc(2*lu->num_local_strucs*sizeof(int),&idx);
365: iidx = idx + lu->num_local_strucs;
366: PetscMalloc(lu->num_local_cols*sizeof(int),&itmp);
367:
368: isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
369: for (next=0, i=0; i< lu->num_local_strucs; i++) {
370: iold = lu->bs*lu->local_struc_old_num[idx[i]];
371: for (j=0; j<lu->bs; j++){
372: itmp[next++] = iold++; /* sorted local_cols_old_num */
373: }
374: }
375: for (i=0; i< lu->num_local_strucs; i++) {
376: iidx[idx[i]] = i; /* inverse of idx */
377: }
378: } /* end of (lu->dsc_id == -1) */
379: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
380: MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
381: ISDestroy(my_cols_sorted);
382:
383: A_seq = *tseq;
384: PetscFree(tseq);
385:
386: if (lu->dsc_id != -1) {
387: DSC_ReFactorInitialize(lu->My_DSC_Solver);
389: a_seq = (Mat_SeqBAIJ*)A_seq->data;
390: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
391: lu->num_local_strucs, lu->num_local_nonz,
392: lu->global_struc_new_col_num,
393: lu->local_struc_old_num,
394: iidx,
395: &my_a_nonz);
396: if (ierr <0) {
397: DSC_ErrorDisplay(lu->My_DSC_Solver);
398: SETERRQ1(1,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
399: }
400:
401: PetscFree(idx);
402: PetscFree(itmp);
403: } /* end of if(lu->dsc_id != -1) */
404: } else { /* size == 1 */
405: a_seq = (Mat_SeqBAIJ*)A->data;
406:
407: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
408: lu->num_local_strucs, lu->num_local_nonz,
409: lu->global_struc_new_col_num,
410: lu->local_struc_old_num,
411: PETSC_NULL,
412: &my_a_nonz);
413: if (ierr <0) {
414: DSC_ErrorDisplay(lu->My_DSC_Solver);
415: SETERRQ1(1,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
416: }
417: }
418: if ( size>1 ) {MatDestroy(A_seq); }
419: }
420:
421: if (lu->dsc_id != -1) {
422: DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
423: PetscFree(my_a_nonz);
424: }
425:
426: (*F)->assembled = PETSC_TRUE;
427: lu->flg = SAME_NONZERO_PATTERN;
429: return(0);
430: }
432: /* Note the Petsc permutation r is ignored */
435: int MatCholeskyFactorSymbolic_DSCPACK(Mat A,IS r,MatFactorInfo *info,Mat *F) {
436: Mat B;
437: Mat_DSC *lu;
438: int ierr,bs,indx;
439: PetscTruth flg;
440: const char *ftype[]={"LDLT","LLT"},*ltype[]={"LBLAS1","LBLAS2","LBLAS3"},*dtype[]={"DBLAS1","DBLAS2"};
444: /* Create the factorization matrix F */
445: MatGetBlockSize(A,&bs);
446: MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);
447: MatSetType(B,A->type_name);
448: MatSeqBAIJSetPreallocation(B,bs,0,PETSC_NULL);
449: MatMPIBAIJSetPreallocation(B,bs,0,PETSC_NULL,0,PETSC_NULL);
450:
451: lu = (Mat_DSC*)B->spptr;
452: lu->bs = bs;
454: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_DSCPACK;
455: B->ops->solve = MatSolve_DSCPACK;
456: B->factor = FACTOR_CHOLESKY;
458: /* Set the default input options */
459: lu->order_code = 2;
460: lu->scheme_code = 1;
461: lu->factor_type = 2;
462: lu->stat = 0; /* do not display stats */
463: lu->LBLASLevel = DSC_LBLAS3;
464: lu->DBLASLevel = DSC_DBLAS2;
465: lu->max_mem_allowed = 256;
466: MPI_Comm_dup(A->comm,&(lu->comm_dsc));
467: /* Get the runtime input options */
468: PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");
470: PetscOptionsInt("-mat_dscpack_order","order_code: \n\
471: 1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", \
472: "None",
473: lu->order_code,&lu->order_code,PETSC_NULL);
475: PetscOptionsInt("-mat_dscpack_scheme","scheme_code: \n\
476: 1 = standard factorization, 2 = factorization + selective inversion", \
477: "None",
478: lu->scheme_code,&lu->scheme_code,PETSC_NULL);
479:
480: PetscOptionsEList("-mat_dscpack_factor","factor_type","None",ftype,2,ftype[0],&indx,&flg);
481: if (flg) {
482: switch (indx) {
483: case 0:
484: lu->factor_type = DSC_LDLT;
485: break;
486: case 1:
487: lu->factor_type = DSC_LLT;
488: break;
489: }
490: }
491: PetscOptionsInt("-mat_dscpack_MaxMemAllowed","","None",
492: lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);
494: PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display, 1 = display",
495: "None", lu->stat,&lu->stat,PETSC_NULL);
496:
497: PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",ltype,3,ltype[2],&indx,&flg);
498: if (flg) {
499: switch (indx) {
500: case 0:
501: lu->LBLASLevel = DSC_LBLAS1;
502: break;
503: case 1:
504: lu->LBLASLevel = DSC_LBLAS2;
505: break;
506: case 2:
507: lu->LBLASLevel = DSC_LBLAS3;
508: break;
509: }
510: }
512: PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",dtype,2,dtype[1],&indx,&flg);
513: if (flg) {
514: switch (indx) {
515: case 0:
516: lu->DBLASLevel = DSC_DBLAS1;
517: break;
518: case 1:
519: lu->DBLASLevel = DSC_DBLAS2;
520: break;
521: }
522: }
524: PetscOptionsEnd();
525:
526: lu->flg = DIFFERENT_NONZERO_PATTERN;
528: lu->My_DSC_Solver = DSC_Begin();
529: lu->CleanUpDSCPACK = PETSC_TRUE;
530: *F = B;
531: return(0);
532: }
536: int MatAssemblyEnd_DSCPACK(Mat A,MatAssemblyType mode) {
537: int ierr;
538: Mat_DSC *lu=(Mat_DSC*)A->spptr;
541: (*lu->MatAssemblyEnd)(A,mode);
542: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
543: A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
544: return(0);
545: }
549: int MatFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
550: {
551: Mat_DSC *lu=(Mat_DSC*)A->spptr;
552: int ierr;
553: char *s=0;
554:
556: PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:\n");
558: switch (lu->order_code) {
559: case 1: s = "ND"; break;
560: case 2: s = "Hybrid with Minimum Degree"; break;
561: case 3: s = "Hybrid with Minimum Deficiency"; break;
562: }
563: PetscViewerASCIIPrintf(viewer," order_code: %s \n",s);
565: switch (lu->scheme_code) {
566: case 1: s = "standard factorization"; break;
567: case 2: s = "factorization + selective inversion"; break;
568: }
569: PetscViewerASCIIPrintf(viewer," scheme_code: %s \n",s);
571: switch (lu->stat) {
572: case 0: s = "NO"; break;
573: case 1: s = "YES"; break;
574: }
575: PetscViewerASCIIPrintf(viewer," display stats: %s \n",s);
576:
577: if ( lu->factor_type == DSC_LLT) {
578: s = "LLT";
579: } else if ( lu->factor_type == DSC_LDLT){
580: s = "LDLT";
581: } else {
582: SETERRQ(1,"Unknown factor type");
583: }
584: PetscViewerASCIIPrintf(viewer," factor type: %s \n",s);
586: if ( lu->LBLASLevel == DSC_LBLAS1) {
587: s = "BLAS1";
588: } else if ( lu->LBLASLevel == DSC_LBLAS2){
589: s = "BLAS2";
590: } else if ( lu->LBLASLevel == DSC_LBLAS3){
591: s = "BLAS3";
592: } else {
593: SETERRQ(1,"Unknown local phase BLAS level");
594: }
595: PetscViewerASCIIPrintf(viewer," local phase BLAS level: %s \n",s);
596:
597: if ( lu->DBLASLevel == DSC_DBLAS1) {
598: s = "BLAS1";
599: } else if ( lu->DBLASLevel == DSC_DBLAS2){
600: s = "BLAS2";
601: } else {
602: SETERRQ(1,"Unknown distributed phase BLAS level");
603: }
604: PetscViewerASCIIPrintf(viewer," distributed phase BLAS level: %s \n",s);
605: return(0);
606: }
610: int MatView_DSCPACK(Mat A,PetscViewer viewer) {
611: int ierr,size;
612: PetscTruth isascii;
613: PetscViewerFormat format;
614: Mat_DSC *lu=(Mat_DSC*)A->spptr;
617: /* This convertion ugliness is because MatView for BAIJ types calls MatConvert to AIJ */
618: size = lu->size;
619: if (size==1) {
620: MatConvert(A,MATSEQBAIJ,&A);
621: } else {
622: MatConvert(A,MATMPIBAIJ,&A);
623: }
625: MatView(A,viewer);
627: MatConvert(A,MATDSCPACK,&A);
629: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
630: if (isascii) {
631: PetscViewerGetFormat(viewer,&format);
632: if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
633: MatFactorInfo_DSCPACK(A,viewer);
634: }
635: }
636: return(0);
637: }
639: EXTERN_C_BEGIN
642: int MatMPIBAIJSetPreallocation_MPIDSCPACK(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
643: {
644: Mat A;
645: Mat_DSC *lu = (Mat_DSC*)B->spptr;
646: int ierr;
649: /*
650: After performing the MPIBAIJ Preallocation, we need to convert the local diagonal block matrix
651: into DSCPACK type so that the block jacobi preconditioner (for example) can use DSCPACK. I would
652: like this to be done in the MatCreate routine, but the creation of this inner matrix requires
653: block size info so that PETSc can determine the local size properly. The block size info is set
654: in the preallocation routine.
655: */
656: (*lu->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
657: A = ((Mat_MPIBAIJ *)B->data)->A;
658: MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
659: return(0);
660: }
661: EXTERN_C_END
663: EXTERN_C_BEGIN
666: int MatConvert_Base_DSCPACK(Mat A,const MatType type,Mat *newmat) {
667: /* This routine is only called to convert to MATDSCPACK */
668: /* from MATSEQBAIJ if A has a single process communicator */
669: /* or MATMPIBAIJ otherwise, so we will ignore 'MatType type'. */
670: int ierr;
671: MPI_Comm comm;
672: Mat B=*newmat;
673: Mat_DSC *lu;
674: void (*f)(void);
677: if (B != A) {
678: MatDuplicate(A,MAT_COPY_VALUES,&B);
679: }
681: PetscObjectGetComm((PetscObject)A,&comm);
682: PetscNew(Mat_DSC,&lu);
684: lu->MatDuplicate = A->ops->duplicate;
685: lu->MatView = A->ops->view;
686: lu->MatAssemblyEnd = A->ops->assemblyend;
687: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
688: lu->MatDestroy = A->ops->destroy;
689: lu->CleanUpDSCPACK = PETSC_FALSE;
691: B->spptr = (void*)lu;
692: B->ops->duplicate = MatDuplicate_DSCPACK;
693: B->ops->view = MatView_DSCPACK;
694: B->ops->assemblyend = MatAssemblyEnd_DSCPACK;
695: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
696: B->ops->destroy = MatDestroy_DSCPACK;
698: MPI_Comm_size(comm,&(lu->size));
699: if (lu->size == 1) {
700: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_dscpack_C",
701: "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
702: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_seqbaij_C",
703: "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
704: } else {
705: /* I really don't like needing to know the tag: MatMPIBAIJSetPreallocation_C */
706: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
707: if (f) {
708: lu->MatPreallocate = (int (*)(Mat,int,int,int*,int,int*))f;
709: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
710: "MatMPIBAIJSetPreallocation_MPIDSCPACK",
711: MatMPIBAIJSetPreallocation_MPIDSCPACK);
712: }
713: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_dscpack_C",
714: "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
715: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_mpibaij_C",
716: "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
717: }
718: PetscObjectChangeTypeName((PetscObject)B,MATDSCPACK);
719: *newmat = B;
720: return(0);
721: }
722: EXTERN_C_END
726: int MatDuplicate_DSCPACK(Mat A, MatDuplicateOption op, Mat *M) {
727: int ierr;
728: Mat_DSC *lu=(Mat_DSC *)A->spptr;
731: (*lu->MatDuplicate)(A,op,M);
732: PetscMemcpy((*M)->spptr,lu,sizeof(Mat_DSC));
733: return(0);
734: }
736: /*MC
737: MATDSCPACK - MATDSCPACK = "dscpack" - A matrix type providing direct solvers (Cholesky) for sequential
738: or distributed matrices via the external package DSCPACK.
740: If DSCPACK is installed (see the manual for
741: instructions on how to declare the existence of external packages),
742: a matrix type can be constructed which invokes DSCPACK solvers.
743: After calling MatCreate(...,A), simply call MatSetType(A,MATDSCPACK).
744: This matrix type is only supported for double precision real.
746: This matrix inherits from MATSEQBAIJ if constructed with a single process communicator,
747: and from MATMPIBAIJ otherwise. As a result, for sequential matrices, MatSeqBAIJSetPreallocation is
748: supported, and similarly MatMPIBAIJSetPreallocation is supported for distributed matrices. It is
749: recommended that you call both of the above preallocation routines for simplicity. Also,
750: MatConvert can be called to perform inplace conversion to and from MATSEQBAIJ or MATMPIBAIJ
751: for sequential or distributed matrices respectively.
753: Options Database Keys:
754: + -mat_type dscpack - sets the matrix type to dscpack during a call to MatSetFromOptions()
755: . -mat_dscpack_order <1,2,3> - DSCPACK ordering, 1:ND, 2:Hybrid with Minimum Degree, 3:Hybrid with Minimum Deficiency
756: . -mat_dscpack_scheme <1,2> - factorization scheme, 1:standard factorization, 2: factorization with selective inversion
757: . -mat_dscpack_factor <LLT,LDLT> - the type of factorization to be performed.
758: . -mat_dscpack_MaxMemAllowed <n> - the maximum memory to be used during factorization
759: . -mat_dscpack_stats <0,1> - display stats of the factorization and solves during MatDestroy(), 0: no display, 1: display
760: . -mat_dscpack_LBLAS <LBLAS1,LBLAS2,LBLAS3> - BLAS level used in the local phase
761: - -mat_dscpack_DBLAS <DBLAS1,DBLAS2> - BLAS level used in the distributed phase
763: Level: beginner
765: .seealso: PCCHOLESKY
766: M*/
768: EXTERN_C_BEGIN
771: int MatCreate_DSCPACK(Mat A) {
772: int ierr,size;
775: /* Change type name before calling MatSetType to force proper construction of SeqBAIJ or MPIBAIJ */
776: /* and DSCPACK types */
777: PetscObjectChangeTypeName((PetscObject)A,MATDSCPACK);
778: MPI_Comm_size(A->comm,&size);
779: if (size == 1) {
780: MatSetType(A,MATSEQBAIJ);
781: } else {
782: MatSetType(A,MATMPIBAIJ);
783: }
784: MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
785: return(0);
786: }
787: EXTERN_C_END