Actual source code: aijfact.c
1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
9: int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
10: {
13: SETERRQ(PETSC_ERR_SUP,"Code not written");
14: #if !defined(PETSC_USE_DEBUG)
15: return(0);
16: #endif
17: }
20: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
21: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
23: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
24: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
25: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
29: /* ------------------------------------------------------------
31: This interface was contribed by Tony Caola
33: This routine is an interface to the pivoting drop-tolerance
34: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35: SPARSEKIT2.
37: The SPARSEKIT2 routines used here are covered by the GNU
38: copyright; see the file gnu in this directory.
40: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41: help in getting this routine ironed out.
43: The major drawback to this routine is that if info->fill is
44: not large enough it fails rather than allocating more space;
45: this can be fixed by hacking/improving the f2c version of
46: Yousef Saad's code.
48: ------------------------------------------------------------
49: */
50: int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
51: {
52: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54: SETERRQ(1,"This distribution does not include GNU Copyright code\n\
55: You can obtain the drop tolerance routines by installing PETSc from\n\
56: www.mcs.anl.gov/petsc\n");
57: #else
58: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
59: IS iscolf,isicol,isirow;
60: PetscTruth reorder;
61: int *c,*r,*ic,ierr,i,n = A->m;
62: int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63: int *ordcol,*iwk,*iperm,*jw;
64: int jmax,lfill,job,*o_i,*o_j;
65: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66: PetscReal permtol,af;
70: if (info->dt == PETSC_DEFAULT) info->dt = .005;
71: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
72: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
73: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
74: lfill = (int)(info->dtcount/2.0);
75: jmax = (int)(info->fill*a->nz);
76: permtol = info->dtcol;
79: /* ------------------------------------------------------------
80: If reorder=.TRUE., then the original matrix has to be
81: reordered to reflect the user selected ordering scheme, and
82: then de-reordered so it is in it's original format.
83: Because Saad's dperm() is NOT in place, we have to copy
84: the original matrix and allocate more storage. . .
85: ------------------------------------------------------------
86: */
88: /* set reorder to true if either isrow or iscol is not identity */
89: ISIdentity(isrow,&reorder);
90: if (reorder) {ISIdentity(iscol,&reorder);}
91: reorder = PetscNot(reorder);
93:
94: /* storage for ilu factor */
95: PetscMalloc((n+1)*sizeof(int),&new_i);
96: PetscMalloc(jmax*sizeof(int),&new_j);
97: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
98: PetscMalloc(n*sizeof(int),&ordcol);
100: /* ------------------------------------------------------------
101: Make sure that everything is Fortran formatted (1-Based)
102: ------------------------------------------------------------
103: */
104: for (i=old_i[0];i<old_i[n];i++) {
105: old_j[i]++;
106: }
107: for(i=0;i<n+1;i++) {
108: old_i[i]++;
109: };
110:
112: if (reorder) {
113: ISGetIndices(iscol,&c);
114: ISGetIndices(isrow,&r);
115: for(i=0;i<n;i++) {
116: r[i] = r[i]+1;
117: c[i] = c[i]+1;
118: }
119: PetscMalloc((n+1)*sizeof(int),&old_i2);
120: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
122: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123: for (i=0;i<n;i++) {
124: r[i] = r[i]-1;
125: c[i] = c[i]-1;
126: }
127: ISRestoreIndices(iscol,&c);
128: ISRestoreIndices(isrow,&r);
129: o_a = old_a2;
130: o_j = old_j2;
131: o_i = old_i2;
132: } else {
133: o_a = old_a;
134: o_j = old_j;
135: o_i = old_i;
136: }
138: /* ------------------------------------------------------------
139: Call Saad's ilutp() routine to generate the factorization
140: ------------------------------------------------------------
141: */
143: PetscMalloc(2*n*sizeof(int),&iperm);
144: PetscMalloc(2*n*sizeof(int),&jw);
145: PetscMalloc(n*sizeof(PetscScalar),&w);
147: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
148: if (ierr) {
149: switch (ierr) {
150: case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
151: case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
152: case -5: SETERRQ(1,"ilutp(), zero row encountered");
153: case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
154: case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
155: default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
156: }
157: }
159: PetscFree(w);
160: PetscFree(jw);
162: /* ------------------------------------------------------------
163: Saad's routine gives the result in Modified Sparse Row (msr)
164: Convert to Compressed Sparse Row format (csr)
165: ------------------------------------------------------------
166: */
168: PetscMalloc(n*sizeof(PetscScalar),&wk);
169: PetscMalloc((n+1)*sizeof(int),&iwk);
171: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173: PetscFree(iwk);
174: PetscFree(wk);
176: if (reorder) {
177: PetscFree(old_a2);
178: PetscFree(old_j2);
179: PetscFree(old_i2);
180: } else {
181: /* fix permutation of old_j that the factorization introduced */
182: for (i=old_i[0]; i<old_i[n]; i++) {
183: old_j[i-1] = iperm[old_j[i-1]-1];
184: }
185: }
187: /* get rid of the shift to indices starting at 1 */
188: for (i=0; i<n+1; i++) {
189: old_i[i]--;
190: }
191: for (i=old_i[0];i<old_i[n];i++) {
192: old_j[i]--;
193: }
194:
195: /* Make the factored matrix 0-based */
196: for (i=0; i<n+1; i++) {
197: new_i[i]--;
198: }
199: for (i=new_i[0];i<new_i[n];i++) {
200: new_j[i]--;
201: }
203: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204: /*-- permute the right-hand-side and solution vectors --*/
205: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
206: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
207: ISGetIndices(isicol,&ic);
208: for(i=0; i<n; i++) {
209: ordcol[i] = ic[iperm[i]-1];
210: };
211: ISRestoreIndices(isicol,&ic);
212: ISDestroy(isicol);
214: PetscFree(iperm);
216: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
217: PetscFree(ordcol);
219: /*----- put together the new matrix -----*/
221: MatCreate(A->comm,n,n,n,n,fact);
222: MatSetType(*fact,A->type_name);
223: MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);
224: (*fact)->factor = FACTOR_LU;
225: (*fact)->assembled = PETSC_TRUE;
227: b = (Mat_SeqAIJ*)(*fact)->data;
228: PetscFree(b->imax);
229: b->sorted = PETSC_FALSE;
230: b->singlemalloc = PETSC_FALSE;
231: /* the next line frees the default space generated by the MatCreate() */
232: PetscFree(b->a);
233: PetscFree(b->ilen);
234: b->a = new_a;
235: b->j = new_j;
236: b->i = new_i;
237: b->ilen = 0;
238: b->imax = 0;
239: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240: b->row = isirow;
241: b->col = iscolf;
242: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
243: b->maxnz = b->nz = new_i[n];
244: MatMarkDiagonal_SeqAIJ(*fact);
245: (*fact)->info.factor_mallocs = 0;
247: MatMarkDiagonal_SeqAIJ(A);
249: /* check out for identical nodes. If found, use inode functions */
250: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
252: af = ((double)b->nz)/((double)a->nz) + .001;
253: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
254: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
255: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
256: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
258: return(0);
259: #endif
260: }
262: /*
263: Factorization code for AIJ format.
264: */
267: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
268: {
269: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
270: IS isicol;
271: int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
272: int *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273: int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
274: PetscReal f;
277: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
279: ISGetIndices(isrow,&r);
280: ISGetIndices(isicol,&ic);
282: /* get new row pointers */
283: PetscMalloc((n+1)*sizeof(int),&ainew);
284: ainew[0] = 0;
285: /* don't know how many column pointers are needed so estimate */
286: f = info->fill;
287: jmax = (int)(f*ai[n]+1);
288: PetscMalloc((jmax)*sizeof(int),&ajnew);
289: /* fill is a linked list of nonzeros in active row */
290: PetscMalloc((2*n+1)*sizeof(int),&fill);
291: im = fill + n + 1;
292: /* idnew is location of diagonal in factor */
293: PetscMalloc((n+1)*sizeof(int),&idnew);
294: idnew[0] = 0;
296: for (i=0; i<n; i++) {
297: /* first copy previous fill into linked list */
298: nnz = nz = ai[r[i]+1] - ai[r[i]];
299: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300: ajtmp = aj + ai[r[i]];
301: fill[n] = n;
302: while (nz--) {
303: fm = n;
304: idx = ic[*ajtmp++];
305: do {
306: m = fm;
307: fm = fill[m];
308: } while (fm < idx);
309: fill[m] = idx;
310: fill[idx] = fm;
311: }
312: row = fill[n];
313: while (row < i) {
314: ajtmp = ajnew + idnew[row] + 1;
315: nzbd = 1 + idnew[row] - ainew[row];
316: nz = im[row] - nzbd;
317: fm = row;
318: while (nz-- > 0) {
319: idx = *ajtmp++ ;
320: nzbd++;
321: if (idx == i) im[row] = nzbd;
322: do {
323: m = fm;
324: fm = fill[m];
325: } while (fm < idx);
326: if (fm != idx) {
327: fill[m] = idx;
328: fill[idx] = fm;
329: fm = idx;
330: nnz++;
331: }
332: }
333: row = fill[row];
334: }
335: /* copy new filled row into permanent storage */
336: ainew[i+1] = ainew[i] + nnz;
337: if (ainew[i+1] > jmax) {
339: /* estimate how much additional space we will need */
340: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341: /* just double the memory each time */
342: int maxadd = jmax;
343: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345: jmax += maxadd;
347: /* allocate a longer ajnew */
348: PetscMalloc(jmax*sizeof(int),&ajtmp);
349: PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));
350: PetscFree(ajnew);
351: ajnew = ajtmp;
352: realloc++; /* count how many times we realloc */
353: }
354: ajtmp = ajnew + ainew[i];
355: fm = fill[n];
356: nzi = 0;
357: im[i] = nnz;
358: while (nnz--) {
359: if (fm < i) nzi++;
360: *ajtmp++ = fm ;
361: fm = fill[fm];
362: }
363: idnew[i] = ainew[i] + nzi;
364: }
365: if (ai[n] != 0) {
366: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
368: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371: } else {
372: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373: }
375: ISRestoreIndices(isrow,&r);
376: ISRestoreIndices(isicol,&ic);
378: PetscFree(fill);
380: /* put together the new matrix */
381: MatCreate(A->comm,n,n,n,n,B);
382: MatSetType(*B,A->type_name);
383: MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);
384: PetscLogObjectParent(*B,isicol);
385: b = (Mat_SeqAIJ*)(*B)->data;
386: PetscFree(b->imax);
387: b->singlemalloc = PETSC_FALSE;
388: /* the next line frees the default space generated by the Create() */
389: PetscFree(b->a);
390: PetscFree(b->ilen);
391: PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);
392: b->j = ajnew;
393: b->i = ainew;
394: b->diag = idnew;
395: b->ilen = 0;
396: b->imax = 0;
397: b->row = isrow;
398: b->col = iscol;
399: b->lu_damping = info->damping;
400: b->lu_zeropivot = info->zeropivot;
401: b->lu_shift = info->shift;
402: b->lu_shift_fraction = info->shift_fraction;
403: PetscObjectReference((PetscObject)isrow);
404: PetscObjectReference((PetscObject)iscol);
405: b->icol = isicol;
406: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
407: /* In b structure: Free imax, ilen, old a, old j.
408: Allocate idnew, solve_work, new a, new j */
409: PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar)));
410: b->maxnz = b->nz = ainew[n] ;
412: (*B)->factor = FACTOR_LU;
413: (*B)->info.factor_mallocs = realloc;
414: (*B)->info.fill_ratio_given = f;
415: Mat_AIJ_CheckInode(*B,PETSC_FALSE);
416: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
418: if (ai[n] != 0) {
419: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420: } else {
421: (*B)->info.fill_ratio_needed = 0.0;
422: }
423: return(0);
424: }
425: /* ----------------------------------------------------------- */
426: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
430: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431: {
432: Mat C = *B;
433: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
434: IS isrow = b->row,isicol = b->icol;
435: int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
436: int *ajtmpold,*ajtmp,nz,row,*ics;
437: int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
438: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
439: PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
440: PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
441: PetscTruth damp,lushift;
444: ISGetIndices(isrow,&r);
445: ISGetIndices(isicol,&ic);
446: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
447: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
448: rtmps = rtmp; ics = ic;
450: if (!a->diag) {
451: MatMarkDiagonal_SeqAIJ(A);
452: }
454: if (b->lu_shift) { /* set max shift */
455: int *aai = a->i,*ddiag = a->diag;
456: shift_top = 0;
457: for (i=0; i<n; i++) {
458: d = PetscAbsScalar((a->a)[ddiag[i]]);
459: /* calculate amt of shift needed for this row */
460: if (d<=0) {
461: row_shift = 0;
462: } else {
463: row_shift = -2*d;
464: }
465: v = a->a+aai[i];
466: for (j=0; j<aai[i+1]-aai[i]; j++)
467: row_shift += PetscAbsScalar(v[j]);
468: if (row_shift>shift_top) shift_top = row_shift;
469: }
470: }
472: shift_fraction = 0; shift_amount = 0;
473: do {
474: damp = PETSC_FALSE;
475: lushift = PETSC_FALSE;
476: for (i=0; i<n; i++) {
477: nz = ai[i+1] - ai[i];
478: ajtmp = aj + ai[i];
479: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
481: /* load in initial (unfactored row) */
482: nz = a->i[r[i]+1] - a->i[r[i]];
483: ajtmpold = a->j + a->i[r[i]];
484: v = a->a + a->i[r[i]];
485: for (j=0; j<nz; j++) {
486: rtmp[ics[ajtmpold[j]]] = v[j];
487: }
488: rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
490: row = *ajtmp++ ;
491: while (row < i) {
492: pc = rtmp + row;
493: if (*pc != 0.0) {
494: pv = b->a + diag_offset[row] ;
495: pj = b->j + diag_offset[row] + 1;
496: multiplier = *pc / *pv++;
497: *pc = multiplier;
498: nz = ai[row+1] - diag_offset[row] - 1;
499: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
500: PetscLogFlops(2*nz);
501: }
502: row = *ajtmp++;
503: }
504: /* finished row so stick it into b->a */
505: pv = b->a + ai[i] ;
506: pj = b->j + ai[i] ;
507: nz = ai[i+1] - ai[i];
508: diag = diag_offset[i] - ai[i];
509: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
510: rs = 0.0;
511: for (j=0; j<nz; j++) {
512: pv[j] = rtmps[pj[j]];
513: if (j != diag) rs += PetscAbsScalar(pv[j]);
514: }
515: #define MAX_NSHIFT 5
516: if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
517: if (nshift>MAX_NSHIFT) {
518: SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner");
519: } else if (nshift==MAX_NSHIFT) {
520: shift_fraction = shift_hi;
521: lushift = PETSC_FALSE;
522: } else {
523: shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
524: lushift = PETSC_TRUE;
525: }
526: shift_amount = shift_fraction * shift_top;
527: nshift++;
528: break;
529: }
530: if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
531: if (damping) {
532: if (ndamp) damping *= 2.0;
533: damp = PETSC_TRUE;
534: ndamp++;
535: break;
536: } else {
537: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
538: }
539: }
540: }
541: if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
542: /*
543: * if no shift in this attempt & shifting & started shifting & can refine,
544: * then try lower shift
545: */
546: shift_hi = shift_fraction;
547: shift_fraction = (shift_hi+shift_lo)/2.;
548: shift_amount = shift_fraction * shift_top;
549: lushift = PETSC_TRUE;
550: nshift++;
551: }
552: } while (damp || lushift);
554: /* invert diagonal entries for simplier triangular solves */
555: for (i=0; i<n; i++) {
556: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
557: }
559: PetscFree(rtmp);
560: ISRestoreIndices(isicol,&ic);
561: ISRestoreIndices(isrow,&r);
562: C->factor = FACTOR_LU;
563: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
564: C->assembled = PETSC_TRUE;
565: PetscLogFlops(C->n);
566: if (ndamp) {
567: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping);
568: }
569: if (nshift) {
570: b->lu_shift_fraction = shift_fraction;
571: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift);
572: }
573: return(0);
574: }
578: int MatUsePETSc_SeqAIJ(Mat A)
579: {
581: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
582: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
583: return(0);
584: }
587: /* ----------------------------------------------------------- */
590: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
591: {
593: Mat C;
596: MatLUFactorSymbolic(A,row,col,info,&C);
597: MatLUFactorNumeric(A,&C);
598: MatHeaderCopy(A,C);
599: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
600: return(0);
601: }
602: /* ----------------------------------------------------------- */
605: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
606: {
607: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
608: IS iscol = a->col,isrow = a->row;
609: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
610: int nz,*rout,*cout;
611: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
614: if (!n) return(0);
616: VecGetArray(bb,&b);
617: VecGetArray(xx,&x);
618: tmp = a->solve_work;
620: ISGetIndices(isrow,&rout); r = rout;
621: ISGetIndices(iscol,&cout); c = cout + (n-1);
623: /* forward solve the lower triangular */
624: tmp[0] = b[*r++];
625: tmps = tmp;
626: for (i=1; i<n; i++) {
627: v = aa + ai[i] ;
628: vi = aj + ai[i] ;
629: nz = a->diag[i] - ai[i];
630: sum = b[*r++];
631: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
632: tmp[i] = sum;
633: }
635: /* backward solve the upper triangular */
636: for (i=n-1; i>=0; i--){
637: v = aa + a->diag[i] + 1;
638: vi = aj + a->diag[i] + 1;
639: nz = ai[i+1] - a->diag[i] - 1;
640: sum = tmp[i];
641: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
642: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
643: }
645: ISRestoreIndices(isrow,&rout);
646: ISRestoreIndices(iscol,&cout);
647: VecRestoreArray(bb,&b);
648: VecRestoreArray(xx,&x);
649: PetscLogFlops(2*a->nz - A->n);
650: return(0);
651: }
653: /* ----------------------------------------------------------- */
656: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
657: {
658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
659: int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
660: PetscScalar *x,*b,*aa = a->a;
661: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
662: int adiag_i,i,*vi,nz,ai_i;
663: PetscScalar *v,sum;
664: #endif
667: if (!n) return(0);
669: VecGetArray(bb,&b);
670: VecGetArray(xx,&x);
672: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
673: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
674: #else
675: /* forward solve the lower triangular */
676: x[0] = b[0];
677: for (i=1; i<n; i++) {
678: ai_i = ai[i];
679: v = aa + ai_i;
680: vi = aj + ai_i;
681: nz = adiag[i] - ai_i;
682: sum = b[i];
683: while (nz--) sum -= *v++ * x[*vi++];
684: x[i] = sum;
685: }
687: /* backward solve the upper triangular */
688: for (i=n-1; i>=0; i--){
689: adiag_i = adiag[i];
690: v = aa + adiag_i + 1;
691: vi = aj + adiag_i + 1;
692: nz = ai[i+1] - adiag_i - 1;
693: sum = x[i];
694: while (nz--) sum -= *v++ * x[*vi++];
695: x[i] = sum*aa[adiag_i];
696: }
697: #endif
698: PetscLogFlops(2*a->nz - A->n);
699: VecRestoreArray(bb,&b);
700: VecRestoreArray(xx,&x);
701: return(0);
702: }
706: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
707: {
708: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
709: IS iscol = a->col,isrow = a->row;
710: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
711: int nz,*rout,*cout;
712: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
715: if (yy != xx) {VecCopy(yy,xx);}
717: VecGetArray(bb,&b);
718: VecGetArray(xx,&x);
719: tmp = a->solve_work;
721: ISGetIndices(isrow,&rout); r = rout;
722: ISGetIndices(iscol,&cout); c = cout + (n-1);
724: /* forward solve the lower triangular */
725: tmp[0] = b[*r++];
726: for (i=1; i<n; i++) {
727: v = aa + ai[i] ;
728: vi = aj + ai[i] ;
729: nz = a->diag[i] - ai[i];
730: sum = b[*r++];
731: while (nz--) sum -= *v++ * tmp[*vi++ ];
732: tmp[i] = sum;
733: }
735: /* backward solve the upper triangular */
736: for (i=n-1; i>=0; i--){
737: v = aa + a->diag[i] + 1;
738: vi = aj + a->diag[i] + 1;
739: nz = ai[i+1] - a->diag[i] - 1;
740: sum = tmp[i];
741: while (nz--) sum -= *v++ * tmp[*vi++ ];
742: tmp[i] = sum*aa[a->diag[i]];
743: x[*c--] += tmp[i];
744: }
746: ISRestoreIndices(isrow,&rout);
747: ISRestoreIndices(iscol,&cout);
748: VecRestoreArray(bb,&b);
749: VecRestoreArray(xx,&x);
750: PetscLogFlops(2*a->nz);
752: return(0);
753: }
754: /* -------------------------------------------------------------------*/
757: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
758: {
759: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
760: IS iscol = a->col,isrow = a->row;
761: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
762: int nz,*rout,*cout,*diag = a->diag;
763: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
766: VecGetArray(bb,&b);
767: VecGetArray(xx,&x);
768: tmp = a->solve_work;
770: ISGetIndices(isrow,&rout); r = rout;
771: ISGetIndices(iscol,&cout); c = cout;
773: /* copy the b into temp work space according to permutation */
774: for (i=0; i<n; i++) tmp[i] = b[c[i]];
776: /* forward solve the U^T */
777: for (i=0; i<n; i++) {
778: v = aa + diag[i] ;
779: vi = aj + diag[i] + 1;
780: nz = ai[i+1] - diag[i] - 1;
781: s1 = tmp[i];
782: s1 *= (*v++); /* multiply by inverse of diagonal entry */
783: while (nz--) {
784: tmp[*vi++ ] -= (*v++)*s1;
785: }
786: tmp[i] = s1;
787: }
789: /* backward solve the L^T */
790: for (i=n-1; i>=0; i--){
791: v = aa + diag[i] - 1 ;
792: vi = aj + diag[i] - 1 ;
793: nz = diag[i] - ai[i];
794: s1 = tmp[i];
795: while (nz--) {
796: tmp[*vi-- ] -= (*v--)*s1;
797: }
798: }
800: /* copy tmp into x according to permutation */
801: for (i=0; i<n; i++) x[r[i]] = tmp[i];
803: ISRestoreIndices(isrow,&rout);
804: ISRestoreIndices(iscol,&cout);
805: VecRestoreArray(bb,&b);
806: VecRestoreArray(xx,&x);
808: PetscLogFlops(2*a->nz-A->n);
809: return(0);
810: }
814: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
815: {
816: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
817: IS iscol = a->col,isrow = a->row;
818: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
819: int nz,*rout,*cout,*diag = a->diag;
820: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
823: if (zz != xx) VecCopy(zz,xx);
825: VecGetArray(bb,&b);
826: VecGetArray(xx,&x);
827: tmp = a->solve_work;
829: ISGetIndices(isrow,&rout); r = rout;
830: ISGetIndices(iscol,&cout); c = cout;
832: /* copy the b into temp work space according to permutation */
833: for (i=0; i<n; i++) tmp[i] = b[c[i]];
835: /* forward solve the U^T */
836: for (i=0; i<n; i++) {
837: v = aa + diag[i] ;
838: vi = aj + diag[i] + 1;
839: nz = ai[i+1] - diag[i] - 1;
840: tmp[i] *= *v++;
841: while (nz--) {
842: tmp[*vi++ ] -= (*v++)*tmp[i];
843: }
844: }
846: /* backward solve the L^T */
847: for (i=n-1; i>=0; i--){
848: v = aa + diag[i] - 1 ;
849: vi = aj + diag[i] - 1 ;
850: nz = diag[i] - ai[i];
851: while (nz--) {
852: tmp[*vi-- ] -= (*v--)*tmp[i];
853: }
854: }
856: /* copy tmp into x according to permutation */
857: for (i=0; i<n; i++) x[r[i]] += tmp[i];
859: ISRestoreIndices(isrow,&rout);
860: ISRestoreIndices(iscol,&cout);
861: VecRestoreArray(bb,&b);
862: VecRestoreArray(xx,&x);
864: PetscLogFlops(2*a->nz);
865: return(0);
866: }
867: /* ----------------------------------------------------------------*/
868: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
872: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
873: {
874: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
875: IS isicol;
876: int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
877: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
878: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
879: int incrlev,nnz,i,levels,diagonal_fill;
880: PetscTruth col_identity,row_identity;
881: PetscReal f;
882:
884: f = info->fill;
885: levels = (int)info->levels;
886: diagonal_fill = (int)info->diagonal_fill;
887: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
889: /* special case that simply copies fill pattern */
890: ISIdentity(isrow,&row_identity);
891: ISIdentity(iscol,&col_identity);
892: if (!levels && row_identity && col_identity) {
893: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
894: (*fact)->factor = FACTOR_LU;
895: b = (Mat_SeqAIJ*)(*fact)->data;
896: if (!b->diag) {
897: MatMarkDiagonal_SeqAIJ(*fact);
898: }
899: MatMissingDiagonal_SeqAIJ(*fact);
900: b->row = isrow;
901: b->col = iscol;
902: b->icol = isicol;
903: b->lu_damping = info->damping;
904: b->lu_zeropivot = info->zeropivot;
905: b->lu_shift = info->shift;
906: b->lu_shift_fraction= info->shift_fraction;
907: PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
908: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
909: PetscObjectReference((PetscObject)isrow);
910: PetscObjectReference((PetscObject)iscol);
911: return(0);
912: }
914: ISGetIndices(isrow,&r);
915: ISGetIndices(isicol,&ic);
917: /* get new row pointers */
918: PetscMalloc((n+1)*sizeof(int),&ainew);
919: ainew[0] = 0;
920: /* don't know how many column pointers are needed so estimate */
921: jmax = (int)(f*(ai[n]+1));
922: PetscMalloc((jmax)*sizeof(int),&ajnew);
923: /* ajfill is level of fill for each fill entry */
924: PetscMalloc((jmax)*sizeof(int),&ajfill);
925: /* fill is a linked list of nonzeros in active row */
926: PetscMalloc((n+1)*sizeof(int),&fill);
927: /* im is level for each filled value */
928: PetscMalloc((n+1)*sizeof(int),&im);
929: /* dloc is location of diagonal in factor */
930: PetscMalloc((n+1)*sizeof(int),&dloc);
931: dloc[0] = 0;
932: for (prow=0; prow<n; prow++) {
934: /* copy current row into linked list */
935: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
936: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
937: xi = aj + ai[r[prow]] ;
938: fill[n] = n;
939: fill[prow] = -1; /* marker to indicate if diagonal exists */
940: while (nz--) {
941: fm = n;
942: idx = ic[*xi++ ];
943: do {
944: m = fm;
945: fm = fill[m];
946: } while (fm < idx);
947: fill[m] = idx;
948: fill[idx] = fm;
949: im[idx] = 0;
950: }
952: /* make sure diagonal entry is included */
953: if (diagonal_fill && fill[prow] == -1) {
954: fm = n;
955: while (fill[fm] < prow) fm = fill[fm];
956: fill[prow] = fill[fm]; /* insert diagonal into linked list */
957: fill[fm] = prow;
958: im[prow] = 0;
959: nzf++;
960: dcount++;
961: }
963: nzi = 0;
964: row = fill[n];
965: while (row < prow) {
966: incrlev = im[row] + 1;
967: nz = dloc[row];
968: xi = ajnew + ainew[row] + nz + 1;
969: flev = ajfill + ainew[row] + nz + 1;
970: nnz = ainew[row+1] - ainew[row] - nz - 1;
971: fm = row;
972: while (nnz-- > 0) {
973: idx = *xi++ ;
974: if (*flev + incrlev > levels) {
975: flev++;
976: continue;
977: }
978: do {
979: m = fm;
980: fm = fill[m];
981: } while (fm < idx);
982: if (fm != idx) {
983: im[idx] = *flev + incrlev;
984: fill[m] = idx;
985: fill[idx] = fm;
986: fm = idx;
987: nzf++;
988: } else {
989: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
990: }
991: flev++;
992: }
993: row = fill[row];
994: nzi++;
995: }
996: /* copy new filled row into permanent storage */
997: ainew[prow+1] = ainew[prow] + nzf;
998: if (ainew[prow+1] > jmax) {
1000: /* estimate how much additional space we will need */
1001: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1002: /* just double the memory each time */
1003: /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1004: int maxadd = jmax;
1005: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1006: jmax += maxadd;
1008: /* allocate a longer ajnew and ajfill */
1009: PetscMalloc(jmax*sizeof(int),&xi);
1010: PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));
1011: PetscFree(ajnew);
1012: ajnew = xi;
1013: PetscMalloc(jmax*sizeof(int),&xi);
1014: PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));
1015: PetscFree(ajfill);
1016: ajfill = xi;
1017: realloc++; /* count how many times we realloc */
1018: }
1019: xi = ajnew + ainew[prow] ;
1020: flev = ajfill + ainew[prow] ;
1021: dloc[prow] = nzi;
1022: fm = fill[n];
1023: while (nzf--) {
1024: *xi++ = fm ;
1025: *flev++ = im[fm];
1026: fm = fill[fm];
1027: }
1028: /* make sure row has diagonal entry */
1029: if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1030: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
1031: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1032: }
1033: }
1034: PetscFree(ajfill);
1035: ISRestoreIndices(isrow,&r);
1036: ISRestoreIndices(isicol,&ic);
1037: PetscFree(fill);
1038: PetscFree(im);
1040: {
1041: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1042: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1043: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1044: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1045: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1046: if (diagonal_fill) {
1047: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
1048: }
1049: }
1051: /* put together the new matrix */
1052: MatCreate(A->comm,n,n,n,n,fact);
1053: MatSetType(*fact,A->type_name);
1054: MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);
1055: PetscLogObjectParent(*fact,isicol);
1056: b = (Mat_SeqAIJ*)(*fact)->data;
1057: PetscFree(b->imax);
1058: b->singlemalloc = PETSC_FALSE;
1059: len = (ainew[n] )*sizeof(PetscScalar);
1060: /* the next line frees the default space generated by the Create() */
1061: PetscFree(b->a);
1062: PetscFree(b->ilen);
1063: PetscMalloc(len+1,&b->a);
1064: b->j = ajnew;
1065: b->i = ainew;
1066: for (i=0; i<n; i++) dloc[i] += ainew[i];
1067: b->diag = dloc;
1068: b->ilen = 0;
1069: b->imax = 0;
1070: b->row = isrow;
1071: b->col = iscol;
1072: PetscObjectReference((PetscObject)isrow);
1073: PetscObjectReference((PetscObject)iscol);
1074: b->icol = isicol;
1075: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1076: /* In b structure: Free imax, ilen, old a, old j.
1077: Allocate dloc, solve_work, new a, new j */
1078: PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar)));
1079: b->maxnz = b->nz = ainew[n] ;
1080: b->lu_damping = info->damping;
1081: b->lu_shift = info->shift;
1082: b->lu_shift_fraction = info->shift_fraction;
1083: b->lu_zeropivot = info->zeropivot;
1084: (*fact)->factor = FACTOR_LU;
1085: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1086: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1088: (*fact)->info.factor_mallocs = realloc;
1089: (*fact)->info.fill_ratio_given = f;
1090: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1091: return(0);
1092: }
1094: #include src/mat/impls/sbaij/seq/sbaij.h
1097: int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1098: {
1099: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1100: int ierr;
1103: if (!a->sbaijMat){
1104: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1105: }
1106:
1107: MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);
1108: MatDestroy(a->sbaijMat);
1109: a->sbaijMat = PETSC_NULL;
1110:
1111: return(0);
1112: }
1116: int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1117: {
1118: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1119: int ierr;
1120: PetscTruth perm_identity;
1121:
1123: ISIdentity(perm,&perm_identity);
1124: if (!perm_identity){
1125: SETERRQ(1,"Non-identity permutation is not supported yet");
1126: }
1127: if (!a->sbaijMat){
1128: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1129: }
1131: MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);
1132: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1133:
1134: return(0);
1135: }
1139: int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1140: {
1141: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1142: int ierr;
1143: PetscTruth perm_identity;
1144:
1146: ISIdentity(perm,&perm_identity);
1147: if (!perm_identity){
1148: SETERRQ(1,"Non-identity permutation is not supported yet");
1149: }
1150: if (!a->sbaijMat){
1151: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1152: }
1154: MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);
1155: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1156:
1157: return(0);
1158: }