Actual source code: sbaijfact.c
1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/mat/impls/sbaij/seq/sbaij.h
5: #include src/inline/ilu.h
6: #include include/petscis.h
8: #if !defined(PETSC_USE_COMPLEX)
9: /*
10: input:
11: F -- numeric factor
12: output:
13: nneg, nzero, npos: matrix inertia
14: */
18: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
19: {
20: Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
21: PetscScalar *dd = fact_ptr->a;
22: int mbs=fact_ptr->mbs,bs=fact_ptr->bs,i,nneig_tmp,npos_tmp,
23: *fi = fact_ptr->i;
26: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %d >1 yet",bs);
27: nneig_tmp = 0; npos_tmp = 0;
28: for (i=0; i<mbs; i++){
29: if (PetscRealPart(dd[*fi]) > 0.0){
30: npos_tmp++;
31: } else if (PetscRealPart(dd[*fi]) < 0.0){
32: nneig_tmp++;
33: }
34: fi++;
35: }
36: if (nneig) *nneig = nneig_tmp;
37: if (npos) *npos = npos_tmp;
38: if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;
40: return(0);
41: }
42: #endif /* !defined(PETSC_USE_COMPLEX) */
44: /* Using Modified Sparse Row (MSR) storage.
45: See page 85, "Iterative Methods ..." by Saad. */
46: /*
47: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
48: */
49: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
52: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
53: {
54: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
55: int *rip,ierr,i,mbs = a->mbs,*ai,*aj;
56: int *jutmp,bs = a->bs,bs2=a->bs2;
57: int m,realloc = 0,prow;
58: int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
59: int *il,ili,nextprow;
60: PetscReal f = info->fill;
61: PetscTruth perm_identity;
64: /* check whether perm is the identity mapping */
65: ISIdentity(perm,&perm_identity);
67: /* -- inplace factorization, i.e., use sbaij for *B -- */
68: if (perm_identity && bs==1 ){
69: if (!perm_identity) a->permute = PETSC_TRUE;
70:
71: ISGetIndices(perm,&rip);
72:
73: if (perm_identity){ /* without permutation */
74: ai = a->i; aj = a->j;
75: } else { /* non-trivial permutation */
76: MatReorderingSeqSBAIJ(A,perm);
77: ai = a->inew; aj = a->jnew;
78: }
79:
80: /* initialization */
81: PetscMalloc((mbs+1)*sizeof(int),&iu);
82: umax = (int)(f*ai[mbs] + 1);
83: PetscMalloc(umax*sizeof(int),&ju);
84: iu[0] = 0;
85: juidx = 0; /* index for ju */
86: PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
87: q = jl + mbs; /* linked list for col index of active row */
88: il = q + mbs;
89: for (i=0; i<mbs; i++){
90: jl[i] = mbs;
91: q[i] = 0;
92: il[i] = 0;
93: }
95: /* for each row k */
96: for (k=0; k<mbs; k++){
97: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
98: q[k] = mbs;
99: /* initialize nonzero structure of k-th row to row rip[k] of A */
100: jmin = ai[rip[k]] +1; /* exclude diag[k] */
101: jmax = ai[rip[k]+1];
102: for (j=jmin; j<jmax; j++){
103: vj = rip[aj[j]]; /* col. value */
104: if(vj > k){
105: qm = k;
106: do {
107: m = qm; qm = q[m];
108: } while(qm < vj);
109: if (qm == vj) {
110: SETERRQ(1," error: duplicate entry in A\n");
111: }
112: nzk++;
113: q[m] = vj;
114: q[vj] = qm;
115: } /* if(vj > k) */
116: } /* for (j=jmin; j<jmax; j++) */
118: /* modify nonzero structure of k-th row by computing fill-in
119: for each row i to be merged in */
120: prow = k;
121: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
122:
123: while (prow < k){
124: nextprow = jl[prow];
125:
126: /* merge row prow into k-th row */
127: ili = il[prow];
128: jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */
129: jmax = iu[prow+1];
130: qm = k;
131: for (j=jmin; j<jmax; j++){
132: vj = ju[j];
133: do {
134: m = qm; qm = q[m];
135: } while (qm < vj);
136: if (qm != vj){ /* a fill */
137: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
138: }
139: } /* end of for (j=jmin; j<jmax; j++) */
140: if (jmin < jmax){
141: il[prow] = jmin;
142: j = ju[jmin];
143: jl[prow] = jl[j]; jl[j] = prow; /* update jl */
144: }
145: prow = nextprow;
146: }
147:
148: /* update il and jl */
149: if (nzk > 0){
150: i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
151: jl[k] = jl[i]; jl[i] = k;
152: il[k] = iu[k] + 1;
153: }
154: iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */
156: /* allocate more space to ju if needed */
157: if (iu[k+1] > umax) {
158: /* estimate how much additional space we will need */
159: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
160: /* just double the memory each time */
161: maxadd = umax;
162: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
163: umax += maxadd;
165: /* allocate a longer ju */
166: PetscMalloc(umax*sizeof(int),&jutmp);
167: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
168: PetscFree(ju);
169: ju = jutmp;
170: realloc++; /* count how many times we realloc */
171: }
173: /* save nonzero structure of k-th row in ju */
174: ju[juidx++] = k; /* diag[k] */
175: i = k;
176: while (nzk --) {
177: i = q[i];
178: ju[juidx++] = i;
179: }
180: }
182: if (ai[mbs] != 0) {
183: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
184: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
185: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
186: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
187: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
188: } else {
189: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
190: }
192: ISRestoreIndices(perm,&rip);
193: /* PetscFree(q); */
194: PetscFree(jl);
196: /* put together the new matrix */
197: MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
198: MatSetType(*B,A->type_name);
199: MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);
201: /* PetscLogObjectParent(*B,iperm); */
202: b = (Mat_SeqSBAIJ*)(*B)->data;
203: PetscFree(b->imax);
204: b->singlemalloc = PETSC_FALSE;
205: /* the next line frees the default space generated by the Create() */
206: PetscFree(b->a);
207: PetscFree(b->ilen);
208: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
209: b->j = ju;
210: b->i = iu;
211: b->diag = 0;
212: b->ilen = 0;
213: b->imax = 0;
214: b->row = perm;
215: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
216: PetscObjectReference((PetscObject)perm);
217: b->icol = perm;
218: PetscObjectReference((PetscObject)perm);
219: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
220: /* In b structure: Free imax, ilen, old a, old j.
221: Allocate idnew, solve_work, new a, new j */
222: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
223: b->maxnz = b->nz = iu[mbs];
224:
225: (*B)->factor = FACTOR_CHOLESKY;
226: (*B)->info.factor_mallocs = realloc;
227: (*B)->info.fill_ratio_given = f;
228: if (ai[mbs] != 0) {
229: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
230: } else {
231: (*B)->info.fill_ratio_needed = 0.0;
232: }
235: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
236: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
237: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
238:
239: return(0);
240: }
241: /* ----------- end of new code --------------------*/
244: if (!perm_identity) a->permute = PETSC_TRUE;
245:
246: ISGetIndices(perm,&rip);
247:
248: if (perm_identity){ /* without permutation */
249: ai = a->i; aj = a->j;
250: } else { /* non-trivial permutation */
251: MatReorderingSeqSBAIJ(A,perm);
252: ai = a->inew; aj = a->jnew;
253: }
254:
255: /* initialization */
256: PetscMalloc((mbs+1)*sizeof(int),&iu);
257: umax = (int)(f*ai[mbs] + 1); umax += mbs + 1;
258: PetscMalloc(umax*sizeof(int),&ju);
259: iu[0] = mbs+1;
260: juidx = mbs + 1; /* index for ju */
261: PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
262: q = jl + mbs; /* linked list for col index */
263: for (i=0; i<mbs; i++){
264: jl[i] = mbs;
265: q[i] = 0;
266: }
268: /* for each row k */
269: for (k=0; k<mbs; k++){
270: for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
271: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272: q[k] = mbs;
273: /* initialize nonzero structure of k-th row to row rip[k] of A */
274: jmin = ai[rip[k]] +1; /* exclude diag[k] */
275: jmax = ai[rip[k]+1];
276: for (j=jmin; j<jmax; j++){
277: vj = rip[aj[j]]; /* col. value */
278: if(vj > k){
279: qm = k;
280: do {
281: m = qm; qm = q[m];
282: } while(qm < vj);
283: if (qm == vj) {
284: SETERRQ(1," error: duplicate entry in A\n");
285: }
286: nzk++;
287: q[m] = vj;
288: q[vj] = qm;
289: } /* if(vj > k) */
290: } /* for (j=jmin; j<jmax; j++) */
292: /* modify nonzero structure of k-th row by computing fill-in
293: for each row i to be merged in */
294: prow = k;
295: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296:
297: while (prow < k){
298: /* merge row prow into k-th row */
299: jmin = iu[prow] + 1; jmax = iu[prow+1];
300: qm = k;
301: for (j=jmin; j<jmax; j++){
302: vj = ju[j];
303: do {
304: m = qm; qm = q[m];
305: } while (qm < vj);
306: if (qm != vj){
307: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308: }
309: }
310: prow = jl[prow]; /* next pivot row */
311: }
312:
313: /* add k to row list for first nonzero element in k-th row */
314: if (nzk > 0){
315: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316: jl[k] = jl[i]; jl[i] = k;
317: }
318: iu[k+1] = iu[k] + nzk;
320: /* allocate more space to ju if needed */
321: if (iu[k+1] > umax) {
322: /* estimate how much additional space we will need */
323: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324: /* just double the memory each time */
325: maxadd = umax;
326: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327: umax += maxadd;
329: /* allocate a longer ju */
330: PetscMalloc(umax*sizeof(int),&jutmp);
331: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
332: PetscFree(ju);
333: ju = jutmp;
334: realloc++; /* count how many times we realloc */
335: }
337: /* save nonzero structure of k-th row in ju */
338: i=k;
339: while (nzk --) {
340: i = q[i];
341: ju[juidx++] = i;
342: }
343: }
345: if (ai[mbs] != 0) {
346: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
348: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
349: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
350: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
351: } else {
352: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
353: }
355: ISRestoreIndices(perm,&rip);
356: /* PetscFree(q); */
357: PetscFree(jl);
359: /* put together the new matrix */
360: MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
361: MatSetType(*B,A->type_name);
362: MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);
364: /* PetscLogObjectParent(*B,iperm); */
365: b = (Mat_SeqSBAIJ*)(*B)->data;
366: PetscFree(b->imax);
367: b->singlemalloc = PETSC_FALSE;
368: /* the next line frees the default space generated by the Create() */
369: PetscFree(b->a);
370: PetscFree(b->ilen);
371: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
372: b->j = ju;
373: b->i = iu;
374: b->diag = 0;
375: b->ilen = 0;
376: b->imax = 0;
377: b->row = perm;
378: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
379: PetscObjectReference((PetscObject)perm);
380: b->icol = perm;
381: PetscObjectReference((PetscObject)perm);
382: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
383: /* In b structure: Free imax, ilen, old a, old j.
384: Allocate idnew, solve_work, new a, new j */
385: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
386: b->maxnz = b->nz = iu[mbs];
387:
388: (*B)->factor = FACTOR_CHOLESKY;
389: (*B)->info.factor_mallocs = realloc;
390: (*B)->info.fill_ratio_given = f;
391: if (ai[mbs] != 0) {
392: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
393: } else {
394: (*B)->info.fill_ratio_needed = 0.0;
395: }
397: if (perm_identity){
398: switch (bs) {
399: case 1:
400: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
401: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
402: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
403: break;
404: case 2:
405: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
406: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
407: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
408: break;
409: case 3:
410: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
411: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
412: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
413: break;
414: case 4:
415: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
416: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
417: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
418: break;
419: case 5:
420: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
421: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
422: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
423: break;
424: case 6:
425: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
426: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
427: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
428: break;
429: case 7:
430: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
431: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
432: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
433: break;
434: default:
435: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
436: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
437: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
438: break;
439: }
440: }
442: return(0);
443: }
448: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
449: {
450: Mat C = *B;
451: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
452: IS perm = b->row;
453: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
454: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
455: int bs=a->bs,bs2 = a->bs2;
456: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
457: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
458: MatScalar *work;
459: int *pivots;
463: /* initialization */
464: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
465: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
466: PetscMalloc(2*mbs*sizeof(int),&il);
467: jl = il + mbs;
468: for (i=0; i<mbs; i++) {
469: jl[i] = mbs; il[0] = 0;
470: }
471: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
472: uik = dk + bs2;
473: work = uik + bs2;
474: PetscMalloc(bs*sizeof(int),&pivots);
475:
476: ISGetIndices(perm,&perm_ptr);
477:
478: /* check permutation */
479: if (!a->permute){
480: ai = a->i; aj = a->j; aa = a->a;
481: } else {
482: ai = a->inew; aj = a->jnew;
483: PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
484: PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
485: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
486: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
488: for (i=0; i<mbs; i++){
489: jmin = ai[i]; jmax = ai[i+1];
490: for (j=jmin; j<jmax; j++){
491: while (a2anew[j] != j){
492: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
493: for (k1=0; k1<bs2; k1++){
494: dk[k1] = aa[k*bs2+k1];
495: aa[k*bs2+k1] = aa[j*bs2+k1];
496: aa[j*bs2+k1] = dk[k1];
497: }
498: }
499: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
500: if (i > aj[j]){
501: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
502: ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
503: for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
504: for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */
505: for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
506: }
507: }
508: }
509: }
510: PetscFree(a2anew);
511: }
512:
513: /* for each row k */
514: for (k = 0; k<mbs; k++){
516: /*initialize k-th row with elements nonzero in row perm(k) of A */
517: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
518:
519: ap = aa + jmin*bs2;
520: for (j = jmin; j < jmax; j++){
521: vj = perm_ptr[aj[j]]; /* block col. index */
522: rtmp_ptr = rtmp + vj*bs2;
523: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
524: }
526: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
527: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
528: i = jl[k]; /* first row to be added to k_th row */
530: while (i < k){
531: nexti = jl[i]; /* next row to be added to k_th row */
533: /* compute multiplier */
534: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
536: /* uik = -inv(Di)*U_bar(i,k) */
537: diag = ba + i*bs2;
538: u = ba + ili*bs2;
539: PetscMemzero(uik,bs2*sizeof(MatScalar));
540: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
541:
542: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
543: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
544:
545: /* update -U(i,k) */
546: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
548: /* add multiple of row i to k-th row ... */
549: jmin = ili + 1; jmax = bi[i+1];
550: if (jmin < jmax){
551: for (j=jmin; j<jmax; j++) {
552: /* rtmp += -U(i,k)^T * U_bar(i,j) */
553: rtmp_ptr = rtmp + bj[j]*bs2;
554: u = ba + j*bs2;
555: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
556: }
557:
558: /* ... add i to row list for next nonzero entry */
559: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
560: j = bj[jmin];
561: jl[i] = jl[j]; jl[j] = i; /* update jl */
562: }
563: i = nexti;
564: }
566: /* save nonzero entries in k-th row of U ... */
568: /* invert diagonal block */
569: diag = ba+k*bs2;
570: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
571: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
572:
573: jmin = bi[k]; jmax = bi[k+1];
574: if (jmin < jmax) {
575: for (j=jmin; j<jmax; j++){
576: vj = bj[j]; /* block col. index of U */
577: u = ba + j*bs2;
578: rtmp_ptr = rtmp + vj*bs2;
579: for (k1=0; k1<bs2; k1++){
580: *u++ = *rtmp_ptr;
581: *rtmp_ptr++ = 0.0;
582: }
583: }
584:
585: /* ... add k to row list for first nonzero entry in k-th row */
586: il[k] = jmin;
587: i = bj[jmin];
588: jl[k] = jl[i]; jl[i] = k;
589: }
590: }
592: PetscFree(rtmp);
593: PetscFree(il);
594: PetscFree(dk);
595: PetscFree(pivots);
596: if (a->permute){
597: PetscFree(aa);
598: }
600: ISRestoreIndices(perm,&perm_ptr);
601: C->factor = FACTOR_CHOLESKY;
602: C->assembled = PETSC_TRUE;
603: C->preallocated = PETSC_TRUE;
604: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
605: return(0);
606: }
610: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
611: {
612: Mat C = *B;
613: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
614: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
615: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
616: int bs=a->bs,bs2 = a->bs2;
617: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
618: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
619: MatScalar *work;
620: int *pivots;
624: /* initialization */
625:
626: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
627: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
628: PetscMalloc(2*mbs*sizeof(int),&il);
629: jl = il + mbs;
630: for (i=0; i<mbs; i++) {
631: jl[i] = mbs; il[0] = 0;
632: }
633: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
634: uik = dk + bs2;
635: work = uik + bs2;
636: PetscMalloc(bs*sizeof(int),&pivots);
637:
638: ai = a->i; aj = a->j; aa = a->a;
639:
640: /* for each row k */
641: for (k = 0; k<mbs; k++){
643: /*initialize k-th row with elements nonzero in row k of A */
644: jmin = ai[k]; jmax = ai[k+1];
645: ap = aa + jmin*bs2;
646: for (j = jmin; j < jmax; j++){
647: vj = aj[j]; /* block col. index */
648: rtmp_ptr = rtmp + vj*bs2;
649: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
650: }
652: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
653: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
654: i = jl[k]; /* first row to be added to k_th row */
656: while (i < k){
657: nexti = jl[i]; /* next row to be added to k_th row */
659: /* compute multiplier */
660: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
662: /* uik = -inv(Di)*U_bar(i,k) */
663: diag = ba + i*bs2;
664: u = ba + ili*bs2;
665: PetscMemzero(uik,bs2*sizeof(MatScalar));
666: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
667:
668: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
669: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
670:
671: /* update -U(i,k) */
672: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
674: /* add multiple of row i to k-th row ... */
675: jmin = ili + 1; jmax = bi[i+1];
676: if (jmin < jmax){
677: for (j=jmin; j<jmax; j++) {
678: /* rtmp += -U(i,k)^T * U_bar(i,j) */
679: rtmp_ptr = rtmp + bj[j]*bs2;
680: u = ba + j*bs2;
681: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
682: }
683:
684: /* ... add i to row list for next nonzero entry */
685: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
686: j = bj[jmin];
687: jl[i] = jl[j]; jl[j] = i; /* update jl */
688: }
689: i = nexti;
690: }
692: /* save nonzero entries in k-th row of U ... */
694: /* invert diagonal block */
695: diag = ba+k*bs2;
696: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
697: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
698:
699: jmin = bi[k]; jmax = bi[k+1];
700: if (jmin < jmax) {
701: for (j=jmin; j<jmax; j++){
702: vj = bj[j]; /* block col. index of U */
703: u = ba + j*bs2;
704: rtmp_ptr = rtmp + vj*bs2;
705: for (k1=0; k1<bs2; k1++){
706: *u++ = *rtmp_ptr;
707: *rtmp_ptr++ = 0.0;
708: }
709: }
710:
711: /* ... add k to row list for first nonzero entry in k-th row */
712: il[k] = jmin;
713: i = bj[jmin];
714: jl[k] = jl[i]; jl[i] = k;
715: }
716: }
718: PetscFree(rtmp);
719: PetscFree(il);
720: PetscFree(dk);
721: PetscFree(pivots);
723: C->factor = FACTOR_CHOLESKY;
724: C->assembled = PETSC_TRUE;
725: C->preallocated = PETSC_TRUE;
726: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
727: return(0);
728: }
730: /*
731: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
732: Version for blocks 2 by 2.
733: */
736: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
737: {
738: Mat C = *B;
739: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
740: IS perm = b->row;
741: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
742: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
743: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
744: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
747:
748: /* initialization */
749: /* il and jl record the first nonzero element in each row of the accessing
750: window U(0:k, k:mbs-1).
751: jl: list of rows to be added to uneliminated rows
752: i>= k: jl(i) is the first row to be added to row i
753: i< k: jl(i) is the row following row i in some list of rows
754: jl(i) = mbs indicates the end of a list
755: il(i): points to the first nonzero element in columns k,...,mbs-1 of
756: row i of U */
757: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
758: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
759: PetscMalloc(2*mbs*sizeof(int),&il);
760: jl = il + mbs;
761: for (i=0; i<mbs; i++) {
762: jl[i] = mbs; il[0] = 0;
763: }
764: PetscMalloc(8*sizeof(MatScalar),&dk);
765: uik = dk + 4;
766: ISGetIndices(perm,&perm_ptr);
768: /* check permutation */
769: if (!a->permute){
770: ai = a->i; aj = a->j; aa = a->a;
771: } else {
772: ai = a->inew; aj = a->jnew;
773: PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
774: PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
775: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
776: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
778: for (i=0; i<mbs; i++){
779: jmin = ai[i]; jmax = ai[i+1];
780: for (j=jmin; j<jmax; j++){
781: while (a2anew[j] != j){
782: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
783: for (k1=0; k1<4; k1++){
784: dk[k1] = aa[k*4+k1];
785: aa[k*4+k1] = aa[j*4+k1];
786: aa[j*4+k1] = dk[k1];
787: }
788: }
789: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
790: if (i > aj[j]){
791: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
792: ap = aa + j*4; /* ptr to the beginning of the block */
793: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
794: ap[1] = ap[2];
795: ap[2] = dk[1];
796: }
797: }
798: }
799: PetscFree(a2anew);
800: }
802: /* for each row k */
803: for (k = 0; k<mbs; k++){
805: /*initialize k-th row with elements nonzero in row perm(k) of A */
806: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
807: ap = aa + jmin*4;
808: for (j = jmin; j < jmax; j++){
809: vj = perm_ptr[aj[j]]; /* block col. index */
810: rtmp_ptr = rtmp + vj*4;
811: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
812: }
814: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
815: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
816: i = jl[k]; /* first row to be added to k_th row */
818: while (i < k){
819: nexti = jl[i]; /* next row to be added to k_th row */
821: /* compute multiplier */
822: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
824: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
825: diag = ba + i*4;
826: u = ba + ili*4;
827: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
828: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
829: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
830: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
831:
832: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
833: dk[0] += uik[0]*u[0] + uik[1]*u[1];
834: dk[1] += uik[2]*u[0] + uik[3]*u[1];
835: dk[2] += uik[0]*u[2] + uik[1]*u[3];
836: dk[3] += uik[2]*u[2] + uik[3]*u[3];
838: /* update -U(i,k): ba[ili] = uik */
839: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
841: /* add multiple of row i to k-th row ... */
842: jmin = ili + 1; jmax = bi[i+1];
843: if (jmin < jmax){
844: for (j=jmin; j<jmax; j++) {
845: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
846: rtmp_ptr = rtmp + bj[j]*4;
847: u = ba + j*4;
848: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
849: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
850: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
851: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
852: }
853:
854: /* ... add i to row list for next nonzero entry */
855: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
856: j = bj[jmin];
857: jl[i] = jl[j]; jl[j] = i; /* update jl */
858: }
859: i = nexti;
860: }
862: /* save nonzero entries in k-th row of U ... */
864: /* invert diagonal block */
865: diag = ba+k*4;
866: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
867: Kernel_A_gets_inverse_A_2(diag);
868:
869: jmin = bi[k]; jmax = bi[k+1];
870: if (jmin < jmax) {
871: for (j=jmin; j<jmax; j++){
872: vj = bj[j]; /* block col. index of U */
873: u = ba + j*4;
874: rtmp_ptr = rtmp + vj*4;
875: for (k1=0; k1<4; k1++){
876: *u++ = *rtmp_ptr;
877: *rtmp_ptr++ = 0.0;
878: }
879: }
880:
881: /* ... add k to row list for first nonzero entry in k-th row */
882: il[k] = jmin;
883: i = bj[jmin];
884: jl[k] = jl[i]; jl[i] = k;
885: }
886: }
888: PetscFree(rtmp);
889: PetscFree(il);
890: PetscFree(dk);
891: if (a->permute) {
892: PetscFree(aa);
893: }
894: ISRestoreIndices(perm,&perm_ptr);
895: C->factor = FACTOR_CHOLESKY;
896: C->assembled = PETSC_TRUE;
897: C->preallocated = PETSC_TRUE;
898: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
899: return(0);
900: }
902: /*
903: Version for when blocks are 2 by 2 Using natural ordering
904: */
907: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
908: {
909: Mat C = *B;
910: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
911: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
912: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
913: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
914: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
917:
918: /* initialization */
919: /* il and jl record the first nonzero element in each row of the accessing
920: window U(0:k, k:mbs-1).
921: jl: list of rows to be added to uneliminated rows
922: i>= k: jl(i) is the first row to be added to row i
923: i< k: jl(i) is the row following row i in some list of rows
924: jl(i) = mbs indicates the end of a list
925: il(i): points to the first nonzero element in columns k,...,mbs-1 of
926: row i of U */
927: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
928: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
929: PetscMalloc(2*mbs*sizeof(int),&il);
930: jl = il + mbs;
931: for (i=0; i<mbs; i++) {
932: jl[i] = mbs; il[0] = 0;
933: }
934: PetscMalloc(8*sizeof(MatScalar),&dk);
935: uik = dk + 4;
936:
937: ai = a->i; aj = a->j; aa = a->a;
939: /* for each row k */
940: for (k = 0; k<mbs; k++){
942: /*initialize k-th row with elements nonzero in row k of A */
943: jmin = ai[k]; jmax = ai[k+1];
944: ap = aa + jmin*4;
945: for (j = jmin; j < jmax; j++){
946: vj = aj[j]; /* block col. index */
947: rtmp_ptr = rtmp + vj*4;
948: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
949: }
950:
951: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
952: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
953: i = jl[k]; /* first row to be added to k_th row */
955: while (i < k){
956: nexti = jl[i]; /* next row to be added to k_th row */
958: /* compute multiplier */
959: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
961: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
962: diag = ba + i*4;
963: u = ba + ili*4;
964: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
965: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
966: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
967: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
968:
969: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
970: dk[0] += uik[0]*u[0] + uik[1]*u[1];
971: dk[1] += uik[2]*u[0] + uik[3]*u[1];
972: dk[2] += uik[0]*u[2] + uik[1]*u[3];
973: dk[3] += uik[2]*u[2] + uik[3]*u[3];
975: /* update -U(i,k): ba[ili] = uik */
976: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
978: /* add multiple of row i to k-th row ... */
979: jmin = ili + 1; jmax = bi[i+1];
980: if (jmin < jmax){
981: for (j=jmin; j<jmax; j++) {
982: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983: rtmp_ptr = rtmp + bj[j]*4;
984: u = ba + j*4;
985: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
986: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
987: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
988: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
989: }
990:
991: /* ... add i to row list for next nonzero entry */
992: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993: j = bj[jmin];
994: jl[i] = jl[j]; jl[j] = i; /* update jl */
995: }
996: i = nexti;
997: }
999: /* save nonzero entries in k-th row of U ... */
1001: /* invert diagonal block */
1002: diag = ba+k*4;
1003: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1004: Kernel_A_gets_inverse_A_2(diag);
1005:
1006: jmin = bi[k]; jmax = bi[k+1];
1007: if (jmin < jmax) {
1008: for (j=jmin; j<jmax; j++){
1009: vj = bj[j]; /* block col. index of U */
1010: u = ba + j*4;
1011: rtmp_ptr = rtmp + vj*4;
1012: for (k1=0; k1<4; k1++){
1013: *u++ = *rtmp_ptr;
1014: *rtmp_ptr++ = 0.0;
1015: }
1016: }
1017:
1018: /* ... add k to row list for first nonzero entry in k-th row */
1019: il[k] = jmin;
1020: i = bj[jmin];
1021: jl[k] = jl[i]; jl[i] = k;
1022: }
1023: }
1025: PetscFree(rtmp);
1026: PetscFree(il);
1027: PetscFree(dk);
1029: C->factor = FACTOR_CHOLESKY;
1030: C->assembled = PETSC_TRUE;
1031: C->preallocated = PETSC_TRUE;
1032: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1033: return(0);
1034: }
1036: /*
1037: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1038: Version for blocks are 1 by 1.
1039: */
1042: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1043: {
1044: Mat C = *B;
1045: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1046: IS ip = b->row;
1047: int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1048: int *ai,*aj,*r;
1049: int k,jmin,jmax,*jl,*il,vj,nexti,ili;
1050: MatScalar *rtmp;
1051: MatScalar *ba = b->a,*aa,ak;
1052: MatScalar dk,uikdi;
1055: ISGetIndices(ip,&rip);
1056: if (!a->permute){
1057: ai = a->i; aj = a->j; aa = a->a;
1058: } else {
1059: ai = a->inew; aj = a->jnew;
1060: PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1061: PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1062: PetscMalloc(ai[mbs]*sizeof(int),&r);
1063: ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));
1065: jmin = ai[0]; jmax = ai[mbs];
1066: for (j=jmin; j<jmax; j++){
1067: while (r[j] != j){
1068: k = r[j]; r[j] = r[k]; r[k] = k;
1069: ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1070: }
1071: }
1072: PetscFree(r);
1073: }
1074:
1075: /* initialization */
1076: /* il and jl record the first nonzero element in each row of the accessing
1077: window U(0:k, k:mbs-1).
1078: jl: list of rows to be added to uneliminated rows
1079: i>= k: jl(i) is the first row to be added to row i
1080: i< k: jl(i) is the row following row i in some list of rows
1081: jl(i) = mbs indicates the end of a list
1082: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1083: row i of U */
1084: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1085: PetscMalloc(2*mbs*sizeof(int),&il);
1086: jl = il + mbs;
1087: for (i=0; i<mbs; i++) {
1088: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1089: }
1091: /* for each row k */
1092: for (k = 0; k<mbs; k++){
1094: /*initialize k-th row with elements nonzero in row perm(k) of A */
1095: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1096:
1097: for (j = jmin; j < jmax; j++){
1098: vj = rip[aj[j]];
1099: rtmp[vj] = aa[j];
1100: }
1102: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1103: dk = rtmp[k];
1104: i = jl[k]; /* first row to be added to k_th row */
1106: while (i < k){
1107: nexti = jl[i]; /* next row to be added to k_th row */
1109: /* compute multiplier, update D(k) and U(i,k) */
1110: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1111: uikdi = - ba[ili]*ba[i];
1112: dk += uikdi*ba[ili];
1113: ba[ili] = uikdi; /* -U(i,k) */
1115: /* add multiple of row i to k-th row ... */
1116: jmin = ili + 1; jmax = bi[i+1];
1117: if (jmin < jmax){
1118: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1119: /* ... add i to row list for next nonzero entry */
1120: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1121: j = bj[jmin];
1122: jl[i] = jl[j]; jl[j] = i; /* update jl */
1123: }
1124: i = nexti;
1125: }
1127: /* check for zero pivot and save diagoanl element */
1128: if (dk == 0.0){
1129: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1130: /*
1131: } else if (PetscRealPart(dk) < 0.0){
1132: SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
1133: */
1134: }
1136: /* save nonzero entries in k-th row of U ... */
1137: ba[k] = 1.0/dk;
1138: jmin = bi[k]; jmax = bi[k+1];
1139: if (jmin < jmax) {
1140: for (j=jmin; j<jmax; j++){
1141: vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1142: }
1143: /* ... add k to row list for first nonzero entry in k-th row */
1144: il[k] = jmin;
1145: i = bj[jmin];
1146: jl[k] = jl[i]; jl[i] = k;
1147: }
1148: }
1149:
1150: PetscFree(rtmp);
1151: PetscFree(il);
1152: if (a->permute){
1153: PetscFree(aa);
1154: }
1156: ISRestoreIndices(ip,&rip);
1157: C->factor = FACTOR_CHOLESKY;
1158: C->assembled = PETSC_TRUE;
1159: C->preallocated = PETSC_TRUE;
1160: PetscLogFlops(b->mbs);
1161: return(0);
1162: }
1164: /*
1165: Version for when blocks are 1 by 1 Using natural ordering
1166: */
1169: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1170: {
1171: Mat C = *B;
1172: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1173: int ierr,i,j,mbs = a->mbs;
1174: int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1175: int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1176: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1177: PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1178: PetscTruth damp,chshift;
1179: int nshift=0;
1182: /* initialization */
1183: /* il and jl record the first nonzero element in each row of the accessing
1184: window U(0:k, k:mbs-1).
1185: jl: list of rows to be added to uneliminated rows
1186: i>= k: jl(i) is the first row to be added to row i
1187: i< k: jl(i) is the row following row i in some list of rows
1188: jl(i) = mbs indicates the end of a list
1189: il(i): points to the first nonzero element in U(i,k:mbs-1)
1190: */
1191: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1192: PetscMalloc(2*mbs*sizeof(int),&il);
1193: jl = il + mbs;
1195: shift_amount = 0;
1196: do {
1197: damp = PETSC_FALSE;
1198: chshift = PETSC_FALSE;
1199: for (i=0; i<mbs; i++) {
1200: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1201: }
1203: for (k = 0; k<mbs; k++){ /* row k */
1204: /*initialize k-th row with elements nonzero in row perm(k) of A */
1205: nz = ai[k+1] - ai[k];
1206: acol = aj + ai[k];
1207: aval = aa + ai[k];
1208: bval = ba + bi[k];
1209: while (nz -- ){
1210: rtmp[*acol++] = *aval++;
1211: *bval++ = 0.0; /* for in-place factorization */
1212: }
1213: /* damp the diagonal of the matrix */
1214: if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1215:
1216: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1217: dk = rtmp[k];
1218: i = jl[k]; /* first row to be added to k_th row */
1220: while (i < k){
1221: nexti = jl[i]; /* next row to be added to k_th row */
1222:
1223: /* compute multiplier, update D(k) and U(i,k) */
1224: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1225: uikdi = - ba[ili]*ba[bi[i]];
1226: dk += uikdi*ba[ili];
1227: ba[ili] = uikdi; /* -U(i,k) */
1229: /* add multiple of row i to k-th row ... */
1230: jmin = ili + 1;
1231: nz = bi[i+1] - jmin;
1232: if (nz > 0){
1233: bcol = bj + jmin;
1234: bval = ba + jmin;
1235: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1236: /* ... add i to row list for next nonzero entry */
1237: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1238: j = bj[jmin];
1239: jl[i] = jl[j]; jl[j] = i; /* update jl */
1240: }
1241: i = nexti;
1242: }
1244: if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1245: /* calculate a shift that would make this row diagonally dominant */
1246: PetscReal rs = PetscAbs(PetscRealPart(dk));
1247: jmin = bi[k]+1;
1248: nz = bi[k+1] - jmin;
1249: if (nz){
1250: bcol = bj + jmin;
1251: bval = ba + jmin;
1252: while (nz--){
1253: rs += PetscAbsScalar(rtmp[*bcol++]);
1254: }
1255: }
1256: /* if this shift is less than the previous, just up the previous
1257: one by a bit */
1258: shift_amount = PetscMax(rs,1.1*shift_amount);
1259: chshift = PETSC_TRUE;
1260: /* Unlike in the ILU case there is no exit condition on nshift:
1261: we increase the shift until it converges. There is no guarantee that
1262: this algorithm converges faster or slower, or is better or worse
1263: than the ILU algorithm. */
1264: nshift++;
1265: break;
1266: }
1267: if (PetscRealPart(dk) < zeropivot){
1268: if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1269: if (damping > 0.0) {
1270: if (ndamp) damping *= 2.0;
1271: damp = PETSC_TRUE;
1272: ndamp++;
1273: break;
1274: } else if (PetscAbsScalar(dk) < zeropivot){
1275: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1276: } else {
1277: PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",PetscRealPart(dk),k);
1278: }
1279: }
1280:
1281: /* save nonzero entries in k-th row of U ... */
1282: /* printf("%d, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1283: ba[bi[k]] = 1.0/dk;
1284: jmin = bi[k]+1;
1285: nz = bi[k+1] - jmin;
1286: if (nz){
1287: bcol = bj + jmin;
1288: bval = ba + jmin;
1289: while (nz--){
1290: *bval++ = rtmp[*bcol];
1291: rtmp[*bcol++] = 0.0;
1292: }
1293: /* ... add k to row list for first nonzero entry in k-th row */
1294: il[k] = jmin;
1295: i = bj[jmin];
1296: jl[k] = jl[i]; jl[i] = k;
1297: }
1298: } /* end of for (k = 0; k<mbs; k++) */
1299: } while (damp||chshift);
1300: PetscFree(rtmp);
1301: PetscFree(il);
1302:
1303: C->factor = FACTOR_CHOLESKY;
1304: C->assembled = PETSC_TRUE;
1305: C->preallocated = PETSC_TRUE;
1306: PetscLogFlops(b->mbs);
1307: if (ndamp) {
1308: PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping);
1309: }
1310: if (nshift) {
1311: PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %d shifts\n",nshift);
1312: }
1313:
1314: return(0);
1315: }
1319: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1320: {
1322: Mat C;
1325: MatCholeskyFactorSymbolic(A,perm,info,&C);
1326: MatCholeskyFactorNumeric(A,&C);
1327: MatHeaderCopy(A,C);
1328: return(0);
1329: }