Actual source code: sbaijfact.c

  1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/mat/impls/sbaij/seq/sbaij.h
 5:  #include src/inline/ilu.h
 6:  #include include/petscis.h

  8: #if !defined(PETSC_USE_COMPLEX)
  9: /* 
 10:   input:
 11:    F -- numeric factor 
 12:   output:
 13:    nneg, nzero, npos: matrix inertia 
 14: */

 18: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
 19: {
 20:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 21:   PetscScalar  *dd = fact_ptr->a;
 22:   int          mbs=fact_ptr->mbs,bs=fact_ptr->bs,i,nneig_tmp,npos_tmp,
 23:                *fi = fact_ptr->i;

 26:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %d >1 yet",bs);
 27:   nneig_tmp = 0; npos_tmp = 0;
 28:   for (i=0; i<mbs; i++){
 29:     if (PetscRealPart(dd[*fi]) > 0.0){
 30:       npos_tmp++;
 31:     } else if (PetscRealPart(dd[*fi]) < 0.0){
 32:       nneig_tmp++;
 33:     }
 34:     fi++;
 35:   }
 36:   if (nneig) *nneig = nneig_tmp;
 37:   if (npos)  *npos  = npos_tmp;
 38:   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;

 40:   return(0);
 41: }
 42: #endif /* !defined(PETSC_USE_COMPLEX) */

 44: /* Using Modified Sparse Row (MSR) storage.
 45: See page 85, "Iterative Methods ..." by Saad. */
 46: /*
 47:     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 48: */
 49: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
 52: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
 53: {
 54:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
 55:   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
 56:   int          *jutmp,bs = a->bs,bs2=a->bs2;
 57:   int          m,realloc = 0,prow;
 58:   int          *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 59:   int          *il,ili,nextprow;
 60:   PetscReal    f = info->fill;
 61:   PetscTruth   perm_identity;

 64:   /* check whether perm is the identity mapping */
 65:   ISIdentity(perm,&perm_identity);

 67:   /* -- inplace factorization, i.e., use sbaij for *B -- */
 68:   if (perm_identity && bs==1 ){
 69:     if (!perm_identity) a->permute = PETSC_TRUE;
 70: 
 71:   ISGetIndices(perm,&rip);
 72: 
 73:   if (perm_identity){ /* without permutation */
 74:     ai = a->i; aj = a->j;
 75:   } else {            /* non-trivial permutation */
 76:     MatReorderingSeqSBAIJ(A,perm);
 77:     ai = a->inew; aj = a->jnew;
 78:   }
 79: 
 80:   /* initialization */
 81:   PetscMalloc((mbs+1)*sizeof(int),&iu);
 82:   umax  = (int)(f*ai[mbs] + 1);
 83:   PetscMalloc(umax*sizeof(int),&ju);
 84:   iu[0] = 0;
 85:   juidx = 0; /* index for ju */
 86:   PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
 87:   q     = jl + mbs;   /* linked list for col index of active row */
 88:   il    = q  + mbs;
 89:   for (i=0; i<mbs; i++){
 90:     jl[i] = mbs;
 91:     q[i]  = 0;
 92:     il[i] = 0;
 93:   }

 95:   /* for each row k */
 96:   for (k=0; k<mbs; k++){
 97:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 98:     q[k] = mbs;
 99:     /* initialize nonzero structure of k-th row to row rip[k] of A */
100:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
101:     jmax = ai[rip[k]+1];
102:     for (j=jmin; j<jmax; j++){
103:       vj = rip[aj[j]]; /* col. value */
104:       if(vj > k){
105:         qm = k;
106:         do {
107:           m  = qm; qm = q[m];
108:         } while(qm < vj);
109:         if (qm == vj) {
110:           SETERRQ(1," error: duplicate entry in A\n");
111:         }
112:         nzk++;
113:         q[m]  = vj;
114:         q[vj] = qm;
115:       } /* if(vj > k) */
116:     } /* for (j=jmin; j<jmax; j++) */

118:     /* modify nonzero structure of k-th row by computing fill-in
119:        for each row i to be merged in */
120:     prow = k;
121:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
122: 
123:     while (prow < k){
124:       nextprow = jl[prow];
125: 
126:       /* merge row prow into k-th row */
127:       ili = il[prow];
128:       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
129:       jmax = iu[prow+1];
130:       qm = k;
131:       for (j=jmin; j<jmax; j++){
132:         vj = ju[j];
133:         do {
134:           m = qm; qm = q[m];
135:         } while (qm < vj);
136:         if (qm != vj){  /* a fill */
137:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
138:         }
139:       } /* end of for (j=jmin; j<jmax; j++) */
140:       if (jmin < jmax){
141:         il[prow] = jmin;
142:         j = ju[jmin];
143:         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
144:       }
145:       prow = nextprow;
146:     }
147: 
148:     /* update il and jl */
149:     if (nzk > 0){
150:       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
151:       jl[k] = jl[i]; jl[i] = k;
152:       il[k] = iu[k] + 1;
153:     }
154:     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */

156:     /* allocate more space to ju if needed */
157:     if (iu[k+1] > umax) {
158:       /* estimate how much additional space we will need */
159:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
160:       /* just double the memory each time */
161:       maxadd = umax;
162:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
163:       umax += maxadd;

165:       /* allocate a longer ju */
166:       PetscMalloc(umax*sizeof(int),&jutmp);
167:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
168:       PetscFree(ju);
169:       ju   = jutmp;
170:       realloc++; /* count how many times we realloc */
171:     }

173:     /* save nonzero structure of k-th row in ju */
174:     ju[juidx++] = k; /* diag[k] */
175:     i = k;
176:     while (nzk --) {
177:       i           = q[i];
178:       ju[juidx++] = i;
179:     }
180:   }

182:   if (ai[mbs] != 0) {
183:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
184:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
185:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
186:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
187:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
188:   } else {
189:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
190:   }

192:   ISRestoreIndices(perm,&rip);
193:   /* PetscFree(q); */
194:   PetscFree(jl);

196:   /* put together the new matrix */
197:   MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
198:   MatSetType(*B,A->type_name);
199:   MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);

201:   /* PetscLogObjectParent(*B,iperm); */
202:   b = (Mat_SeqSBAIJ*)(*B)->data;
203:   PetscFree(b->imax);
204:   b->singlemalloc = PETSC_FALSE;
205:   /* the next line frees the default space generated by the Create() */
206:   PetscFree(b->a);
207:   PetscFree(b->ilen);
208:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
209:   b->j    = ju;
210:   b->i    = iu;
211:   b->diag = 0;
212:   b->ilen = 0;
213:   b->imax = 0;
214:   b->row  = perm;
215:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
216:   PetscObjectReference((PetscObject)perm);
217:   b->icol = perm;
218:   PetscObjectReference((PetscObject)perm);
219:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
220:   /* In b structure:  Free imax, ilen, old a, old j.  
221:      Allocate idnew, solve_work, new a, new j */
222:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
223:   b->maxnz = b->nz = iu[mbs];
224: 
225:   (*B)->factor                 = FACTOR_CHOLESKY;
226:   (*B)->info.factor_mallocs    = realloc;
227:   (*B)->info.fill_ratio_given  = f;
228:   if (ai[mbs] != 0) {
229:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
230:   } else {
231:     (*B)->info.fill_ratio_needed = 0.0;
232:   }


235:   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
236:   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
237:   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
238: 
239:   return(0);
240:   }
241:   /* -----------  end of new code --------------------*/


244:   if (!perm_identity) a->permute = PETSC_TRUE;
245: 
246:   ISGetIndices(perm,&rip);
247: 
248:   if (perm_identity){ /* without permutation */
249:     ai = a->i; aj = a->j;
250:   } else {            /* non-trivial permutation */
251:     MatReorderingSeqSBAIJ(A,perm);
252:     ai = a->inew; aj = a->jnew;
253:   }
254: 
255:   /* initialization */
256:   PetscMalloc((mbs+1)*sizeof(int),&iu);
257:   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
258:   PetscMalloc(umax*sizeof(int),&ju);
259:   iu[0] = mbs+1;
260:   juidx = mbs + 1; /* index for ju */
261:   PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
262:   q     = jl + mbs;   /* linked list for col index */
263:   for (i=0; i<mbs; i++){
264:     jl[i] = mbs;
265:     q[i] = 0;
266:   }

268:   /* for each row k */
269:   for (k=0; k<mbs; k++){
270:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
271:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272:     q[k] = mbs;
273:     /* initialize nonzero structure of k-th row to row rip[k] of A */
274:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
275:     jmax = ai[rip[k]+1];
276:     for (j=jmin; j<jmax; j++){
277:       vj = rip[aj[j]]; /* col. value */
278:       if(vj > k){
279:         qm = k;
280:         do {
281:           m  = qm; qm = q[m];
282:         } while(qm < vj);
283:         if (qm == vj) {
284:           SETERRQ(1," error: duplicate entry in A\n");
285:         }
286:         nzk++;
287:         q[m]  = vj;
288:         q[vj] = qm;
289:       } /* if(vj > k) */
290:     } /* for (j=jmin; j<jmax; j++) */

292:     /* modify nonzero structure of k-th row by computing fill-in
293:        for each row i to be merged in */
294:     prow = k;
295:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296: 
297:     while (prow < k){
298:       /* merge row prow into k-th row */
299:       jmin = iu[prow] + 1; jmax = iu[prow+1];
300:       qm = k;
301:       for (j=jmin; j<jmax; j++){
302:         vj = ju[j];
303:         do {
304:           m = qm; qm = q[m];
305:         } while (qm < vj);
306:         if (qm != vj){
307:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308:         }
309:       }
310:       prow = jl[prow]; /* next pivot row */
311:     }
312: 
313:     /* add k to row list for first nonzero element in k-th row */
314:     if (nzk > 0){
315:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316:       jl[k] = jl[i]; jl[i] = k;
317:     }
318:     iu[k+1] = iu[k] + nzk;

320:     /* allocate more space to ju if needed */
321:     if (iu[k+1] > umax) {
322:       /* estimate how much additional space we will need */
323:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324:       /* just double the memory each time */
325:       maxadd = umax;
326:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327:       umax += maxadd;

329:       /* allocate a longer ju */
330:       PetscMalloc(umax*sizeof(int),&jutmp);
331:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
332:       PetscFree(ju);
333:       ju   = jutmp;
334:       realloc++; /* count how many times we realloc */
335:     }

337:     /* save nonzero structure of k-th row in ju */
338:     i=k;
339:     while (nzk --) {
340:       i           = q[i];
341:       ju[juidx++] = i;
342:     }
343:   }

345:   if (ai[mbs] != 0) {
346:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
348:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
349:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
350:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
351:   } else {
352:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
353:   }

355:   ISRestoreIndices(perm,&rip);
356:   /* PetscFree(q); */
357:   PetscFree(jl);

359:   /* put together the new matrix */
360:   MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
361:   MatSetType(*B,A->type_name);
362:   MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);

364:   /* PetscLogObjectParent(*B,iperm); */
365:   b = (Mat_SeqSBAIJ*)(*B)->data;
366:   PetscFree(b->imax);
367:   b->singlemalloc = PETSC_FALSE;
368:   /* the next line frees the default space generated by the Create() */
369:   PetscFree(b->a);
370:   PetscFree(b->ilen);
371:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
372:   b->j    = ju;
373:   b->i    = iu;
374:   b->diag = 0;
375:   b->ilen = 0;
376:   b->imax = 0;
377:   b->row  = perm;
378:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
379:   PetscObjectReference((PetscObject)perm);
380:   b->icol = perm;
381:   PetscObjectReference((PetscObject)perm);
382:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
383:   /* In b structure:  Free imax, ilen, old a, old j.  
384:      Allocate idnew, solve_work, new a, new j */
385:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
386:   b->maxnz = b->nz = iu[mbs];
387: 
388:   (*B)->factor                 = FACTOR_CHOLESKY;
389:   (*B)->info.factor_mallocs    = realloc;
390:   (*B)->info.fill_ratio_given  = f;
391:   if (ai[mbs] != 0) {
392:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
393:   } else {
394:     (*B)->info.fill_ratio_needed = 0.0;
395:   }

397:   if (perm_identity){
398:     switch (bs) {
399:       case 1:
400:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
401:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
402:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
403:         break;
404:       case 2:
405:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
406:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
407:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
408:         break;
409:       case 3:
410:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
411:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
412:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
413:         break;
414:       case 4:
415:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
416:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
417:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
418:         break;
419:       case 5:
420:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
421:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
422:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
423:         break;
424:       case 6:
425:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
426:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
427:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
428:         break;
429:       case 7:
430:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
431:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
432:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
433:       break;
434:       default:
435:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
436:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
437:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
438:       break;
439:     }
440:   }

442:   return(0);
443: }


448: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
449: {
450:   Mat                C = *B;
451:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
452:   IS                 perm = b->row;
453:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
454:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
455:   int                bs=a->bs,bs2 = a->bs2;
456:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
457:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
458:   MatScalar          *work;
459:   int                *pivots;


463:   /* initialization */
464:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
465:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
466:   PetscMalloc(2*mbs*sizeof(int),&il);
467:   jl   = il + mbs;
468:   for (i=0; i<mbs; i++) {
469:     jl[i] = mbs; il[0] = 0;
470:   }
471:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
472:   uik  = dk + bs2;
473:   work = uik + bs2;
474:   PetscMalloc(bs*sizeof(int),&pivots);
475: 
476:   ISGetIndices(perm,&perm_ptr);
477: 
478:   /* check permutation */
479:   if (!a->permute){
480:     ai = a->i; aj = a->j; aa = a->a;
481:   } else {
482:     ai   = a->inew; aj = a->jnew;
483:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
484:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
485:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
486:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

488:     for (i=0; i<mbs; i++){
489:       jmin = ai[i]; jmax = ai[i+1];
490:       for (j=jmin; j<jmax; j++){
491:         while (a2anew[j] != j){
492:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
493:           for (k1=0; k1<bs2; k1++){
494:             dk[k1]       = aa[k*bs2+k1];
495:             aa[k*bs2+k1] = aa[j*bs2+k1];
496:             aa[j*bs2+k1] = dk[k1];
497:           }
498:         }
499:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
500:         if (i > aj[j]){
501:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
502:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
503:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
504:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
505:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
506:           }
507:         }
508:       }
509:     }
510:     PetscFree(a2anew);
511:   }
512: 
513:   /* for each row k */
514:   for (k = 0; k<mbs; k++){

516:     /*initialize k-th row with elements nonzero in row perm(k) of A */
517:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
518: 
519:     ap = aa + jmin*bs2;
520:     for (j = jmin; j < jmax; j++){
521:       vj = perm_ptr[aj[j]];         /* block col. index */
522:       rtmp_ptr = rtmp + vj*bs2;
523:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
524:     }

526:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
527:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
528:     i = jl[k]; /* first row to be added to k_th row  */

530:     while (i < k){
531:       nexti = jl[i]; /* next row to be added to k_th row */

533:       /* compute multiplier */
534:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

536:       /* uik = -inv(Di)*U_bar(i,k) */
537:       diag = ba + i*bs2;
538:       u    = ba + ili*bs2;
539:       PetscMemzero(uik,bs2*sizeof(MatScalar));
540:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
541: 
542:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
543:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
544: 
545:       /* update -U(i,k) */
546:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

548:       /* add multiple of row i to k-th row ... */
549:       jmin = ili + 1; jmax = bi[i+1];
550:       if (jmin < jmax){
551:         for (j=jmin; j<jmax; j++) {
552:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
553:           rtmp_ptr = rtmp + bj[j]*bs2;
554:           u = ba + j*bs2;
555:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
556:         }
557: 
558:         /* ... add i to row list for next nonzero entry */
559:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
560:         j     = bj[jmin];
561:         jl[i] = jl[j]; jl[j] = i; /* update jl */
562:       }
563:       i = nexti;
564:     }

566:     /* save nonzero entries in k-th row of U ... */

568:     /* invert diagonal block */
569:     diag = ba+k*bs2;
570:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
571:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
572: 
573:     jmin = bi[k]; jmax = bi[k+1];
574:     if (jmin < jmax) {
575:       for (j=jmin; j<jmax; j++){
576:          vj = bj[j];           /* block col. index of U */
577:          u   = ba + j*bs2;
578:          rtmp_ptr = rtmp + vj*bs2;
579:          for (k1=0; k1<bs2; k1++){
580:            *u++        = *rtmp_ptr;
581:            *rtmp_ptr++ = 0.0;
582:          }
583:       }
584: 
585:       /* ... add k to row list for first nonzero entry in k-th row */
586:       il[k] = jmin;
587:       i     = bj[jmin];
588:       jl[k] = jl[i]; jl[i] = k;
589:     }
590:   }

592:   PetscFree(rtmp);
593:   PetscFree(il);
594:   PetscFree(dk);
595:   PetscFree(pivots);
596:   if (a->permute){
597:     PetscFree(aa);
598:   }

600:   ISRestoreIndices(perm,&perm_ptr);
601:   C->factor       = FACTOR_CHOLESKY;
602:   C->assembled    = PETSC_TRUE;
603:   C->preallocated = PETSC_TRUE;
604:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
605:   return(0);
606: }

610: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
611: {
612:   Mat                C = *B;
613:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
614:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
615:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
616:   int                bs=a->bs,bs2 = a->bs2;
617:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
618:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
619:   MatScalar          *work;
620:   int                *pivots;


624:   /* initialization */
625: 
626:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
627:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
628:   PetscMalloc(2*mbs*sizeof(int),&il);
629:   jl   = il + mbs;
630:   for (i=0; i<mbs; i++) {
631:     jl[i] = mbs; il[0] = 0;
632:   }
633:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
634:   uik  = dk + bs2;
635:   work = uik + bs2;
636:   PetscMalloc(bs*sizeof(int),&pivots);
637: 
638:   ai = a->i; aj = a->j; aa = a->a;
639: 
640:   /* for each row k */
641:   for (k = 0; k<mbs; k++){

643:     /*initialize k-th row with elements nonzero in row k of A */
644:     jmin = ai[k]; jmax = ai[k+1];
645:     ap = aa + jmin*bs2;
646:     for (j = jmin; j < jmax; j++){
647:       vj = aj[j];         /* block col. index */
648:       rtmp_ptr = rtmp + vj*bs2;
649:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
650:     }

652:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
653:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
654:     i = jl[k]; /* first row to be added to k_th row  */

656:     while (i < k){
657:       nexti = jl[i]; /* next row to be added to k_th row */

659:       /* compute multiplier */
660:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

662:       /* uik = -inv(Di)*U_bar(i,k) */
663:       diag = ba + i*bs2;
664:       u    = ba + ili*bs2;
665:       PetscMemzero(uik,bs2*sizeof(MatScalar));
666:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
667: 
668:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
669:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
670: 
671:       /* update -U(i,k) */
672:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

674:       /* add multiple of row i to k-th row ... */
675:       jmin = ili + 1; jmax = bi[i+1];
676:       if (jmin < jmax){
677:         for (j=jmin; j<jmax; j++) {
678:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
679:           rtmp_ptr = rtmp + bj[j]*bs2;
680:           u = ba + j*bs2;
681:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
682:         }
683: 
684:         /* ... add i to row list for next nonzero entry */
685:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
686:         j     = bj[jmin];
687:         jl[i] = jl[j]; jl[j] = i; /* update jl */
688:       }
689:       i = nexti;
690:     }

692:     /* save nonzero entries in k-th row of U ... */

694:     /* invert diagonal block */
695:     diag = ba+k*bs2;
696:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
697:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
698: 
699:     jmin = bi[k]; jmax = bi[k+1];
700:     if (jmin < jmax) {
701:       for (j=jmin; j<jmax; j++){
702:          vj = bj[j];           /* block col. index of U */
703:          u   = ba + j*bs2;
704:          rtmp_ptr = rtmp + vj*bs2;
705:          for (k1=0; k1<bs2; k1++){
706:            *u++        = *rtmp_ptr;
707:            *rtmp_ptr++ = 0.0;
708:          }
709:       }
710: 
711:       /* ... add k to row list for first nonzero entry in k-th row */
712:       il[k] = jmin;
713:       i     = bj[jmin];
714:       jl[k] = jl[i]; jl[i] = k;
715:     }
716:   }

718:   PetscFree(rtmp);
719:   PetscFree(il);
720:   PetscFree(dk);
721:   PetscFree(pivots);

723:   C->factor    = FACTOR_CHOLESKY;
724:   C->assembled = PETSC_TRUE;
725:   C->preallocated = PETSC_TRUE;
726:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
727:   return(0);
728: }

730: /*
731:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
732:     Version for blocks 2 by 2.
733: */
736: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
737: {
738:   Mat                C = *B;
739:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
740:   IS                 perm = b->row;
741:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
742:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
743:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
744:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

747: 
748:   /* initialization */
749:   /* il and jl record the first nonzero element in each row of the accessing 
750:      window U(0:k, k:mbs-1).
751:      jl:    list of rows to be added to uneliminated rows 
752:             i>= k: jl(i) is the first row to be added to row i
753:             i<  k: jl(i) is the row following row i in some list of rows
754:             jl(i) = mbs indicates the end of a list                        
755:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
756:             row i of U */
757:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
758:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
759:   PetscMalloc(2*mbs*sizeof(int),&il);
760:   jl   = il + mbs;
761:   for (i=0; i<mbs; i++) {
762:     jl[i] = mbs; il[0] = 0;
763:   }
764:   PetscMalloc(8*sizeof(MatScalar),&dk);
765:   uik  = dk + 4;
766:   ISGetIndices(perm,&perm_ptr);

768:   /* check permutation */
769:   if (!a->permute){
770:     ai = a->i; aj = a->j; aa = a->a;
771:   } else {
772:     ai   = a->inew; aj = a->jnew;
773:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
774:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
775:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
776:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

778:     for (i=0; i<mbs; i++){
779:       jmin = ai[i]; jmax = ai[i+1];
780:       for (j=jmin; j<jmax; j++){
781:         while (a2anew[j] != j){
782:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
783:           for (k1=0; k1<4; k1++){
784:             dk[k1]       = aa[k*4+k1];
785:             aa[k*4+k1] = aa[j*4+k1];
786:             aa[j*4+k1] = dk[k1];
787:           }
788:         }
789:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
790:         if (i > aj[j]){
791:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
792:           ap = aa + j*4;     /* ptr to the beginning of the block */
793:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
794:           ap[1] = ap[2];
795:           ap[2] = dk[1];
796:         }
797:       }
798:     }
799:     PetscFree(a2anew);
800:   }

802:   /* for each row k */
803:   for (k = 0; k<mbs; k++){

805:     /*initialize k-th row with elements nonzero in row perm(k) of A */
806:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
807:     ap = aa + jmin*4;
808:     for (j = jmin; j < jmax; j++){
809:       vj = perm_ptr[aj[j]];         /* block col. index */
810:       rtmp_ptr = rtmp + vj*4;
811:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
812:     }

814:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
815:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
816:     i = jl[k]; /* first row to be added to k_th row  */

818:     while (i < k){
819:       nexti = jl[i]; /* next row to be added to k_th row */

821:       /* compute multiplier */
822:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

824:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
825:       diag = ba + i*4;
826:       u    = ba + ili*4;
827:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
828:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
829:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
830:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
831: 
832:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
833:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
834:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
835:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
836:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

838:       /* update -U(i,k): ba[ili] = uik */
839:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

841:       /* add multiple of row i to k-th row ... */
842:       jmin = ili + 1; jmax = bi[i+1];
843:       if (jmin < jmax){
844:         for (j=jmin; j<jmax; j++) {
845:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
846:           rtmp_ptr = rtmp + bj[j]*4;
847:           u = ba + j*4;
848:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
849:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
850:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
851:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
852:         }
853: 
854:         /* ... add i to row list for next nonzero entry */
855:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
856:         j     = bj[jmin];
857:         jl[i] = jl[j]; jl[j] = i; /* update jl */
858:       }
859:       i = nexti;
860:     }

862:     /* save nonzero entries in k-th row of U ... */

864:     /* invert diagonal block */
865:     diag = ba+k*4;
866:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
867:     Kernel_A_gets_inverse_A_2(diag);
868: 
869:     jmin = bi[k]; jmax = bi[k+1];
870:     if (jmin < jmax) {
871:       for (j=jmin; j<jmax; j++){
872:          vj = bj[j];           /* block col. index of U */
873:          u   = ba + j*4;
874:          rtmp_ptr = rtmp + vj*4;
875:          for (k1=0; k1<4; k1++){
876:            *u++        = *rtmp_ptr;
877:            *rtmp_ptr++ = 0.0;
878:          }
879:       }
880: 
881:       /* ... add k to row list for first nonzero entry in k-th row */
882:       il[k] = jmin;
883:       i     = bj[jmin];
884:       jl[k] = jl[i]; jl[i] = k;
885:     }
886:   }

888:   PetscFree(rtmp);
889:   PetscFree(il);
890:   PetscFree(dk);
891:   if (a->permute) {
892:     PetscFree(aa);
893:   }
894:   ISRestoreIndices(perm,&perm_ptr);
895:   C->factor    = FACTOR_CHOLESKY;
896:   C->assembled = PETSC_TRUE;
897:   C->preallocated = PETSC_TRUE;
898:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
899:   return(0);
900: }

902: /*
903:       Version for when blocks are 2 by 2 Using natural ordering
904: */
907: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
908: {
909:   Mat                C = *B;
910:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
911:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
912:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
913:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
914:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

917: 
918:   /* initialization */
919:   /* il and jl record the first nonzero element in each row of the accessing 
920:      window U(0:k, k:mbs-1).
921:      jl:    list of rows to be added to uneliminated rows 
922:             i>= k: jl(i) is the first row to be added to row i
923:             i<  k: jl(i) is the row following row i in some list of rows
924:             jl(i) = mbs indicates the end of a list                        
925:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
926:             row i of U */
927:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
928:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
929:   PetscMalloc(2*mbs*sizeof(int),&il);
930:   jl   = il + mbs;
931:   for (i=0; i<mbs; i++) {
932:     jl[i] = mbs; il[0] = 0;
933:   }
934:   PetscMalloc(8*sizeof(MatScalar),&dk);
935:   uik  = dk + 4;
936: 
937:   ai = a->i; aj = a->j; aa = a->a;

939:   /* for each row k */
940:   for (k = 0; k<mbs; k++){

942:     /*initialize k-th row with elements nonzero in row k of A */
943:     jmin = ai[k]; jmax = ai[k+1];
944:     ap = aa + jmin*4;
945:     for (j = jmin; j < jmax; j++){
946:       vj = aj[j];         /* block col. index */
947:       rtmp_ptr = rtmp + vj*4;
948:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
949:     }
950: 
951:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
952:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
953:     i = jl[k]; /* first row to be added to k_th row  */

955:     while (i < k){
956:       nexti = jl[i]; /* next row to be added to k_th row */

958:       /* compute multiplier */
959:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

961:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
962:       diag = ba + i*4;
963:       u    = ba + ili*4;
964:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
965:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
966:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
967:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
968: 
969:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
970:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
971:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
972:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
973:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

975:       /* update -U(i,k): ba[ili] = uik */
976:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

978:       /* add multiple of row i to k-th row ... */
979:       jmin = ili + 1; jmax = bi[i+1];
980:       if (jmin < jmax){
981:         for (j=jmin; j<jmax; j++) {
982:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983:           rtmp_ptr = rtmp + bj[j]*4;
984:           u = ba + j*4;
985:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
986:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
987:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
988:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
989:         }
990: 
991:         /* ... add i to row list for next nonzero entry */
992:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
993:         j     = bj[jmin];
994:         jl[i] = jl[j]; jl[j] = i; /* update jl */
995:       }
996:       i = nexti;
997:     }

999:     /* save nonzero entries in k-th row of U ... */

1001:     /* invert diagonal block */
1002:     diag = ba+k*4;
1003:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1004:     Kernel_A_gets_inverse_A_2(diag);
1005: 
1006:     jmin = bi[k]; jmax = bi[k+1];
1007:     if (jmin < jmax) {
1008:       for (j=jmin; j<jmax; j++){
1009:          vj = bj[j];           /* block col. index of U */
1010:          u   = ba + j*4;
1011:          rtmp_ptr = rtmp + vj*4;
1012:          for (k1=0; k1<4; k1++){
1013:            *u++        = *rtmp_ptr;
1014:            *rtmp_ptr++ = 0.0;
1015:          }
1016:       }
1017: 
1018:       /* ... add k to row list for first nonzero entry in k-th row */
1019:       il[k] = jmin;
1020:       i     = bj[jmin];
1021:       jl[k] = jl[i]; jl[i] = k;
1022:     }
1023:   }

1025:   PetscFree(rtmp);
1026:   PetscFree(il);
1027:   PetscFree(dk);

1029:   C->factor    = FACTOR_CHOLESKY;
1030:   C->assembled = PETSC_TRUE;
1031:   C->preallocated = PETSC_TRUE;
1032:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1033:   return(0);
1034: }

1036: /*
1037:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1038:     Version for blocks are 1 by 1.
1039: */
1042: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1043: {
1044:   Mat                C = *B;
1045:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1046:   IS                 ip = b->row;
1047:   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1048:   int                *ai,*aj,*r;
1049:   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
1050:   MatScalar          *rtmp;
1051:   MatScalar          *ba = b->a,*aa,ak;
1052:   MatScalar          dk,uikdi;

1055:   ISGetIndices(ip,&rip);
1056:   if (!a->permute){
1057:     ai = a->i; aj = a->j; aa = a->a;
1058:   } else {
1059:     ai = a->inew; aj = a->jnew;
1060:     PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1061:     PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1062:     PetscMalloc(ai[mbs]*sizeof(int),&r);
1063:     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));

1065:     jmin = ai[0]; jmax = ai[mbs];
1066:     for (j=jmin; j<jmax; j++){
1067:       while (r[j] != j){
1068:         k = r[j]; r[j] = r[k]; r[k] = k;
1069:         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1070:       }
1071:     }
1072:     PetscFree(r);
1073:   }
1074: 
1075:   /* initialization */
1076:   /* il and jl record the first nonzero element in each row of the accessing 
1077:      window U(0:k, k:mbs-1).
1078:      jl:    list of rows to be added to uneliminated rows 
1079:             i>= k: jl(i) is the first row to be added to row i
1080:             i<  k: jl(i) is the row following row i in some list of rows
1081:             jl(i) = mbs indicates the end of a list                        
1082:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1083:             row i of U */
1084:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1085:   PetscMalloc(2*mbs*sizeof(int),&il);
1086:   jl   = il + mbs;
1087:   for (i=0; i<mbs; i++) {
1088:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1089:   }

1091:   /* for each row k */
1092:   for (k = 0; k<mbs; k++){

1094:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1095:     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1096: 
1097:     for (j = jmin; j < jmax; j++){
1098:       vj = rip[aj[j]];
1099:       rtmp[vj] = aa[j];
1100:     }

1102:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1103:     dk = rtmp[k];
1104:     i = jl[k]; /* first row to be added to k_th row  */

1106:     while (i < k){
1107:       nexti = jl[i]; /* next row to be added to k_th row */

1109:       /* compute multiplier, update D(k) and U(i,k) */
1110:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1111:       uikdi = - ba[ili]*ba[i];
1112:       dk += uikdi*ba[ili];
1113:       ba[ili] = uikdi; /* -U(i,k) */

1115:       /* add multiple of row i to k-th row ... */
1116:       jmin = ili + 1; jmax = bi[i+1];
1117:       if (jmin < jmax){
1118:         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1119:         /* ... add i to row list for next nonzero entry */
1120:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1121:         j     = bj[jmin];
1122:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1123:       }
1124:       i = nexti;
1125:     }

1127:     /* check for zero pivot and save diagoanl element */
1128:     if (dk == 0.0){
1129:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1130:       /*
1131:     } else if (PetscRealPart(dk) < 0.0){
1132:       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);  
1133:       */
1134:     }

1136:     /* save nonzero entries in k-th row of U ... */
1137:     ba[k] = 1.0/dk;
1138:     jmin = bi[k]; jmax = bi[k+1];
1139:     if (jmin < jmax) {
1140:       for (j=jmin; j<jmax; j++){
1141:          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1142:       }
1143:       /* ... add k to row list for first nonzero entry in k-th row */
1144:       il[k] = jmin;
1145:       i     = bj[jmin];
1146:       jl[k] = jl[i]; jl[i] = k;
1147:     }
1148:   }
1149: 
1150:   PetscFree(rtmp);
1151:   PetscFree(il);
1152:   if (a->permute){
1153:     PetscFree(aa);
1154:   }

1156:   ISRestoreIndices(ip,&rip);
1157:   C->factor    = FACTOR_CHOLESKY;
1158:   C->assembled = PETSC_TRUE;
1159:   C->preallocated = PETSC_TRUE;
1160:   PetscLogFlops(b->mbs);
1161:   return(0);
1162: }

1164: /*
1165:   Version for when blocks are 1 by 1 Using natural ordering
1166: */
1169: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1170: {
1171:   Mat                C = *B;
1172:   Mat_SeqSBAIJ       *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1173:   int                ierr,i,j,mbs = a->mbs;
1174:   int                *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1175:   int                k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1176:   MatScalar          *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1177:   PetscReal          damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1178:   PetscTruth         damp,chshift;
1179:   int                nshift=0;

1182:   /* initialization */
1183:   /* il and jl record the first nonzero element in each row of the accessing 
1184:      window U(0:k, k:mbs-1).
1185:      jl:    list of rows to be added to uneliminated rows 
1186:             i>= k: jl(i) is the first row to be added to row i
1187:             i<  k: jl(i) is the row following row i in some list of rows
1188:             jl(i) = mbs indicates the end of a list                        
1189:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1190:   */
1191:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1192:   PetscMalloc(2*mbs*sizeof(int),&il);
1193:   jl   = il + mbs;

1195:   shift_amount = 0;
1196:   do {
1197:     damp = PETSC_FALSE;
1198:     chshift = PETSC_FALSE;
1199:     for (i=0; i<mbs; i++) {
1200:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1201:     }

1203:     for (k = 0; k<mbs; k++){ /* row k */
1204:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1205:       nz   = ai[k+1] - ai[k];
1206:       acol = aj + ai[k];
1207:       aval = aa + ai[k];
1208:       bval = ba + bi[k];
1209:       while (nz -- ){
1210:         rtmp[*acol++] = *aval++;
1211:         *bval++       = 0.0; /* for in-place factorization */
1212:       }
1213:       /* damp the diagonal of the matrix */
1214:       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1215: 
1216:       /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1217:       dk = rtmp[k];
1218:       i  = jl[k]; /* first row to be added to k_th row  */

1220:       while (i < k){
1221:         nexti = jl[i]; /* next row to be added to k_th row */
1222: 
1223:         /* compute multiplier, update D(k) and U(i,k) */
1224:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1225:         uikdi = - ba[ili]*ba[bi[i]];
1226:         dk   += uikdi*ba[ili];
1227:         ba[ili] = uikdi; /* -U(i,k) */

1229:         /* add multiple of row i to k-th row ... */
1230:         jmin = ili + 1;
1231:         nz   = bi[i+1] - jmin;
1232:         if (nz > 0){
1233:           bcol = bj + jmin;
1234:           bval = ba + jmin;
1235:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1236:           /* ... add i to row list for next nonzero entry */
1237:           il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1238:           j     = bj[jmin];
1239:           jl[i] = jl[j]; jl[j] = i; /* update jl */
1240:         }
1241:         i = nexti;
1242:       }

1244:       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1245:         /* calculate a shift that would make this row diagonally dominant */
1246:         PetscReal rs = PetscAbs(PetscRealPart(dk));
1247:         jmin      = bi[k]+1;
1248:         nz        = bi[k+1] - jmin;
1249:         if (nz){
1250:           bcol = bj + jmin;
1251:           bval = ba + jmin;
1252:           while (nz--){
1253:             rs += PetscAbsScalar(rtmp[*bcol++]);
1254:           }
1255:         }
1256:         /* if this shift is less than the previous, just up the previous
1257:            one by a bit */
1258:         shift_amount = PetscMax(rs,1.1*shift_amount);
1259:         chshift  = PETSC_TRUE;
1260:         /* Unlike in the ILU case there is no exit condition on nshift:
1261:            we increase the shift until it converges. There is no guarantee that
1262:            this algorithm converges faster or slower, or is better or worse
1263:            than the ILU algorithm. */
1264:         nshift++;
1265:         break;
1266:       }
1267:       if (PetscRealPart(dk) < zeropivot){
1268:         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1269:         if (damping > 0.0) {
1270:           if (ndamp) damping *= 2.0;
1271:           damp = PETSC_TRUE;
1272:           ndamp++;
1273:           break;
1274:         } else if (PetscAbsScalar(dk) < zeropivot){
1275:           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1276:         } else {
1277:           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",PetscRealPart(dk),k);
1278:         }
1279:       }
1280: 
1281:       /* save nonzero entries in k-th row of U ... */
1282:       /* printf("%d, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1283:       ba[bi[k]] = 1.0/dk;
1284:       jmin      = bi[k]+1;
1285:       nz        = bi[k+1] - jmin;
1286:       if (nz){
1287:         bcol = bj + jmin;
1288:         bval = ba + jmin;
1289:         while (nz--){
1290:           *bval++       = rtmp[*bcol];
1291:           rtmp[*bcol++] = 0.0;
1292:         }
1293:         /* ... add k to row list for first nonzero entry in k-th row */
1294:         il[k] = jmin;
1295:         i     = bj[jmin];
1296:         jl[k] = jl[i]; jl[i] = k;
1297:       }
1298:     } /* end of for (k = 0; k<mbs; k++) */
1299:   } while (damp||chshift);
1300:   PetscFree(rtmp);
1301:   PetscFree(il);
1302: 
1303:   C->factor       = FACTOR_CHOLESKY;
1304:   C->assembled    = PETSC_TRUE;
1305:   C->preallocated = PETSC_TRUE;
1306:   PetscLogFlops(b->mbs);
1307:   if (ndamp) {
1308:     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping);
1309:   }
1310:  if (nshift) {
1311:     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %d shifts\n",nshift);
1312:   }
1313: 
1314:   return(0);
1315: }

1319: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1320: {
1322:   Mat C;

1325:   MatCholeskyFactorSymbolic(A,perm,info,&C);
1326:   MatCholeskyFactorNumeric(A,&C);
1327:   MatHeaderCopy(A,C);
1328:   return(0);
1329: }