Actual source code: bdfact.c

  1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format - factorization and triangular solves */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/inline/ilu.h

 10: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
 11: {
 12:   PetscTruth   idn;
 13:   int          ierr;

 16:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 17:   if (isrow) {
 18:     ISIdentity(isrow,&idn);
 19:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 20:   }
 21:   if (iscol) {
 22:     ISIdentity(iscol,&idn);
 23:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 24:   }
 25:   if (info->levels != 0) {
 26:     SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 27:   }
 28:   MatConvert(A,MATSAME,B);

 30:   /* Must set to zero for repeated calls with different nonzero structure */
 31:   (*B)->factor = 0;
 32:   return(0);
 33: }

 37: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
 38: {
 39:   PetscTruth   idn;
 40:   int          ierr;

 43:   /* For now, no fill is allocated in symbolic factorization phase, so we
 44:      directly use the input matrix for numeric factorization. */
 45:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 46:   if (isrow) {
 47:     ISIdentity(isrow,&idn);
 48:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 49:   }
 50:   if (iscol) {
 51:     ISIdentity(iscol,&idn);
 52:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 53:   }
 54:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 55:   MatLUFactorNumeric(A,&A);
 56:   return(0);
 57: }

 59: /* --------------------------------------------------------------------------*/
 62: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
 63: {
 64:   Mat          C = *B;
 65:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
 66:   int          k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
 67:   int          dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
 68:   int          *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
 69:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],*v_work;
 70:   PetscScalar  *multiplier;

 73:   /* Copy input matrix to factored matrix if we've already factored the
 74:      matrix before AND the nonzero structure remains the same.  This is done
 75:      in symbolic factorization the first time through, but there's no symbolic
 76:      factorization for successive calls with same matrix sparsity structure. */
 77:   if (C->factor == FACTOR_LU) {
 78:     for (i=0; i<a->nd; i++) {
 79:       len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
 80:       d   = diag[i];
 81:       if (d > 0) {
 82:         PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
 83:       } else {
 84:         PetscMemcpy(dv[i],a1->diagv[i],len);
 85:       }
 86:     }
 87:   }

 89:   if (!a->pivot) {
 90:     PetscMalloc((m+1)*sizeof(int),&a->pivot);
 91:     PetscLogObjectMemory(C,m*sizeof(int));
 92:   }
 93:   PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
 94:   multiplier = v_work + bs;
 95:   PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
 96:   PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
 97:   for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
 98:   for (k=0; k<mblock; k++) { /* k = block pivot_row */
 99:     knb = k*bs; knb2 = knb*bs;
100:     /* invert the diagonal block */
101:     Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
102:     for (d=mainbd-1; d>=0; d--) {
103:       elim_row = k + diag[d];
104:       if (elim_row < mblock) { /* sweep down */
105:         /* dv[d][knb2]: test if entire block is zero? */
106:         Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
107:         for (d2=d+1; d2<nd; d2++) {
108:           elim_col = elim_row - diag[d2];
109:           if (elim_col >=0 && elim_col < nblock) {
110:             dgk = k - elim_col;
111:             if ((dnum = dgptr[dgk+mblock])) {
112:               Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
113:                              &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
114:             }
115:           }
116:         }
117:       }
118:     }
119:   }
120:   PetscFree(dgptr);
121:   PetscFree(v_work);
122:   C->factor = FACTOR_LU;
123:   return(0);
124: }

128: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
129: {
130:   Mat          C = *B;
131:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
132:   int          k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
133:   int          *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
134:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],mult;

137:   /* Copy input matrix to factored matrix if we've already factored the
138:      matrix before AND the nonzero structure remains the same.  This is done
139:      in symbolic factorization the first time through, but there's no symbolic
140:      factorization for successive calls with same matrix sparsity structure. */
141:   if (C->factor == FACTOR_LU) {
142:     for (i=0; i<nd; i++) {
143:       len = a->bdlen[i] * sizeof(PetscScalar);
144:       d   = diag[i];
145:       if (d > 0) {
146:         PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
147:       } else {
148:         PetscMemcpy(dv[i],a1->diagv[i],len);
149:       }
150:     }
151:   }

153:   PetscMalloc((m+n+1)*sizeof(int),&dgptr);
154:   PetscMemzero(dgptr,(m+n)*sizeof(int));
155:   for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
156:   for (k=0; k<m; k++) { /* k = pivot_row */
157:     dd[k] = 1.0/dd[k];
158:     for (d=mainbd-1; d>=0; d--) {
159:       elim_row = k + diag[d];
160:       if (elim_row < m) { /* sweep down */
161:         if (dv[d][elim_row] != 0.0) {
162:           dv[d][elim_row] *= dd[k];
163:           mult = dv[d][elim_row];
164:           for (d2=d+1; d2<nd; d2++) {
165:             elim_col = elim_row - diag[d2];
166:             dgk = k - elim_col;
167:             if (elim_col >=0 && elim_col < n) {
168:               if ((dnum = dgptr[dgk+m])) {
169:                 dv[d2][elim_row] -= mult * dv[dnum-1][k];
170:               }
171:             }
172:           }
173:         }
174:       }
175:     }
176:   }
177:   PetscFree(dgptr);
178:   C->factor = FACTOR_LU;
179:   return(0);
180: }

182: /* -----------------------------------------------------------------*/

186: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
187: {
188:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
189:   int          ierr,i,d,loc,mainbd = a->mainbd;
190:   int          n = A->n,m = A->m,*diag = a->diag,col;
191:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;

194:   VecGetArray(xx,&x);
195:   VecGetArray(yy,&y);
196:   /* forward solve the lower triangular part */
197:   for (i=0; i<m; i++) {
198:     sum = x[i];
199:     for (d=0; d<mainbd; d++) {
200:       loc = i - diag[d];
201:       if (loc >= 0) sum -= dv[d][i] * y[loc];
202:     }
203:     y[i] = sum;
204:   }
205:   /* backward solve the upper triangular part */
206:   for (i=m-1; i>=0; i--) {
207:     sum = y[i];
208:     for (d=mainbd+1; d<a->nd; d++) {
209:       col = i - diag[d];
210:       if (col < n) sum -= dv[d][i] * y[col];
211:     }
212:     y[i] = sum*dd[i];
213:   }
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(yy,&y);
216:   PetscLogFlops(2*a->nz - A->n);
217:   return(0);
218: }

222: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
223: {
224:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
225:   int          i,d,loc,mainbd = a->mainbd;
226:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
227:   int          ierr,m = A->m,*diag = a->diag,col;
228:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
229:   PetscScalar  w0,w1,sum0,sum1;

232:   VecGetArray(xx,&x);
233:   VecGetArray(yy,&y);
234:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

236:   /* forward solve the lower triangular part */
237:   if (mainbd != 0) {
238:     inb = 0;
239:     for (i=0; i<mblock; i++) {
240:       sum0 = sum1 = 0.0;
241:       for (d=0; d<mainbd; d++) {
242:         loc = 2*(i - diag[d]);
243:         if (loc >= 0) {
244:           dvt = &dv[d][4*i];
245:           w0 = y[loc]; w1 = y[loc+1];
246:           sum0 += dvt[0]*w0 + dvt[2]*w1;
247:           sum1 += dvt[1]*w0 + dvt[3]*w1;
248:         }
249:       }
250:       y[inb] -= sum0; y[inb+1] -= sum1;

252:       inb += 2;
253:     }
254:   }
255:   /* backward solve the upper triangular part */
256:   inb = 2*(mblock-1); inb2 = 2*inb;
257:   for (i=mblock-1; i>=0; i--) {
258:     sum0 = y[inb]; sum1 = y[inb+1];
259:     for (d=mainbd+1; d<a->nd; d++) {
260:       col = 2*(i - diag[d]);
261:       if (col < 2*nblock) {
262:         dvt = &dv[d][4*i];
263:         w0 = y[col]; w1 = y[col+1];
264:         sum0 -= dvt[0]*w0 + dvt[2]*w1;
265:         sum1 -= dvt[1]*w0 + dvt[3]*w1;
266:       }
267:     }
268:     dvt = dd+inb2;
269:     y[inb]   = dvt[0]*sum0 + dvt[2]*sum1;
270:     y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
271:     inb -= 2; inb2 -= 4;
272:   }
273:   VecRestoreArray(xx,&x);
274:   VecRestoreArray(yy,&y);
275:   PetscLogFlops(2*a->nz - A->n);
276:   return(0);
277: }

281: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
282: {
283:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
284:   int          i,d,loc,mainbd = a->mainbd;
285:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
286:   int          ierr,m = A->m,*diag = a->diag,col;
287:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
288:   PetscScalar  w0,w1,w2,sum0,sum1,sum2;

291:   VecGetArray(xx,&x);
292:   VecGetArray(yy,&y);
293:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

295:   /* forward solve the lower triangular part */
296:   if (mainbd != 0) {
297:     inb = 0;
298:     for (i=0; i<mblock; i++) {
299:       sum0 = sum1 = sum2 = 0.0;
300:       for (d=0; d<mainbd; d++) {
301:         loc = 3*(i - diag[d]);
302:         if (loc >= 0) {
303:           dvt = &dv[d][9*i];
304:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
305:           sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
306:           sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
307:           sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
308:         }
309:       }
310:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
311:       inb += 3;
312:     }
313:   }
314:   /* backward solve the upper triangular part */
315:   inb = 3*(mblock-1); inb2 = 3*inb;
316:   for (i=mblock-1; i>=0; i--) {
317:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2];
318:     for (d=mainbd+1; d<a->nd; d++) {
319:       col = 3*(i - diag[d]);
320:       if (col < 3*nblock) {
321:         dvt = &dv[d][9*i];
322:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
323:         sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
324:         sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
325:         sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
326:       }
327:     }
328:     dvt = dd+inb2;
329:     y[inb]   = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
330:     y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
331:     y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
332:     inb -= 3; inb2 -= 9;
333:   }
334:   VecRestoreArray(xx,&x);
335:   VecRestoreArray(yy,&y);
336:   PetscLogFlops(2*a->nz - A->n);
337:   return(0);
338: }

342: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
343: {
344:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
345:   int          i,d,loc,mainbd = a->mainbd;
346:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
347:   int          ierr,m = A->m,*diag = a->diag,col;
348:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
349:   PetscScalar  w0,w1,w2,w3,sum0,sum1,sum2,sum3;

352:   VecGetArray(xx,&x);
353:   VecGetArray(yy,&y);
354:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

356:   /* forward solve the lower triangular part */
357:   if (mainbd != 0) {
358:     inb = 0;
359:     for (i=0; i<mblock; i++) {
360:       sum0 = sum1 = sum2 = sum3 = 0.0;
361:       for (d=0; d<mainbd; d++) {
362:         loc = 4*(i - diag[d]);
363:         if (loc >= 0) {
364:           dvt = &dv[d][16*i];
365:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
366:           sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
367:           sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
368:           sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
369:           sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
370:         }
371:       }
372:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
373:       inb += 4;
374:     }
375:   }
376:   /* backward solve the upper triangular part */
377:   inb = 4*(mblock-1); inb2 = 4*inb;
378:   for (i=mblock-1; i>=0; i--) {
379:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2]; sum3 =  y[inb+3];
380:     for (d=mainbd+1; d<a->nd; d++) {
381:       col = 4*(i - diag[d]);
382:       if (col < 4*nblock) {
383:         dvt = &dv[d][16*i];
384:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
385:         sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
386:         sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
387:         sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
388:         sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
389:       }
390:     }
391:     dvt = dd+inb2;
392:     y[inb]   = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
393:     y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
394:     y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
395:     y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
396:     inb -= 4; inb2 -= 16;
397:   }
398:   VecRestoreArray(xx,&x);
399:   VecRestoreArray(yy,&y);
400:   PetscLogFlops(2*a->nz - A->n);
401:   return(0);
402: }

406: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
407: {
408:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
409:   int          i,d,loc,mainbd = a->mainbd;
410:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
411:   int          ierr,m = A->m,*diag = a->diag,col;
412:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
413:   PetscScalar  w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;

416:   VecGetArray(xx,&x);
417:   VecGetArray(yy,&y);
418:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

420:   /* forward solve the lower triangular part */
421:   if (mainbd != 0) {
422:     inb = 0;
423:     for (i=0; i<mblock; i++) {
424:       sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
425:       for (d=0; d<mainbd; d++) {
426:         loc = 5*(i - diag[d]);
427:         if (loc >= 0) {
428:           dvt = &dv[d][25*i];
429:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
430:           sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
431:           sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
432:           sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
433:           sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
434:           sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
435:         }
436:       }
437:       y[inb]   -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
438:       y[inb+4] -= sum4;
439:       inb += 5;
440:     }
441:   }
442:   /* backward solve the upper triangular part */
443:   inb = 5*(mblock-1); inb2 = 5*inb;
444:   for (i=mblock-1; i>=0; i--) {
445:     sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
446:     for (d=mainbd+1; d<a->nd; d++) {
447:       col = 5*(i - diag[d]);
448:       if (col < 5*nblock) {
449:         dvt = &dv[d][25*i];
450:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
451:         sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
452:         sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
453:         sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
454:         sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
455:         sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
456:       }
457:     }
458:     dvt = dd+inb2;
459:     y[inb]   = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
460:                + dvt[20]*sum4;
461:     y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
462:                + dvt[21]*sum4;
463:     y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
464:                + dvt[22]*sum4;
465:     y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
466:                + dvt[23]*sum4;
467:     y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
468:                + dvt[24]*sum4;
469:     inb -= 5; inb2 -= 25;
470:   }
471:   VecRestoreArray(xx,&x);
472:   VecRestoreArray(yy,&y);
473:   PetscLogFlops(2*a->nz - A->n);
474:   return(0);
475: }

479: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
480: {
481:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
482:   int          i,d,loc,mainbd = a->mainbd;
483:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
484:   int          ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
485:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
486:   PetscScalar  work[25];

489:   VecGetArray(xx,&x);
490:   VecGetArray(yy,&y);
491:   if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
492:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

494:   /* forward solve the lower triangular part */
495:   if (mainbd != 0) {
496:     inb = 0;
497:     for (i=0; i<mblock; i++) {
498:       for (d=0; d<mainbd; d++) {
499:         loc = i - diag[d];
500:         if (loc >= 0) {
501:           Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
502:         }
503:       }
504:       inb += bs;
505:     }
506:   }
507:   /* backward solve the upper triangular part */
508:   inb = bs*(mblock-1); inb2 = inb*bs;
509:   for (i=mblock-1; i>=0; i--) {
510:     for (d=mainbd+1; d<a->nd; d++) {
511:       col = i - diag[d];
512:       if (col < nblock) {
513:         Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
514:       }
515:     }
516:     Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
517:     PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
518:     inb -= bs; inb2 -= bs2;
519:   }
520:   VecRestoreArray(xx,&x);
521:   VecRestoreArray(yy,&y);
522:   PetscLogFlops(2*a->nz - A->n);
523:   return(0);
524: }