Actual source code: bdiag2.c

  1: /*$Id: bdiag2.c,v 1.21 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/inline/ilu.h

 10: int MatSetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
 11: {
 12:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
 13:   int          kk,ldiag,row,newnz,*bdlen_new;
 14:   int          j,k, *diag_new,ierr;
 15:   PetscTruth   roworiented = a->roworiented,dfound;
 16:   PetscScalar  value,**diagv_new;

 19:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 20:     row = im[kk];
 21:     if (row < 0) continue;
 22:     if (row >= A->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->M-1);
 23:     for (j=0; j<n; j++) {
 24:       if (in[j] < 0) continue;
 25:       if (in[j] >= A->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],A->N-1);
 26:       ldiag  = row - in[j]; /* diagonal number */
 27:       dfound = PETSC_FALSE;
 28:       if (roworiented) {
 29:         value = v[j + kk*n];
 30:       } else {
 31:         value = v[kk + j*m];
 32:       }
 33:       /* search diagonals for required one */
 34:       for (k=0; k<a->nd; k++) {
 35:         if (a->diag[k] == ldiag) {
 36:           dfound = PETSC_TRUE;
 37:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 38:           else                  a->diagv[k][row]  = value;
 39:           break;
 40:         }
 41:       }
 42:       if (!dfound) {
 43:         if (a->nonew || a->nonew_diag) {
 44: #if !defined(PETSC_USE_COMPLEX)
 45:           if (a->user_alloc && value) {
 46: #else
 47:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
 48: #endif
 49:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
 50:           }
 51:         } else {
 52:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
 53:           a->reallocs++;
 54:           /* free old bdiag storage info and reallocate */
 55:           PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
 56:           bdlen_new = diag_new + a->nd + 1;
 57:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
 58:           for (k=0; k<a->nd; k++) {
 59:             diag_new[k]  = a->diag[k];
 60:             diagv_new[k] = a->diagv[k];
 61:             bdlen_new[k] = a->bdlen[k];
 62:           }
 63:           diag_new[a->nd]  = ldiag;
 64:           if (ldiag > 0) { /* lower triangular */
 65:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 66:           } else {         /* upper triangular */
 67:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 68:           }
 69:           newnz = bdlen_new[a->nd];
 70:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
 71:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
 72:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 73:           if (diag_new[a->nd] > 0) {
 74:             diagv_new[a->nd] -= diag_new[a->nd];
 75:           }
 76:           a->maxnz += newnz;
 77:           a->nz    += newnz;
 78:           PetscFree(a->diagv);
 79:           PetscFree(a->diag);
 80:           a->diag  = diag_new;
 81:           a->bdlen = bdlen_new;
 82:           a->diagv = diagv_new;

 84:           /* Insert value */
 85:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 86:           else                  a->diagv[a->nd][row] = value;
 87:           a->nd++;
 88:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
 89:         }
 90:       }
 91:     }
 92:   }
 93:   return(0);
 94: }


 99: int MatSetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
100: {
101:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
102:   int          kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
103:   int          j,k,bs = a->bs,*diag_new,idx=0;
104:   PetscTruth   roworiented = a->roworiented,dfound;
105:   PetscScalar  value,**diagv_new;

108:   for (kk=0; kk<m; kk++) { /* loop over added rows */
109:     row = im[kk];
110:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
111:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
112:     shift = (row/bs)*bs*bs + row%bs;
113:     for (j=0; j<n; j++) {
114:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
115:       dfound = PETSC_FALSE;
116:       if (roworiented) {
117:         value = v[idx++];
118:       } else {
119:         value = v[kk + j*m];
120:       }
121:       /* seach for appropriate diagonal */
122:       for (k=0; k<a->nd; k++) {
123:         if (a->diag[k] == ldiag) {
124:           dfound = PETSC_TRUE;
125:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
126:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
127:           break;
128:         }
129:       }
130:       if (!dfound) {
131:         if (a->nonew || a->nonew_diag) {
132: #if !defined(PETSC_USE_COMPLEX)
133:           if (a->user_alloc && value) {
134: #else
135:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
136: #endif
137:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
138:           }
139:         } else {
140:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
141:           a->reallocs++;
142:           /* free old bdiag storage info and reallocate */
143:           PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
144:           bdlen_new = diag_new + a->nd + 1;
145:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
146:           for (k=0; k<a->nd; k++) {
147:             diag_new[k]  = a->diag[k];
148:             diagv_new[k] = a->diagv[k];
149:             bdlen_new[k] = a->bdlen[k];
150:           }
151:           diag_new[a->nd]  = ldiag;
152:           if (ldiag > 0) {/* lower triangular */
153:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
154:           } else {         /* upper triangular */
155:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
156:           }
157:           newnz = bs*bs*bdlen_new[a->nd];
158:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
159:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
160:           /* adjust pointer so that dv[diag][row] works for all diagonals */
161:           if (diag_new[a->nd] > 0) {
162:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
163:           }
164:           a->maxnz += newnz; a->nz += newnz;
165:           PetscFree(a->diagv);
166:           PetscFree(a->diag);
167:           a->diag  = diag_new;
168:           a->bdlen = bdlen_new;
169:           a->diagv = diagv_new;

171:           /* Insert value */
172:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
173:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
174:           a->nd++;
175:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
176:         }
177:       }
178:     }
179:   }
180:   return(0);
181: }

185: int MatGetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
186: {
187:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
188:   int          kk,ldiag,row,j,k;
189:   PetscScalar  zero = 0.0;
190:   PetscTruth   dfound;

193:   for (kk=0; kk<m; kk++) { /* loop over rows */
194:     row = im[kk];
195:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
196:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
197:     for (j=0; j<n; j++) {
198:       if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
199:       if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
200:       ldiag = row - in[j]; /* diagonal number */
201:       dfound = PETSC_FALSE;
202:       for (k=0; k<a->nd; k++) {
203:         if (a->diag[k] == ldiag) {
204:           dfound = PETSC_TRUE;
205:           *v++ = a->diagv[k][row];
206:           break;
207:         }
208:       }
209:       if (!dfound) *v++ = zero;
210:     }
211:   }
212:   return(0);
213: }

217: int MatGetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
218: {
219:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
220:   int          kk,ldiag,shift,row,j,k,bs = a->bs;
221:   PetscScalar  zero = 0.0;
222:   PetscTruth   dfound;

225:   for (kk=0; kk<m; kk++) { /* loop over rows */
226:     row = im[kk];
227:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
228:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
229:     shift = (row/bs)*bs*bs + row%bs;
230:     for (j=0; j<n; j++) {
231:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
232:       dfound = PETSC_FALSE;
233:       for (k=0; k<a->nd; k++) {
234:         if (a->diag[k] == ldiag) {
235:           dfound = PETSC_TRUE;
236:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
237:           break;
238:         }
239:       }
240:       if (!dfound) *v++ = zero;
241:     }
242:   }
243:   return(0);
244: }

246: /*
247:     MatMults for blocksize 1 to 5 and N -------------------------------
248:  */
251: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
252: {
253:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
254:   int             nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
255:   int             ierr,d,j,len;
256:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
257:   PetscScalar     *pvin,*pvout,*dv;

260:   VecGetArray(xx,&vin);
261:   VecGetArray(yy,&vout);
262:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
263:   for (d=0; d<nd; d++) {
264:     dv   = a_diagv[d];
265:     diag = a_diag[d];
266:     len  = a_bdlen[d];
267:     if (diag > 0) {             /* lower triangle */
268:       pvin  = vin;
269:       pvout = vout + diag;
270:       dv    = dv   + diag;
271:     } else {                     /* upper triangle,including main diagonal */
272:       pvin  = vin - diag;
273:       pvout = vout;
274:     }
275:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
276:     PetscLogFlops(2*len);
277:   }
278:   VecRestoreArray(xx,&vin);
279:   VecRestoreArray(yy,&vout);
280:   return(0);
281: }

285: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
286: {
287:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
288:   int             nd = a->nd,nb_diag;
289:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
290:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
291:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

294:   VecGetArray(xx,&vin);
295:   VecGetArray(yy,&vout);
296:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
297:   for (d=0; d<nd; d++) {
298:     dv      = a_diagv[d];
299:     nb_diag = 2*a_diag[d];
300:     len     = a_bdlen[d];
301:     if (nb_diag > 0) {                /* lower triangle */
302:       pvin  = vin;
303:       pvout = vout + nb_diag;
304:       dv    = dv   + 2*nb_diag;
305:     } else {                       /* upper triangle, including main diagonal */
306:       pvin  = vin - nb_diag;
307:       pvout = vout;
308:     }
309:     for (k=0; k<len; k++) {
310:       pvin0     = pvin[0]; pvin1 = pvin[1];

312:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
313:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

315:       pvout += 2; pvin += 2; dv += 4;
316:     }
317:     PetscLogFlops(8*len);
318:   }
319:   VecRestoreArray(xx,&vin);
320:   VecRestoreArray(yy,&vout);
321:   return(0);
322: }

326: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
327: {
328:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
329:   int             nd = a->nd,nb_diag;
330:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
331:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
332:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

335:   VecGetArray(xx,&vin);
336:   VecGetArray(yy,&vout);
337:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
338:   for (d=0; d<nd; d++) {
339:     dv      = a_diagv[d];
340:     nb_diag = 3*a_diag[d];
341:     len     = a_bdlen[d];
342:     if (nb_diag > 0) {                /* lower triangle */
343:       pvin  = vin;
344:       pvout = vout + nb_diag;
345:       dv    = dv   + 3*nb_diag;
346:     } else {                       /* upper triangle,including main diagonal */
347:       pvin  = vin - nb_diag;
348:       pvout = vout;
349:     }
350:     for (k=0; k<len; k++) {
351:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

353:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
354:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
355:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

357:       pvout += 3; pvin += 3; dv += 9;
358:     }
359:     PetscLogFlops(18*len);
360:   }
361:   VecRestoreArray(xx,&vin);
362:   VecRestoreArray(yy,&vout);
363:   return(0);
364: }

368: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
369: {
370:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
371:   int             nd = a->nd,nb_diag;
372:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
373:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
374:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

377:   VecGetArray(xx,&vin);
378:   VecGetArray(yy,&vout);
379:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
380:   for (d=0; d<nd; d++) {
381:     dv      = a_diagv[d];
382:     nb_diag = 4*a_diag[d];
383:     len     = a_bdlen[d];
384:     if (nb_diag > 0) {                /* lower triangle */
385:       pvin  = vin;
386:       pvout = vout + nb_diag;
387:       dv    = dv   + 4*nb_diag;
388:     } else {                       /* upper triangle,including main diagonal */
389:       pvin  = vin - nb_diag;
390:       pvout = vout;
391:     }
392:     for (k=0; k<len; k++) {
393:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

395:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
396:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
397:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
398:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

400:       pvout += 4; pvin += 4; dv += 16;
401:     }
402:     PetscLogFlops(32*len);
403:   }
404:   VecRestoreArray(xx,&vin);
405:   VecRestoreArray(yy,&vout);
406:   return(0);
407: }

411: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
412: {
413:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
414:   int             nd = a->nd,nb_diag;
415:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
416:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
417:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

420:   VecGetArray(xx,&vin);
421:   VecGetArray(yy,&vout);
422:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
423:   for (d=0; d<nd; d++) {
424:     dv      = a_diagv[d];
425:     nb_diag = 5*a_diag[d];
426:     len     = a_bdlen[d];
427:     if (nb_diag > 0) {                /* lower triangle */
428:       pvin  = vin;
429:       pvout = vout + nb_diag;
430:       dv    = dv   + 5*nb_diag;
431:     } else {                       /* upper triangle,including main diagonal */
432:       pvin  = vin - nb_diag;
433:       pvout = vout;
434:     }
435:     for (k=0; k<len; k++) {
436:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

438:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
439:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
440:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
441:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
442:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

444:       pvout += 5; pvin += 5; dv += 25;
445:     }
446:     PetscLogFlops(50*len);
447:   }
448:   VecRestoreArray(xx,&vin);
449:   VecRestoreArray(yy,&vout);
450:   return(0);
451: }

455: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
456: {
457:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
458:   int             nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
459:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
460:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
461:   PetscScalar     *pvin,*pvout,*dv;

464:   VecGetArray(xx,&vin);
465:   VecGetArray(yy,&vout);
466:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
467:   for (d=0; d<nd; d++) {
468:     dv      = a_diagv[d];
469:     nb_diag = bs*a_diag[d];
470:     len     = a_bdlen[d];
471:     if (nb_diag > 0) {                /* lower triangle */
472:       pvin  = vin;
473:       pvout = vout + nb_diag;
474:       dv    = dv   + bs*nb_diag;
475:     } else {                       /* upper triangle, including main diagonal */
476:       pvin  = vin - nb_diag;
477:       pvout = vout;
478:     }
479:     for (k=0; k<len; k++) {
480:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
481:       pvout += bs; pvin += bs; dv += bs2;
482:     }
483:     PetscLogFlops(2*bs2*len);
484:   }
485:   VecRestoreArray(xx,&vin);
486:   VecRestoreArray(yy,&vout);
487:   return(0);
488: }

490: /*
491:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
492:  */
495: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
496: {
497:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
498:   int             ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
499:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
500:   PetscScalar     *pvin,*pvout,*dv;

503:   if (zz != yy) {VecCopy(zz,yy);}
504:   VecGetArray(xx,&vin);
505:   VecGetArray(yy,&vout);
506:   for (d=0; d<nd; d++) {
507:     dv   = a_diagv[d];
508:     diag = a_diag[d];
509:     len  = a_bdlen[d];
510:     if (diag > 0) {             /* lower triangle */
511:       pvin  = vin;
512:       pvout = vout + diag;
513:       dv    = dv   + diag;
514:     } else {                     /* upper triangle, including main diagonal */
515:       pvin  = vin - diag;
516:       pvout = vout;
517:     }
518:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
519:     PetscLogFlops(2*len);
520:   }
521:   VecRestoreArray(xx,&vin);
522:   VecRestoreArray(yy,&vout);
523:   return(0);
524: }

528: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
529: {
530:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
531:   int             ierr,nd = a->nd,nb_diag;
532:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
533:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
534:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

537:   if (zz != yy) {VecCopy(zz,yy);}
538:   VecGetArray(xx,&vin);
539:   VecGetArray(yy,&vout);
540:   for (d=0; d<nd; d++) {
541:     dv      = a_diagv[d];
542:     nb_diag = 2*a_diag[d];
543:     len     = a_bdlen[d];
544:     if (nb_diag > 0) {                /* lower triangle */
545:       pvin  = vin;
546:       pvout = vout + nb_diag;
547:       dv    = dv   + 2*nb_diag;
548:     } else {                       /* upper triangle, including main diagonal */
549:       pvin  = vin - nb_diag;
550:       pvout = vout;
551:     }
552:     for (k=0; k<len; k++) {
553:       pvin0 = pvin[0]; pvin1 = pvin[1];

555:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
556:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

558:       pvout += 2; pvin += 2; dv += 4;
559:     }
560:     PetscLogFlops(8*len);
561:   }
562:   VecRestoreArray(xx,&vin);
563:   VecRestoreArray(yy,&vout);
564:   return(0);
565: }

569: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
570: {
571:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
572:   int             ierr,nd = a->nd,nb_diag;
573:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
574:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
575:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

578:   if (zz != yy) {VecCopy(zz,yy);}
579:   VecGetArray(xx,&vin);
580:   VecGetArray(yy,&vout);
581:   for (d=0; d<nd; d++) {
582:     dv      = a_diagv[d];
583:     nb_diag = 3*a_diag[d];
584:     len     = a_bdlen[d];
585:     if (nb_diag > 0) {                /* lower triangle */
586:       pvin  = vin;
587:       pvout = vout + nb_diag;
588:       dv    = dv   + 3*nb_diag;
589:     } else {                       /* upper triangle, including main diagonal */
590:       pvin  = vin - nb_diag;
591:       pvout = vout;
592:     }
593:     for (k=0; k<len; k++) {
594:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

596:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
597:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
598:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

600:       pvout += 3; pvin += 3; dv += 9;
601:     }
602:     PetscLogFlops(18*len);
603:   }
604:   VecRestoreArray(xx,&vin);
605:   VecRestoreArray(yy,&vout);
606:   return(0);
607: }

611: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
612: {
613:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
614:   int             ierr,nd = a->nd,nb_diag;
615:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
616:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
617:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

620:   if (zz != yy) {VecCopy(zz,yy);}
621:   VecGetArray(xx,&vin);
622:   VecGetArray(yy,&vout);
623:   for (d=0; d<nd; d++) {
624:     dv      = a_diagv[d];
625:     nb_diag = 4*a_diag[d];
626:     len     = a_bdlen[d];
627:     if (nb_diag > 0) {                /* lower triangle */
628:       pvin  = vin;
629:       pvout = vout + nb_diag;
630:       dv    = dv   + 4*nb_diag;
631:     } else {                       /* upper triangle, including main diagonal */
632:       pvin  = vin - nb_diag;
633:       pvout = vout;
634:     }
635:     for (k=0; k<len; k++) {
636:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

638:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
639:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
640:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
641:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

643:       pvout += 4; pvin += 4; dv += 16;
644:     }
645:     PetscLogFlops(32*len);
646:   }
647:   VecRestoreArray(xx,&vin);
648:   VecRestoreArray(yy,&vout);
649:   return(0);
650: }

654: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
655: {
656:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
657:   int             ierr,nd = a->nd,nb_diag;
658:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
659:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
660:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

663:   if (zz != yy) {VecCopy(zz,yy);}
664:   VecGetArray(xx,&vin);
665:   VecGetArray(yy,&vout);
666:   for (d=0; d<nd; d++) {
667:     dv      = a_diagv[d];
668:     nb_diag = 5*a_diag[d];
669:     len     = a_bdlen[d];
670:     if (nb_diag > 0) {                /* lower triangle */
671:       pvin  = vin;
672:       pvout = vout + nb_diag;
673:       dv    = dv   + 5*nb_diag;
674:     } else {                       /* upper triangle, including main diagonal */
675:       pvin  = vin - nb_diag;
676:       pvout = vout;
677:     }
678:     for (k=0; k<len; k++) {
679:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

681:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
682:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
683:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
684:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
685:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

687:       pvout += 5; pvin += 5; dv += 25;
688:     }
689:     PetscLogFlops(50*len);
690:   }
691:   VecRestoreArray(xx,&vin);
692:   VecRestoreArray(yy,&vout);
693:   return(0);
694: }

698: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
699: {
700:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
701:   int             ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
702:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
703:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
704:   PetscScalar     *pvin,*pvout,*dv;

707:   if (zz != yy) {VecCopy(zz,yy);}
708:   VecGetArray(xx,&vin);
709:   VecGetArray(yy,&vout);
710:   for (d=0; d<nd; d++) {
711:     dv      = a_diagv[d];
712:     nb_diag = bs*a_diag[d];
713:     len     = a_bdlen[d];
714:     if (nb_diag > 0) {                /* lower triangle */
715:       pvin  = vin;
716:       pvout = vout + nb_diag;
717:       dv    = dv   + bs*nb_diag;
718:     } else {                       /* upper triangle, including main diagonal */
719:       pvin  = vin - nb_diag;
720:       pvout = vout;
721:     }
722:     for (k=0; k<len; k++) {
723:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
724:       pvout += bs; pvin += bs; dv += bs2;
725:     }
726:     PetscLogFlops(2*bs2*len);
727:   }
728:   VecRestoreArray(xx,&vin);
729:   VecRestoreArray(yy,&vout);
730:   return(0);
731: }

733: /*
734:      MatMultTranspose ----------------------------------------------
735:  */
738: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
739: {
740:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
741:   int             ierr,nd = a->nd,diag,d,j,len;
742:   PetscScalar     *pvin,*pvout,*dv;
743:   PetscScalar     *vin,*vout;
744: 
746:   VecGetArray(xx,&vin);
747:   VecGetArray(yy,&vout);
748:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
749:   for (d=0; d<nd; d++) {
750:     dv   = a->diagv[d];
751:     diag = a->diag[d];
752:     len  = a->bdlen[d];
753:       /* diag of original matrix is (row/bs - col/bs) */
754:       /* diag of transpose matrix is (col/bs - row/bs) */
755:     if (diag < 0) {        /* transpose is lower triangle */
756:       pvin  = vin;
757:       pvout = vout - diag;
758:     } else {        /* transpose is upper triangle, including main diagonal */
759:       pvin  = vin + diag;
760:       pvout = vout;
761:       dv    = dv + diag;
762:     }
763:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
764:   }
765:   VecRestoreArray(xx,&vin);
766:   VecRestoreArray(yy,&vout);
767:   return(0);
768: }

772: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
773: {
774:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
775:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
776:   PetscScalar     *pvin,*pvout,*dv;
777:   PetscScalar     *vin,*vout;
778: 
780:   VecGetArray(xx,&vin);
781:   VecGetArray(yy,&vout);
782:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
783:   for (d=0; d<nd; d++) {
784:     dv   = a->diagv[d];
785:     diag = a->diag[d];
786:     len  = a->bdlen[d];
787:       /* diag of original matrix is (row/bs - col/bs) */
788:       /* diag of transpose matrix is (col/bs - row/bs) */
789:     if (diag < 0) {        /* transpose is lower triangle */
790:       pvin  = vin;
791:       pvout = vout - bs*diag;
792:     } else {        /* transpose is upper triangle, including main diagonal */
793:       pvin  = vin + bs*diag;
794:       pvout = vout;
795:       dv    = dv + diag;
796:     }
797:     for (k=0; k<len; k++) {
798:       kloc = k*bs; kshift = kloc*bs;
799:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
800:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
801:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
802:         }
803:       }
804:     }
805:   }
806:   VecRestoreArray(xx,&vin);
807:   VecRestoreArray(yy,&vout);
808:   return(0);
809: }

811: /*
812:      MatMultTransposeAdd ----------------------------------------------
813:  */
816: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
817: {
818:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
819:   int             ierr,nd = a->nd,diag,d,j,len;
820:   PetscScalar     *pvin,*pvout,*dv;
821:   PetscScalar     *vin,*vout;
822: 
824:   if (zz != yy) {VecCopy(zz,yy);}
825:   VecGetArray(xx,&vin);
826:   VecGetArray(yy,&vout);
827:   for (d=0; d<nd; d++) {
828:     dv   = a->diagv[d];
829:     diag = a->diag[d];
830:     len  = a->bdlen[d];
831:       /* diag of original matrix is (row/bs - col/bs) */
832:       /* diag of transpose matrix is (col/bs - row/bs) */
833:     if (diag < 0) {        /* transpose is lower triangle */
834:       pvin  = vin;
835:       pvout = vout - diag;
836:     } else {        /* transpose is upper triangle, including main diagonal */
837:       pvin  = vin + diag;
838:       pvout = vout;
839:       dv    = dv + diag;
840:     }
841:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
842:   }
843:   VecRestoreArray(xx,&vin);
844:   VecRestoreArray(yy,&vout);
845:   return(0);
846: }

850: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
851: {
852:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
853:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
854:   PetscScalar     *pvin,*pvout,*dv;
855:   PetscScalar     *vin,*vout;
856: 
858:   if (zz != yy) {VecCopy(zz,yy);}
859:   VecGetArray(xx,&vin);
860:   VecGetArray(yy,&vout);
861:   for (d=0; d<nd; d++) {
862:     dv   = a->diagv[d];
863:     diag = a->diag[d];
864:     len  = a->bdlen[d];
865:       /* diag of original matrix is (row/bs - col/bs) */
866:       /* diag of transpose matrix is (col/bs - row/bs) */
867:     if (diag < 0) {        /* transpose is lower triangle */
868:       pvin  = vin;
869:       pvout = vout - bs*diag;
870:     } else {        /* transpose is upper triangle, including main diagonal */
871:       pvin  = vin + bs*diag;
872:       pvout = vout;
873:       dv    = dv + diag;
874:     }
875:     for (k=0; k<len; k++) {
876:       kloc = k*bs; kshift = kloc*bs;
877:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
878:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
879:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
880:         }
881:       }
882:     }
883:   }
884:   VecRestoreArray(xx,&vin);
885:   VecRestoreArray(yy,&vout);
886:   return(0);
887: }
890: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
891: {
892:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
893:   PetscScalar  *x,*b,*xb,*dd,*dv,dval,sum;
894:   int          ierr,i,j,k,d,kbase,bs = a->bs,kloc;
895:   int          mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

898:   its = its*lits;
899:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

901:   /* Currently this code doesn't use wavefront orderings, although
902:      we should eventually incorporate that option, whatever wavefront
903:      ordering maybe :-) */

905:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

907:   VecGetArray(xx,&x);
908:   if (xx != bb) {
909:     VecGetArray(bb,&b);
910:   } else {
911:     b = x;
912:   }
913:   dd = a->diagv[mainbd];
914:   if (flag == SOR_APPLY_UPPER) {
915:     /* apply (U + D/omega) to the vector */
916:     for (k=0; k<mblock; k++) {
917:       kloc = k*bs; kbase = kloc*bs;
918:       for (i=0; i<bs; i++) {
919:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
920:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
921:         for (d=mainbd+1; d<a->nd; d++) {
922:           diag = a->diag[d];
923:           dv   = a->diagv[d];
924:           if (k-diag < mblock) {
925:             for (j=0; j<bs; j++) {
926:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
927:             }
928:           }
929:         }
930:         x[kloc+i] = sum;
931:       }
932:     }
933:     VecRestoreArray(xx,&x);
934:     if (xx != bb) {VecRestoreArray(bb,&b);}
935:     return(0);
936:   }
937:   if (flag & SOR_ZERO_INITIAL_GUESS) {
938:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
939:       for (k=0; k<mblock; k++) {
940:         kloc = k*bs; kbase = kloc*bs;
941:         for (i=0; i<bs; i++) {
942:           sum  = b[i+kloc];
943:           dval = shift + dd[i*(bs+1)+kbase];
944:           for (d=0; d<mainbd; d++) {
945:             diag = a->diag[d];
946:             dv   = a->diagv[d];
947:             if (k >= diag) {
948:               for (j=0; j<bs; j++)
949:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
950:             }
951:           }
952:           for (j=0; j<i; j++){
953:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
954:           }
955:           x[kloc+i] = omega*sum/dval;
956:         }
957:       }
958:       xb = x;
959:     } else xb = b;
960:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
961:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
962:       for (k=0; k<mblock; k++) {
963:         kloc = k*bs; kbase = kloc*bs;
964:         for (i=0; i<bs; i++)
965:           x[kloc+i] *= dd[i*(bs+1)+kbase];
966:       }
967:     }
968:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
969:       for (k=mblock-1; k>=0; k--) {
970:         kloc = k*bs; kbase = kloc*bs;
971:         for (i=bs-1; i>=0; i--) {
972:           sum  = xb[i+kloc];
973:           dval = shift + dd[i*(bs+1)+kbase];
974:           for (j=i+1; j<bs; j++)
975:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
976:           for (d=mainbd+1; d<a->nd; d++) {
977:             diag = a->diag[d];
978:             dv   = a->diagv[d];
979:             bloc = k - diag;
980:             if (bloc < mblock) {
981:               for (j=0; j<bs; j++)
982:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
983:             }
984:           }
985:           x[kloc+i] = omega*sum/dval;
986:         }
987:       }
988:     }
989:     its--;
990:   }
991:   while (its--) {
992:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
993:       for (k=0; k<mblock; k++) {
994:         kloc = k*bs; kbase = kloc*bs;
995:         for (i=0; i<bs; i++) {
996:           sum  = b[i+kloc];
997:           dval = shift + dd[i*(bs+1)+kbase];
998:           for (d=0; d<mainbd; d++) {
999:             diag = a->diag[d];
1000:             dv   = a->diagv[d];
1001:             bloc = k - diag;
1002:             if (bloc >= 0) {
1003:               for (j=0; j<bs; j++) {
1004:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1005:               }
1006:             }
1007:           }
1008:           for (d=mainbd; d<a->nd; d++) {
1009:             diag = a->diag[d];
1010:             dv   = a->diagv[d];
1011:             bloc = k - diag;
1012:             if (bloc < mblock) {
1013:               for (j=0; j<bs; j++) {
1014:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1015:               }
1016:             }
1017:           }
1018:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1019:         }
1020:       }
1021:     }
1022:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1023:       for (k=mblock-1; k>=0; k--) {
1024:         kloc = k*bs; kbase = kloc*bs;
1025:         for (i=bs-1; i>=0; i--) {
1026:           sum  = b[i+kloc];
1027:           dval = shift + dd[i*(bs+1)+kbase];
1028:           for (d=0; d<mainbd; d++) {
1029:             diag = a->diag[d];
1030:             dv   = a->diagv[d];
1031:             bloc = k - diag;
1032:             if (bloc >= 0) {
1033:               for (j=0; j<bs; j++) {
1034:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1035:               }
1036:             }
1037:           }
1038:           for (d=mainbd; d<a->nd; d++) {
1039:             diag = a->diag[d];
1040:             dv   = a->diagv[d];
1041:             bloc = k - diag;
1042:             if (bloc < mblock) {
1043:               for (j=0; j<bs; j++) {
1044:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1045:               }
1046:             }
1047:           }
1048:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1049:         }
1050:       }
1051:     }
1052:   }
1053:   VecRestoreArray(xx,&x);
1054:   if (xx != bb) VecRestoreArray(bb,&b);
1055:   return(0);
1056: }

1060: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
1061: {
1062:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1063:   PetscScalar  *x,*b,*xb,*dd,dval,sum;
1064:   int          ierr,m = A->m,i,d,loc;
1065:   int          mainbd = a->mainbd,diag;

1068:   its = its*lits;
1069:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1070:   /* Currently this code doesn't use wavefront orderings,although
1071:      we should eventually incorporate that option, whatever wavefront
1072:      ordering maybe :-) */

1074:   VecGetArray(xx,&x);
1075:   VecGetArray(bb,&b);
1076:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1077:   dd = a->diagv[mainbd];
1078:   if (flag == SOR_APPLY_UPPER) {
1079:     /* apply (U + D/omega) to the vector */
1080:     for (i=0; i<m; i++) {
1081:       sum = b[i] * (shift + dd[i]) / omega;
1082:       for (d=mainbd+1; d<a->nd; d++) {
1083:         diag = a->diag[d];
1084:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1085:       }
1086:       x[i] = sum;
1087:     }
1088:     VecRestoreArray(xx,&x);
1089:     VecRestoreArray(bb,&b);
1090:     return(0);
1091:   }
1092:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1093:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1094:       for (i=0; i<m; i++) {
1095:         sum  = b[i];
1096:         for (d=0; d<mainbd; d++) {
1097:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1098:         }
1099:         x[i] = omega*(sum/(shift + dd[i]));
1100:       }
1101:       xb = x;
1102:     } else xb = b;
1103:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1104:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1105:       for (i=0; i<m; i++) x[i] *= dd[i];
1106:     }
1107:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1108:       for (i=m-1; i>=0; i--) {
1109:         sum = xb[i];
1110:         for (d=mainbd+1; d<a->nd; d++) {
1111:           diag = a->diag[d];
1112:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1113:         }
1114:         x[i] = omega*(sum/(shift + dd[i]));
1115:       }
1116:     }
1117:     its--;
1118:   }
1119:   while (its--) {
1120:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1121:       for (i=0; i<m; i++) {
1122:         sum  = b[i];
1123:         dval = shift + dd[i];
1124:         for (d=0; d<mainbd; d++) {
1125:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1126:         }
1127:         for (d=mainbd; d<a->nd; d++) {
1128:           diag = a->diag[d];
1129:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1130:         }
1131:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1132:       }
1133:     }
1134:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1135:       for (i=m-1; i>=0; i--) {
1136:         sum = b[i];
1137:         for (d=0; d<mainbd; d++) {
1138:           loc = i - a->diag[d];
1139:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1140:         }
1141:         for (d=mainbd; d<a->nd; d++) {
1142:           diag = a->diag[d];
1143:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1144:         }
1145:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1146:       }
1147:     }
1148:   }
1149:   VecRestoreArray(xx,&x);
1150:   VecRestoreArray(bb,&b);
1151:   return(0);
1152: }