Actual source code: bdiag2.c
1: /*$Id: bdiag2.c,v 1.21 2001/08/07 03:02:53 balay Exp $*/
3: /* Block diagonal matrix format */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/inline/ilu.h
10: int MatSetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
11: {
12: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
13: int kk,ldiag,row,newnz,*bdlen_new;
14: int j,k, *diag_new,ierr;
15: PetscTruth roworiented = a->roworiented,dfound;
16: PetscScalar value,**diagv_new;
19: for (kk=0; kk<m; kk++) { /* loop over added rows */
20: row = im[kk];
21: if (row < 0) continue;
22: if (row >= A->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->M-1);
23: for (j=0; j<n; j++) {
24: if (in[j] < 0) continue;
25: if (in[j] >= A->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],A->N-1);
26: ldiag = row - in[j]; /* diagonal number */
27: dfound = PETSC_FALSE;
28: if (roworiented) {
29: value = v[j + kk*n];
30: } else {
31: value = v[kk + j*m];
32: }
33: /* search diagonals for required one */
34: for (k=0; k<a->nd; k++) {
35: if (a->diag[k] == ldiag) {
36: dfound = PETSC_TRUE;
37: if (is == ADD_VALUES) a->diagv[k][row] += value;
38: else a->diagv[k][row] = value;
39: break;
40: }
41: }
42: if (!dfound) {
43: if (a->nonew || a->nonew_diag) {
44: #if !defined(PETSC_USE_COMPLEX)
45: if (a->user_alloc && value) {
46: #else
47: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
48: #endif
49: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
50: }
51: } else {
52: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
53: a->reallocs++;
54: /* free old bdiag storage info and reallocate */
55: PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
56: bdlen_new = diag_new + a->nd + 1;
57: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
58: for (k=0; k<a->nd; k++) {
59: diag_new[k] = a->diag[k];
60: diagv_new[k] = a->diagv[k];
61: bdlen_new[k] = a->bdlen[k];
62: }
63: diag_new[a->nd] = ldiag;
64: if (ldiag > 0) { /* lower triangular */
65: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
66: } else { /* upper triangular */
67: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
68: }
69: newnz = bdlen_new[a->nd];
70: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
71: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
72: /* adjust pointers so that dv[diag][row] works for all diagonals*/
73: if (diag_new[a->nd] > 0) {
74: diagv_new[a->nd] -= diag_new[a->nd];
75: }
76: a->maxnz += newnz;
77: a->nz += newnz;
78: PetscFree(a->diagv);
79: PetscFree(a->diag);
80: a->diag = diag_new;
81: a->bdlen = bdlen_new;
82: a->diagv = diagv_new;
84: /* Insert value */
85: if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
86: else a->diagv[a->nd][row] = value;
87: a->nd++;
88: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
89: }
90: }
91: }
92: }
93: return(0);
94: }
99: int MatSetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
100: {
101: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
102: int kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
103: int j,k,bs = a->bs,*diag_new,idx=0;
104: PetscTruth roworiented = a->roworiented,dfound;
105: PetscScalar value,**diagv_new;
108: for (kk=0; kk<m; kk++) { /* loop over added rows */
109: row = im[kk];
110: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
111: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
112: shift = (row/bs)*bs*bs + row%bs;
113: for (j=0; j<n; j++) {
114: ldiag = row/bs - in[j]/bs; /* block diagonal */
115: dfound = PETSC_FALSE;
116: if (roworiented) {
117: value = v[idx++];
118: } else {
119: value = v[kk + j*m];
120: }
121: /* seach for appropriate diagonal */
122: for (k=0; k<a->nd; k++) {
123: if (a->diag[k] == ldiag) {
124: dfound = PETSC_TRUE;
125: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
126: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
127: break;
128: }
129: }
130: if (!dfound) {
131: if (a->nonew || a->nonew_diag) {
132: #if !defined(PETSC_USE_COMPLEX)
133: if (a->user_alloc && value) {
134: #else
135: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
136: #endif
137: PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocate\n",ldiag);
138: }
139: } else {
140: PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %d\n",ldiag);
141: a->reallocs++;
142: /* free old bdiag storage info and reallocate */
143: PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
144: bdlen_new = diag_new + a->nd + 1;
145: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
146: for (k=0; k<a->nd; k++) {
147: diag_new[k] = a->diag[k];
148: diagv_new[k] = a->diagv[k];
149: bdlen_new[k] = a->bdlen[k];
150: }
151: diag_new[a->nd] = ldiag;
152: if (ldiag > 0) {/* lower triangular */
153: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
154: } else { /* upper triangular */
155: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
156: }
157: newnz = bs*bs*bdlen_new[a->nd];
158: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
159: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
160: /* adjust pointer so that dv[diag][row] works for all diagonals */
161: if (diag_new[a->nd] > 0) {
162: diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
163: }
164: a->maxnz += newnz; a->nz += newnz;
165: PetscFree(a->diagv);
166: PetscFree(a->diag);
167: a->diag = diag_new;
168: a->bdlen = bdlen_new;
169: a->diagv = diagv_new;
171: /* Insert value */
172: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
173: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
174: a->nd++;
175: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
176: }
177: }
178: }
179: }
180: return(0);
181: }
185: int MatGetValues_SeqBDiag_1(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
186: {
187: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
188: int kk,ldiag,row,j,k;
189: PetscScalar zero = 0.0;
190: PetscTruth dfound;
193: for (kk=0; kk<m; kk++) { /* loop over rows */
194: row = im[kk];
195: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
196: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
197: for (j=0; j<n; j++) {
198: if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
199: if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
200: ldiag = row - in[j]; /* diagonal number */
201: dfound = PETSC_FALSE;
202: for (k=0; k<a->nd; k++) {
203: if (a->diag[k] == ldiag) {
204: dfound = PETSC_TRUE;
205: *v++ = a->diagv[k][row];
206: break;
207: }
208: }
209: if (!dfound) *v++ = zero;
210: }
211: }
212: return(0);
213: }
217: int MatGetValues_SeqBDiag_N(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
218: {
219: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
220: int kk,ldiag,shift,row,j,k,bs = a->bs;
221: PetscScalar zero = 0.0;
222: PetscTruth dfound;
225: for (kk=0; kk<m; kk++) { /* loop over rows */
226: row = im[kk];
227: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
228: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
229: shift = (row/bs)*bs*bs + row%bs;
230: for (j=0; j<n; j++) {
231: ldiag = row/bs - in[j]/bs; /* block diagonal */
232: dfound = PETSC_FALSE;
233: for (k=0; k<a->nd; k++) {
234: if (a->diag[k] == ldiag) {
235: dfound = PETSC_TRUE;
236: *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
237: break;
238: }
239: }
240: if (!dfound) *v++ = zero;
241: }
242: }
243: return(0);
244: }
246: /*
247: MatMults for blocksize 1 to 5 and N -------------------------------
248: */
251: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
252: {
253: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
254: int nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
255: int ierr,d,j,len;
256: PetscScalar *vin,*vout,**a_diagv = a->diagv;
257: PetscScalar *pvin,*pvout,*dv;
260: VecGetArray(xx,&vin);
261: VecGetArray(yy,&vout);
262: PetscMemzero(vout,A->m*sizeof(PetscScalar));
263: for (d=0; d<nd; d++) {
264: dv = a_diagv[d];
265: diag = a_diag[d];
266: len = a_bdlen[d];
267: if (diag > 0) { /* lower triangle */
268: pvin = vin;
269: pvout = vout + diag;
270: dv = dv + diag;
271: } else { /* upper triangle,including main diagonal */
272: pvin = vin - diag;
273: pvout = vout;
274: }
275: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
276: PetscLogFlops(2*len);
277: }
278: VecRestoreArray(xx,&vin);
279: VecRestoreArray(yy,&vout);
280: return(0);
281: }
285: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
286: {
287: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
288: int nd = a->nd,nb_diag;
289: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
290: PetscScalar *vin,*vout,**a_diagv = a->diagv;
291: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
294: VecGetArray(xx,&vin);
295: VecGetArray(yy,&vout);
296: PetscMemzero(vout,A->m*sizeof(PetscScalar));
297: for (d=0; d<nd; d++) {
298: dv = a_diagv[d];
299: nb_diag = 2*a_diag[d];
300: len = a_bdlen[d];
301: if (nb_diag > 0) { /* lower triangle */
302: pvin = vin;
303: pvout = vout + nb_diag;
304: dv = dv + 2*nb_diag;
305: } else { /* upper triangle, including main diagonal */
306: pvin = vin - nb_diag;
307: pvout = vout;
308: }
309: for (k=0; k<len; k++) {
310: pvin0 = pvin[0]; pvin1 = pvin[1];
312: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
313: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
315: pvout += 2; pvin += 2; dv += 4;
316: }
317: PetscLogFlops(8*len);
318: }
319: VecRestoreArray(xx,&vin);
320: VecRestoreArray(yy,&vout);
321: return(0);
322: }
326: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
327: {
328: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
329: int nd = a->nd,nb_diag;
330: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
331: PetscScalar *vin,*vout,**a_diagv = a->diagv;
332: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
335: VecGetArray(xx,&vin);
336: VecGetArray(yy,&vout);
337: PetscMemzero(vout,A->m*sizeof(PetscScalar));
338: for (d=0; d<nd; d++) {
339: dv = a_diagv[d];
340: nb_diag = 3*a_diag[d];
341: len = a_bdlen[d];
342: if (nb_diag > 0) { /* lower triangle */
343: pvin = vin;
344: pvout = vout + nb_diag;
345: dv = dv + 3*nb_diag;
346: } else { /* upper triangle,including main diagonal */
347: pvin = vin - nb_diag;
348: pvout = vout;
349: }
350: for (k=0; k<len; k++) {
351: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
353: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
354: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
355: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
357: pvout += 3; pvin += 3; dv += 9;
358: }
359: PetscLogFlops(18*len);
360: }
361: VecRestoreArray(xx,&vin);
362: VecRestoreArray(yy,&vout);
363: return(0);
364: }
368: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
369: {
370: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
371: int nd = a->nd,nb_diag;
372: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
373: PetscScalar *vin,*vout,**a_diagv = a->diagv;
374: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
377: VecGetArray(xx,&vin);
378: VecGetArray(yy,&vout);
379: PetscMemzero(vout,A->m*sizeof(PetscScalar));
380: for (d=0; d<nd; d++) {
381: dv = a_diagv[d];
382: nb_diag = 4*a_diag[d];
383: len = a_bdlen[d];
384: if (nb_diag > 0) { /* lower triangle */
385: pvin = vin;
386: pvout = vout + nb_diag;
387: dv = dv + 4*nb_diag;
388: } else { /* upper triangle,including main diagonal */
389: pvin = vin - nb_diag;
390: pvout = vout;
391: }
392: for (k=0; k<len; k++) {
393: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
395: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
396: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
397: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
398: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
400: pvout += 4; pvin += 4; dv += 16;
401: }
402: PetscLogFlops(32*len);
403: }
404: VecRestoreArray(xx,&vin);
405: VecRestoreArray(yy,&vout);
406: return(0);
407: }
411: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
412: {
413: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
414: int nd = a->nd,nb_diag;
415: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
416: PetscScalar *vin,*vout,**a_diagv = a->diagv;
417: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
420: VecGetArray(xx,&vin);
421: VecGetArray(yy,&vout);
422: PetscMemzero(vout,A->m*sizeof(PetscScalar));
423: for (d=0; d<nd; d++) {
424: dv = a_diagv[d];
425: nb_diag = 5*a_diag[d];
426: len = a_bdlen[d];
427: if (nb_diag > 0) { /* lower triangle */
428: pvin = vin;
429: pvout = vout + nb_diag;
430: dv = dv + 5*nb_diag;
431: } else { /* upper triangle,including main diagonal */
432: pvin = vin - nb_diag;
433: pvout = vout;
434: }
435: for (k=0; k<len; k++) {
436: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
438: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
439: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
440: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
441: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
442: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
444: pvout += 5; pvin += 5; dv += 25;
445: }
446: PetscLogFlops(50*len);
447: }
448: VecRestoreArray(xx,&vin);
449: VecRestoreArray(yy,&vout);
450: return(0);
451: }
455: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
456: {
457: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
458: int nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
459: int ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
460: PetscScalar *vin,*vout,**a_diagv = a->diagv;
461: PetscScalar *pvin,*pvout,*dv;
464: VecGetArray(xx,&vin);
465: VecGetArray(yy,&vout);
466: PetscMemzero(vout,A->m*sizeof(PetscScalar));
467: for (d=0; d<nd; d++) {
468: dv = a_diagv[d];
469: nb_diag = bs*a_diag[d];
470: len = a_bdlen[d];
471: if (nb_diag > 0) { /* lower triangle */
472: pvin = vin;
473: pvout = vout + nb_diag;
474: dv = dv + bs*nb_diag;
475: } else { /* upper triangle, including main diagonal */
476: pvin = vin - nb_diag;
477: pvout = vout;
478: }
479: for (k=0; k<len; k++) {
480: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
481: pvout += bs; pvin += bs; dv += bs2;
482: }
483: PetscLogFlops(2*bs2*len);
484: }
485: VecRestoreArray(xx,&vin);
486: VecRestoreArray(yy,&vout);
487: return(0);
488: }
490: /*
491: MatMultAdds for blocksize 1 to 5 and N -------------------------------
492: */
495: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
496: {
497: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
498: int ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
499: PetscScalar *vin,*vout,**a_diagv = a->diagv;
500: PetscScalar *pvin,*pvout,*dv;
503: if (zz != yy) {VecCopy(zz,yy);}
504: VecGetArray(xx,&vin);
505: VecGetArray(yy,&vout);
506: for (d=0; d<nd; d++) {
507: dv = a_diagv[d];
508: diag = a_diag[d];
509: len = a_bdlen[d];
510: if (diag > 0) { /* lower triangle */
511: pvin = vin;
512: pvout = vout + diag;
513: dv = dv + diag;
514: } else { /* upper triangle, including main diagonal */
515: pvin = vin - diag;
516: pvout = vout;
517: }
518: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
519: PetscLogFlops(2*len);
520: }
521: VecRestoreArray(xx,&vin);
522: VecRestoreArray(yy,&vout);
523: return(0);
524: }
528: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
529: {
530: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
531: int ierr,nd = a->nd,nb_diag;
532: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
533: PetscScalar *vin,*vout,**a_diagv = a->diagv;
534: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
537: if (zz != yy) {VecCopy(zz,yy);}
538: VecGetArray(xx,&vin);
539: VecGetArray(yy,&vout);
540: for (d=0; d<nd; d++) {
541: dv = a_diagv[d];
542: nb_diag = 2*a_diag[d];
543: len = a_bdlen[d];
544: if (nb_diag > 0) { /* lower triangle */
545: pvin = vin;
546: pvout = vout + nb_diag;
547: dv = dv + 2*nb_diag;
548: } else { /* upper triangle, including main diagonal */
549: pvin = vin - nb_diag;
550: pvout = vout;
551: }
552: for (k=0; k<len; k++) {
553: pvin0 = pvin[0]; pvin1 = pvin[1];
555: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
556: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
558: pvout += 2; pvin += 2; dv += 4;
559: }
560: PetscLogFlops(8*len);
561: }
562: VecRestoreArray(xx,&vin);
563: VecRestoreArray(yy,&vout);
564: return(0);
565: }
569: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
570: {
571: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
572: int ierr,nd = a->nd,nb_diag;
573: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
574: PetscScalar *vin,*vout,**a_diagv = a->diagv;
575: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
578: if (zz != yy) {VecCopy(zz,yy);}
579: VecGetArray(xx,&vin);
580: VecGetArray(yy,&vout);
581: for (d=0; d<nd; d++) {
582: dv = a_diagv[d];
583: nb_diag = 3*a_diag[d];
584: len = a_bdlen[d];
585: if (nb_diag > 0) { /* lower triangle */
586: pvin = vin;
587: pvout = vout + nb_diag;
588: dv = dv + 3*nb_diag;
589: } else { /* upper triangle, including main diagonal */
590: pvin = vin - nb_diag;
591: pvout = vout;
592: }
593: for (k=0; k<len; k++) {
594: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
596: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
597: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
598: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
600: pvout += 3; pvin += 3; dv += 9;
601: }
602: PetscLogFlops(18*len);
603: }
604: VecRestoreArray(xx,&vin);
605: VecRestoreArray(yy,&vout);
606: return(0);
607: }
611: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
612: {
613: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
614: int ierr,nd = a->nd,nb_diag;
615: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
616: PetscScalar *vin,*vout,**a_diagv = a->diagv;
617: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
620: if (zz != yy) {VecCopy(zz,yy);}
621: VecGetArray(xx,&vin);
622: VecGetArray(yy,&vout);
623: for (d=0; d<nd; d++) {
624: dv = a_diagv[d];
625: nb_diag = 4*a_diag[d];
626: len = a_bdlen[d];
627: if (nb_diag > 0) { /* lower triangle */
628: pvin = vin;
629: pvout = vout + nb_diag;
630: dv = dv + 4*nb_diag;
631: } else { /* upper triangle, including main diagonal */
632: pvin = vin - nb_diag;
633: pvout = vout;
634: }
635: for (k=0; k<len; k++) {
636: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
638: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
639: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
640: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
641: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
643: pvout += 4; pvin += 4; dv += 16;
644: }
645: PetscLogFlops(32*len);
646: }
647: VecRestoreArray(xx,&vin);
648: VecRestoreArray(yy,&vout);
649: return(0);
650: }
654: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
655: {
656: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
657: int ierr,nd = a->nd,nb_diag;
658: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
659: PetscScalar *vin,*vout,**a_diagv = a->diagv;
660: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
663: if (zz != yy) {VecCopy(zz,yy);}
664: VecGetArray(xx,&vin);
665: VecGetArray(yy,&vout);
666: for (d=0; d<nd; d++) {
667: dv = a_diagv[d];
668: nb_diag = 5*a_diag[d];
669: len = a_bdlen[d];
670: if (nb_diag > 0) { /* lower triangle */
671: pvin = vin;
672: pvout = vout + nb_diag;
673: dv = dv + 5*nb_diag;
674: } else { /* upper triangle, including main diagonal */
675: pvin = vin - nb_diag;
676: pvout = vout;
677: }
678: for (k=0; k<len; k++) {
679: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
681: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
682: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
683: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
684: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
685: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
687: pvout += 5; pvin += 5; dv += 25;
688: }
689: PetscLogFlops(50*len);
690: }
691: VecRestoreArray(xx,&vin);
692: VecRestoreArray(yy,&vout);
693: return(0);
694: }
698: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
699: {
700: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
701: int ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
702: int *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
703: PetscScalar *vin,*vout,**a_diagv = a->diagv;
704: PetscScalar *pvin,*pvout,*dv;
707: if (zz != yy) {VecCopy(zz,yy);}
708: VecGetArray(xx,&vin);
709: VecGetArray(yy,&vout);
710: for (d=0; d<nd; d++) {
711: dv = a_diagv[d];
712: nb_diag = bs*a_diag[d];
713: len = a_bdlen[d];
714: if (nb_diag > 0) { /* lower triangle */
715: pvin = vin;
716: pvout = vout + nb_diag;
717: dv = dv + bs*nb_diag;
718: } else { /* upper triangle, including main diagonal */
719: pvin = vin - nb_diag;
720: pvout = vout;
721: }
722: for (k=0; k<len; k++) {
723: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
724: pvout += bs; pvin += bs; dv += bs2;
725: }
726: PetscLogFlops(2*bs2*len);
727: }
728: VecRestoreArray(xx,&vin);
729: VecRestoreArray(yy,&vout);
730: return(0);
731: }
733: /*
734: MatMultTranspose ----------------------------------------------
735: */
738: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
739: {
740: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
741: int ierr,nd = a->nd,diag,d,j,len;
742: PetscScalar *pvin,*pvout,*dv;
743: PetscScalar *vin,*vout;
744:
746: VecGetArray(xx,&vin);
747: VecGetArray(yy,&vout);
748: PetscMemzero(vout,A->n*sizeof(PetscScalar));
749: for (d=0; d<nd; d++) {
750: dv = a->diagv[d];
751: diag = a->diag[d];
752: len = a->bdlen[d];
753: /* diag of original matrix is (row/bs - col/bs) */
754: /* diag of transpose matrix is (col/bs - row/bs) */
755: if (diag < 0) { /* transpose is lower triangle */
756: pvin = vin;
757: pvout = vout - diag;
758: } else { /* transpose is upper triangle, including main diagonal */
759: pvin = vin + diag;
760: pvout = vout;
761: dv = dv + diag;
762: }
763: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
764: }
765: VecRestoreArray(xx,&vin);
766: VecRestoreArray(yy,&vout);
767: return(0);
768: }
772: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
773: {
774: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
775: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
776: PetscScalar *pvin,*pvout,*dv;
777: PetscScalar *vin,*vout;
778:
780: VecGetArray(xx,&vin);
781: VecGetArray(yy,&vout);
782: PetscMemzero(vout,A->n*sizeof(PetscScalar));
783: for (d=0; d<nd; d++) {
784: dv = a->diagv[d];
785: diag = a->diag[d];
786: len = a->bdlen[d];
787: /* diag of original matrix is (row/bs - col/bs) */
788: /* diag of transpose matrix is (col/bs - row/bs) */
789: if (diag < 0) { /* transpose is lower triangle */
790: pvin = vin;
791: pvout = vout - bs*diag;
792: } else { /* transpose is upper triangle, including main diagonal */
793: pvin = vin + bs*diag;
794: pvout = vout;
795: dv = dv + diag;
796: }
797: for (k=0; k<len; k++) {
798: kloc = k*bs; kshift = kloc*bs;
799: for (i=0; i<bs; i++) { /* i = local column of transpose */
800: for (j=0; j<bs; j++) { /* j = local row of transpose */
801: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
802: }
803: }
804: }
805: }
806: VecRestoreArray(xx,&vin);
807: VecRestoreArray(yy,&vout);
808: return(0);
809: }
811: /*
812: MatMultTransposeAdd ----------------------------------------------
813: */
816: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
817: {
818: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
819: int ierr,nd = a->nd,diag,d,j,len;
820: PetscScalar *pvin,*pvout,*dv;
821: PetscScalar *vin,*vout;
822:
824: if (zz != yy) {VecCopy(zz,yy);}
825: VecGetArray(xx,&vin);
826: VecGetArray(yy,&vout);
827: for (d=0; d<nd; d++) {
828: dv = a->diagv[d];
829: diag = a->diag[d];
830: len = a->bdlen[d];
831: /* diag of original matrix is (row/bs - col/bs) */
832: /* diag of transpose matrix is (col/bs - row/bs) */
833: if (diag < 0) { /* transpose is lower triangle */
834: pvin = vin;
835: pvout = vout - diag;
836: } else { /* transpose is upper triangle, including main diagonal */
837: pvin = vin + diag;
838: pvout = vout;
839: dv = dv + diag;
840: }
841: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
842: }
843: VecRestoreArray(xx,&vin);
844: VecRestoreArray(yy,&vout);
845: return(0);
846: }
850: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
851: {
852: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
853: int ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
854: PetscScalar *pvin,*pvout,*dv;
855: PetscScalar *vin,*vout;
856:
858: if (zz != yy) {VecCopy(zz,yy);}
859: VecGetArray(xx,&vin);
860: VecGetArray(yy,&vout);
861: for (d=0; d<nd; d++) {
862: dv = a->diagv[d];
863: diag = a->diag[d];
864: len = a->bdlen[d];
865: /* diag of original matrix is (row/bs - col/bs) */
866: /* diag of transpose matrix is (col/bs - row/bs) */
867: if (diag < 0) { /* transpose is lower triangle */
868: pvin = vin;
869: pvout = vout - bs*diag;
870: } else { /* transpose is upper triangle, including main diagonal */
871: pvin = vin + bs*diag;
872: pvout = vout;
873: dv = dv + diag;
874: }
875: for (k=0; k<len; k++) {
876: kloc = k*bs; kshift = kloc*bs;
877: for (i=0; i<bs; i++) { /* i = local column of transpose */
878: for (j=0; j<bs; j++) { /* j = local row of transpose */
879: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
880: }
881: }
882: }
883: }
884: VecRestoreArray(xx,&vin);
885: VecRestoreArray(yy,&vout);
886: return(0);
887: }
890: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
891: {
892: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
893: PetscScalar *x,*b,*xb,*dd,*dv,dval,sum;
894: int ierr,i,j,k,d,kbase,bs = a->bs,kloc;
895: int mainbd = a->mainbd,diag,mblock = a->mblock,bloc;
898: its = its*lits;
899: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
901: /* Currently this code doesn't use wavefront orderings, although
902: we should eventually incorporate that option, whatever wavefront
903: ordering maybe :-) */
905: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
907: VecGetArray(xx,&x);
908: if (xx != bb) {
909: VecGetArray(bb,&b);
910: } else {
911: b = x;
912: }
913: dd = a->diagv[mainbd];
914: if (flag == SOR_APPLY_UPPER) {
915: /* apply (U + D/omega) to the vector */
916: for (k=0; k<mblock; k++) {
917: kloc = k*bs; kbase = kloc*bs;
918: for (i=0; i<bs; i++) {
919: sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
920: for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
921: for (d=mainbd+1; d<a->nd; d++) {
922: diag = a->diag[d];
923: dv = a->diagv[d];
924: if (k-diag < mblock) {
925: for (j=0; j<bs; j++) {
926: sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
927: }
928: }
929: }
930: x[kloc+i] = sum;
931: }
932: }
933: VecRestoreArray(xx,&x);
934: if (xx != bb) {VecRestoreArray(bb,&b);}
935: return(0);
936: }
937: if (flag & SOR_ZERO_INITIAL_GUESS) {
938: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
939: for (k=0; k<mblock; k++) {
940: kloc = k*bs; kbase = kloc*bs;
941: for (i=0; i<bs; i++) {
942: sum = b[i+kloc];
943: dval = shift + dd[i*(bs+1)+kbase];
944: for (d=0; d<mainbd; d++) {
945: diag = a->diag[d];
946: dv = a->diagv[d];
947: if (k >= diag) {
948: for (j=0; j<bs; j++)
949: sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
950: }
951: }
952: for (j=0; j<i; j++){
953: sum -= dd[kbase + j*bs + i] * x[kloc + j];
954: }
955: x[kloc+i] = omega*sum/dval;
956: }
957: }
958: xb = x;
959: } else xb = b;
960: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
961: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
962: for (k=0; k<mblock; k++) {
963: kloc = k*bs; kbase = kloc*bs;
964: for (i=0; i<bs; i++)
965: x[kloc+i] *= dd[i*(bs+1)+kbase];
966: }
967: }
968: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
969: for (k=mblock-1; k>=0; k--) {
970: kloc = k*bs; kbase = kloc*bs;
971: for (i=bs-1; i>=0; i--) {
972: sum = xb[i+kloc];
973: dval = shift + dd[i*(bs+1)+kbase];
974: for (j=i+1; j<bs; j++)
975: sum -= dd[kbase + j*bs + i] * x[kloc + j];
976: for (d=mainbd+1; d<a->nd; d++) {
977: diag = a->diag[d];
978: dv = a->diagv[d];
979: bloc = k - diag;
980: if (bloc < mblock) {
981: for (j=0; j<bs; j++)
982: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
983: }
984: }
985: x[kloc+i] = omega*sum/dval;
986: }
987: }
988: }
989: its--;
990: }
991: while (its--) {
992: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
993: for (k=0; k<mblock; k++) {
994: kloc = k*bs; kbase = kloc*bs;
995: for (i=0; i<bs; i++) {
996: sum = b[i+kloc];
997: dval = shift + dd[i*(bs+1)+kbase];
998: for (d=0; d<mainbd; d++) {
999: diag = a->diag[d];
1000: dv = a->diagv[d];
1001: bloc = k - diag;
1002: if (bloc >= 0) {
1003: for (j=0; j<bs; j++) {
1004: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1005: }
1006: }
1007: }
1008: for (d=mainbd; d<a->nd; d++) {
1009: diag = a->diag[d];
1010: dv = a->diagv[d];
1011: bloc = k - diag;
1012: if (bloc < mblock) {
1013: for (j=0; j<bs; j++) {
1014: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1015: }
1016: }
1017: }
1018: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1019: }
1020: }
1021: }
1022: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1023: for (k=mblock-1; k>=0; k--) {
1024: kloc = k*bs; kbase = kloc*bs;
1025: for (i=bs-1; i>=0; i--) {
1026: sum = b[i+kloc];
1027: dval = shift + dd[i*(bs+1)+kbase];
1028: for (d=0; d<mainbd; d++) {
1029: diag = a->diag[d];
1030: dv = a->diagv[d];
1031: bloc = k - diag;
1032: if (bloc >= 0) {
1033: for (j=0; j<bs; j++) {
1034: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1035: }
1036: }
1037: }
1038: for (d=mainbd; d<a->nd; d++) {
1039: diag = a->diag[d];
1040: dv = a->diagv[d];
1041: bloc = k - diag;
1042: if (bloc < mblock) {
1043: for (j=0; j<bs; j++) {
1044: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1045: }
1046: }
1047: }
1048: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1049: }
1050: }
1051: }
1052: }
1053: VecRestoreArray(xx,&x);
1054: if (xx != bb) VecRestoreArray(bb,&b);
1055: return(0);
1056: }
1060: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
1061: {
1062: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1063: PetscScalar *x,*b,*xb,*dd,dval,sum;
1064: int ierr,m = A->m,i,d,loc;
1065: int mainbd = a->mainbd,diag;
1068: its = its*lits;
1069: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1070: /* Currently this code doesn't use wavefront orderings,although
1071: we should eventually incorporate that option, whatever wavefront
1072: ordering maybe :-) */
1074: VecGetArray(xx,&x);
1075: VecGetArray(bb,&b);
1076: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1077: dd = a->diagv[mainbd];
1078: if (flag == SOR_APPLY_UPPER) {
1079: /* apply (U + D/omega) to the vector */
1080: for (i=0; i<m; i++) {
1081: sum = b[i] * (shift + dd[i]) / omega;
1082: for (d=mainbd+1; d<a->nd; d++) {
1083: diag = a->diag[d];
1084: if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1085: }
1086: x[i] = sum;
1087: }
1088: VecRestoreArray(xx,&x);
1089: VecRestoreArray(bb,&b);
1090: return(0);
1091: }
1092: if (flag & SOR_ZERO_INITIAL_GUESS) {
1093: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1094: for (i=0; i<m; i++) {
1095: sum = b[i];
1096: for (d=0; d<mainbd; d++) {
1097: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1098: }
1099: x[i] = omega*(sum/(shift + dd[i]));
1100: }
1101: xb = x;
1102: } else xb = b;
1103: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1104: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1105: for (i=0; i<m; i++) x[i] *= dd[i];
1106: }
1107: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1108: for (i=m-1; i>=0; i--) {
1109: sum = xb[i];
1110: for (d=mainbd+1; d<a->nd; d++) {
1111: diag = a->diag[d];
1112: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1113: }
1114: x[i] = omega*(sum/(shift + dd[i]));
1115: }
1116: }
1117: its--;
1118: }
1119: while (its--) {
1120: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1121: for (i=0; i<m; i++) {
1122: sum = b[i];
1123: dval = shift + dd[i];
1124: for (d=0; d<mainbd; d++) {
1125: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1126: }
1127: for (d=mainbd; d<a->nd; d++) {
1128: diag = a->diag[d];
1129: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1130: }
1131: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1132: }
1133: }
1134: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1135: for (i=m-1; i>=0; i--) {
1136: sum = b[i];
1137: for (d=0; d<mainbd; d++) {
1138: loc = i - a->diag[d];
1139: if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1140: }
1141: for (d=mainbd; d<a->nd; d++) {
1142: diag = a->diag[d];
1143: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1144: }
1145: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1146: }
1147: }
1148: }
1149: VecRestoreArray(xx,&x);
1150: VecRestoreArray(bb,&b);
1151: return(0);
1152: }