Actual source code: bdfact.c
1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/
3: /* Block diagonal matrix format - factorization and triangular solves */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/inline/ilu.h
10: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
11: {
12: PetscTruth idn;
13: int ierr;
16: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
17: if (isrow) {
18: ISIdentity(isrow,&idn);
19: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
20: }
21: if (iscol) {
22: ISIdentity(iscol,&idn);
23: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
24: }
25: if (info->levels != 0) {
26: SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
27: }
28: MatConvert(A,MATSAME,B);
30: /* Must set to zero for repeated calls with different nonzero structure */
31: (*B)->factor = 0;
32: return(0);
33: }
37: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
38: {
39: PetscTruth idn;
40: int ierr;
43: /* For now, no fill is allocated in symbolic factorization phase, so we
44: directly use the input matrix for numeric factorization. */
45: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
46: if (isrow) {
47: ISIdentity(isrow,&idn);
48: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
49: }
50: if (iscol) {
51: ISIdentity(iscol,&idn);
52: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
53: }
54: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
55: MatLUFactorNumeric(A,&A);
56: return(0);
57: }
59: /* --------------------------------------------------------------------------*/
62: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
63: {
64: Mat C = *B;
65: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
66: int k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
67: int dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
68: int *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
69: PetscScalar **dv = a->diagv,*dd = dv[mainbd],*v_work;
70: PetscScalar *multiplier;
73: /* Copy input matrix to factored matrix if we've already factored the
74: matrix before AND the nonzero structure remains the same. This is done
75: in symbolic factorization the first time through, but there's no symbolic
76: factorization for successive calls with same matrix sparsity structure. */
77: if (C->factor == FACTOR_LU) {
78: for (i=0; i<a->nd; i++) {
79: len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
80: d = diag[i];
81: if (d > 0) {
82: PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
83: } else {
84: PetscMemcpy(dv[i],a1->diagv[i],len);
85: }
86: }
87: }
89: if (!a->pivot) {
90: PetscMalloc((m+1)*sizeof(int),&a->pivot);
91: PetscLogObjectMemory(C,m*sizeof(int));
92: }
93: PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
94: multiplier = v_work + bs;
95: PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
96: PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
97: for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
98: for (k=0; k<mblock; k++) { /* k = block pivot_row */
99: knb = k*bs; knb2 = knb*bs;
100: /* invert the diagonal block */
101: Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
102: for (d=mainbd-1; d>=0; d--) {
103: elim_row = k + diag[d];
104: if (elim_row < mblock) { /* sweep down */
105: /* dv[d][knb2]: test if entire block is zero? */
106: Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
107: for (d2=d+1; d2<nd; d2++) {
108: elim_col = elim_row - diag[d2];
109: if (elim_col >=0 && elim_col < nblock) {
110: dgk = k - elim_col;
111: if ((dnum = dgptr[dgk+mblock])) {
112: Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
113: &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
114: }
115: }
116: }
117: }
118: }
119: }
120: PetscFree(dgptr);
121: PetscFree(v_work);
122: C->factor = FACTOR_LU;
123: return(0);
124: }
128: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
129: {
130: Mat C = *B;
131: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
132: int k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
133: int *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
134: PetscScalar **dv = a->diagv,*dd = dv[mainbd],mult;
137: /* Copy input matrix to factored matrix if we've already factored the
138: matrix before AND the nonzero structure remains the same. This is done
139: in symbolic factorization the first time through, but there's no symbolic
140: factorization for successive calls with same matrix sparsity structure. */
141: if (C->factor == FACTOR_LU) {
142: for (i=0; i<nd; i++) {
143: len = a->bdlen[i] * sizeof(PetscScalar);
144: d = diag[i];
145: if (d > 0) {
146: PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
147: } else {
148: PetscMemcpy(dv[i],a1->diagv[i],len);
149: }
150: }
151: }
153: PetscMalloc((m+n+1)*sizeof(int),&dgptr);
154: PetscMemzero(dgptr,(m+n)*sizeof(int));
155: for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
156: for (k=0; k<m; k++) { /* k = pivot_row */
157: dd[k] = 1.0/dd[k];
158: for (d=mainbd-1; d>=0; d--) {
159: elim_row = k + diag[d];
160: if (elim_row < m) { /* sweep down */
161: if (dv[d][elim_row] != 0.0) {
162: dv[d][elim_row] *= dd[k];
163: mult = dv[d][elim_row];
164: for (d2=d+1; d2<nd; d2++) {
165: elim_col = elim_row - diag[d2];
166: dgk = k - elim_col;
167: if (elim_col >=0 && elim_col < n) {
168: if ((dnum = dgptr[dgk+m])) {
169: dv[d2][elim_row] -= mult * dv[dnum-1][k];
170: }
171: }
172: }
173: }
174: }
175: }
176: }
177: PetscFree(dgptr);
178: C->factor = FACTOR_LU;
179: return(0);
180: }
182: /* -----------------------------------------------------------------*/
186: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
187: {
188: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
189: int ierr,i,d,loc,mainbd = a->mainbd;
190: int n = A->n,m = A->m,*diag = a->diag,col;
191: PetscScalar *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;
194: VecGetArray(xx,&x);
195: VecGetArray(yy,&y);
196: /* forward solve the lower triangular part */
197: for (i=0; i<m; i++) {
198: sum = x[i];
199: for (d=0; d<mainbd; d++) {
200: loc = i - diag[d];
201: if (loc >= 0) sum -= dv[d][i] * y[loc];
202: }
203: y[i] = sum;
204: }
205: /* backward solve the upper triangular part */
206: for (i=m-1; i>=0; i--) {
207: sum = y[i];
208: for (d=mainbd+1; d<a->nd; d++) {
209: col = i - diag[d];
210: if (col < n) sum -= dv[d][i] * y[col];
211: }
212: y[i] = sum*dd[i];
213: }
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(yy,&y);
216: PetscLogFlops(2*a->nz - A->n);
217: return(0);
218: }
222: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
223: {
224: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
225: int i,d,loc,mainbd = a->mainbd;
226: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
227: int ierr,m = A->m,*diag = a->diag,col;
228: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
229: PetscScalar w0,w1,sum0,sum1;
232: VecGetArray(xx,&x);
233: VecGetArray(yy,&y);
234: PetscMemcpy(y,x,m*sizeof(PetscScalar));
236: /* forward solve the lower triangular part */
237: if (mainbd != 0) {
238: inb = 0;
239: for (i=0; i<mblock; i++) {
240: sum0 = sum1 = 0.0;
241: for (d=0; d<mainbd; d++) {
242: loc = 2*(i - diag[d]);
243: if (loc >= 0) {
244: dvt = &dv[d][4*i];
245: w0 = y[loc]; w1 = y[loc+1];
246: sum0 += dvt[0]*w0 + dvt[2]*w1;
247: sum1 += dvt[1]*w0 + dvt[3]*w1;
248: }
249: }
250: y[inb] -= sum0; y[inb+1] -= sum1;
252: inb += 2;
253: }
254: }
255: /* backward solve the upper triangular part */
256: inb = 2*(mblock-1); inb2 = 2*inb;
257: for (i=mblock-1; i>=0; i--) {
258: sum0 = y[inb]; sum1 = y[inb+1];
259: for (d=mainbd+1; d<a->nd; d++) {
260: col = 2*(i - diag[d]);
261: if (col < 2*nblock) {
262: dvt = &dv[d][4*i];
263: w0 = y[col]; w1 = y[col+1];
264: sum0 -= dvt[0]*w0 + dvt[2]*w1;
265: sum1 -= dvt[1]*w0 + dvt[3]*w1;
266: }
267: }
268: dvt = dd+inb2;
269: y[inb] = dvt[0]*sum0 + dvt[2]*sum1;
270: y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
271: inb -= 2; inb2 -= 4;
272: }
273: VecRestoreArray(xx,&x);
274: VecRestoreArray(yy,&y);
275: PetscLogFlops(2*a->nz - A->n);
276: return(0);
277: }
281: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
282: {
283: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
284: int i,d,loc,mainbd = a->mainbd;
285: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
286: int ierr,m = A->m,*diag = a->diag,col;
287: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
288: PetscScalar w0,w1,w2,sum0,sum1,sum2;
291: VecGetArray(xx,&x);
292: VecGetArray(yy,&y);
293: PetscMemcpy(y,x,m*sizeof(PetscScalar));
295: /* forward solve the lower triangular part */
296: if (mainbd != 0) {
297: inb = 0;
298: for (i=0; i<mblock; i++) {
299: sum0 = sum1 = sum2 = 0.0;
300: for (d=0; d<mainbd; d++) {
301: loc = 3*(i - diag[d]);
302: if (loc >= 0) {
303: dvt = &dv[d][9*i];
304: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
305: sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
306: sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
307: sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
308: }
309: }
310: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
311: inb += 3;
312: }
313: }
314: /* backward solve the upper triangular part */
315: inb = 3*(mblock-1); inb2 = 3*inb;
316: for (i=mblock-1; i>=0; i--) {
317: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2];
318: for (d=mainbd+1; d<a->nd; d++) {
319: col = 3*(i - diag[d]);
320: if (col < 3*nblock) {
321: dvt = &dv[d][9*i];
322: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
323: sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
324: sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
325: sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
326: }
327: }
328: dvt = dd+inb2;
329: y[inb] = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
330: y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
331: y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
332: inb -= 3; inb2 -= 9;
333: }
334: VecRestoreArray(xx,&x);
335: VecRestoreArray(yy,&y);
336: PetscLogFlops(2*a->nz - A->n);
337: return(0);
338: }
342: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
343: {
344: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
345: int i,d,loc,mainbd = a->mainbd;
346: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
347: int ierr,m = A->m,*diag = a->diag,col;
348: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
349: PetscScalar w0,w1,w2,w3,sum0,sum1,sum2,sum3;
352: VecGetArray(xx,&x);
353: VecGetArray(yy,&y);
354: PetscMemcpy(y,x,m*sizeof(PetscScalar));
356: /* forward solve the lower triangular part */
357: if (mainbd != 0) {
358: inb = 0;
359: for (i=0; i<mblock; i++) {
360: sum0 = sum1 = sum2 = sum3 = 0.0;
361: for (d=0; d<mainbd; d++) {
362: loc = 4*(i - diag[d]);
363: if (loc >= 0) {
364: dvt = &dv[d][16*i];
365: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
366: sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
367: sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
368: sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
369: sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
370: }
371: }
372: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
373: inb += 4;
374: }
375: }
376: /* backward solve the upper triangular part */
377: inb = 4*(mblock-1); inb2 = 4*inb;
378: for (i=mblock-1; i>=0; i--) {
379: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2]; sum3 = y[inb+3];
380: for (d=mainbd+1; d<a->nd; d++) {
381: col = 4*(i - diag[d]);
382: if (col < 4*nblock) {
383: dvt = &dv[d][16*i];
384: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
385: sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
386: sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
387: sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
388: sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
389: }
390: }
391: dvt = dd+inb2;
392: y[inb] = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
393: y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
394: y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
395: y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
396: inb -= 4; inb2 -= 16;
397: }
398: VecRestoreArray(xx,&x);
399: VecRestoreArray(yy,&y);
400: PetscLogFlops(2*a->nz - A->n);
401: return(0);
402: }
406: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
407: {
408: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
409: int i,d,loc,mainbd = a->mainbd;
410: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
411: int ierr,m = A->m,*diag = a->diag,col;
412: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
413: PetscScalar w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;
416: VecGetArray(xx,&x);
417: VecGetArray(yy,&y);
418: PetscMemcpy(y,x,m*sizeof(PetscScalar));
420: /* forward solve the lower triangular part */
421: if (mainbd != 0) {
422: inb = 0;
423: for (i=0; i<mblock; i++) {
424: sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
425: for (d=0; d<mainbd; d++) {
426: loc = 5*(i - diag[d]);
427: if (loc >= 0) {
428: dvt = &dv[d][25*i];
429: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
430: sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
431: sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
432: sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
433: sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
434: sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
435: }
436: }
437: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
438: y[inb+4] -= sum4;
439: inb += 5;
440: }
441: }
442: /* backward solve the upper triangular part */
443: inb = 5*(mblock-1); inb2 = 5*inb;
444: for (i=mblock-1; i>=0; i--) {
445: sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
446: for (d=mainbd+1; d<a->nd; d++) {
447: col = 5*(i - diag[d]);
448: if (col < 5*nblock) {
449: dvt = &dv[d][25*i];
450: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
451: sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
452: sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
453: sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
454: sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
455: sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
456: }
457: }
458: dvt = dd+inb2;
459: y[inb] = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
460: + dvt[20]*sum4;
461: y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
462: + dvt[21]*sum4;
463: y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
464: + dvt[22]*sum4;
465: y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
466: + dvt[23]*sum4;
467: y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
468: + dvt[24]*sum4;
469: inb -= 5; inb2 -= 25;
470: }
471: VecRestoreArray(xx,&x);
472: VecRestoreArray(yy,&y);
473: PetscLogFlops(2*a->nz - A->n);
474: return(0);
475: }
479: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
480: {
481: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
482: int i,d,loc,mainbd = a->mainbd;
483: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
484: int ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
485: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
486: PetscScalar work[25];
489: VecGetArray(xx,&x);
490: VecGetArray(yy,&y);
491: if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
492: PetscMemcpy(y,x,m*sizeof(PetscScalar));
494: /* forward solve the lower triangular part */
495: if (mainbd != 0) {
496: inb = 0;
497: for (i=0; i<mblock; i++) {
498: for (d=0; d<mainbd; d++) {
499: loc = i - diag[d];
500: if (loc >= 0) {
501: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
502: }
503: }
504: inb += bs;
505: }
506: }
507: /* backward solve the upper triangular part */
508: inb = bs*(mblock-1); inb2 = inb*bs;
509: for (i=mblock-1; i>=0; i--) {
510: for (d=mainbd+1; d<a->nd; d++) {
511: col = i - diag[d];
512: if (col < nblock) {
513: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
514: }
515: }
516: Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
517: PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
518: inb -= bs; inb2 -= bs2;
519: }
520: VecRestoreArray(xx,&x);
521: VecRestoreArray(yy,&y);
522: PetscLogFlops(2*a->nz - A->n);
523: return(0);
524: }