Actual source code: sbaij2.c
1: /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/inline/spops.h
5: #include src/inline/ilu.h
6: #include petscbt.h
7: #include src/mat/impls/sbaij/seq/sbaij.h
11: int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov)
12: {
13: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: int brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol,
15: start,end,*ai,*aj,bs,*nidx2;
16: PetscBT table;
17: PetscBT table0;
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = a->bs;
25: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
27: PetscBTCreate(mbs,table);
28: PetscMalloc((mbs+1)*sizeof(int),&nidx);
29: PetscMalloc((A->m+1)*sizeof(int),&nidx2);
30: PetscBTCreate(mbs,table0);
32: for (i=0; i<is_max; i++) { /* for each is */
33: isz = 0;
34: PetscBTMemzero(mbs,table);
35:
36: /* Extract the indices, assume there can be duplicate entries */
37: ISGetIndices(is[i],&idx);
38: ISGetLocalSize(is[i],&n);
40: /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
41: for (j=0; j<n ; ++j){
42: brow = idx[j]/bs; /* convert the indices into block indices */
43: if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
44: if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;}
45: }
46: ISRestoreIndices(is[i],&idx);
47: ISDestroy(is[i]);
48:
49: k = 0;
50: for (j=0; j<ov; j++){ /* for each overlap */
51: /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
52: PetscBTMemzero(mbs,table0);
53: for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);
55: n = isz; /* length of the updated is[i] */
56: for (brow=0; brow<mbs; brow++){
57: start = ai[brow]; end = ai[brow+1];
58: if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
59: for (l = start; l<end ; l++){
60: bcol = aj[l];
61: if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
62: }
63: k++;
64: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
65: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
66: for (l = start; l<end ; l++){
67: bcol = aj[l];
68: if (PetscBTLookup(table0,bcol)){
69: if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
70: break; /* for l = start; l<end ; l++) */
71: }
72: }
73: }
74: }
75: } /* for each overlap */
77: /* expand the Index Set */
78: for (j=0; j<isz; j++) {
79: for (k=0; k<bs; k++)
80: nidx2[j*bs+k] = nidx[j]*bs+k;
81: }
82: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
83: }
84: PetscBTDestroy(table);
85: PetscFree(nidx);
86: PetscFree(nidx2);
87: PetscBTDestroy(table0);
88: return(0);
89: }
93: int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
94: {
95: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
96: int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
97: int row,mat_i,*mat_j,tcol,*mat_ilen;
98: int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
99: int *aj = a->j,*ai = a->i;
100: MatScalar *mat_a;
101: Mat C;
102: PetscTruth flag;
105:
106: if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
107: ISSorted(iscol,(PetscTruth*)&i);
108: if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
110: ISGetIndices(isrow,&irow);
111: ISGetSize(isrow,&nrows);
112:
113: PetscMalloc((1+oldcols)*sizeof(int),&smap);
114: ssmap = smap;
115: PetscMalloc((1+nrows)*sizeof(int),&lens);
116: PetscMemzero(smap,oldcols*sizeof(int));
117: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
118: /* determine lens of each row */
119: for (i=0; i<nrows; i++) {
120: kstart = ai[irow[i]];
121: kend = kstart + a->ilen[irow[i]];
122: lens[i] = 0;
123: for (k=kstart; k<kend; k++) {
124: if (ssmap[aj[k]]) {
125: lens[i]++;
126: }
127: }
128: }
129: /* Create and fill new matrix */
130: if (scall == MAT_REUSE_MATRIX) {
131: c = (Mat_SeqSBAIJ *)((*B)->data);
133: if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
134: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);
135: if (flag == PETSC_FALSE) {
136: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
137: }
138: PetscMemzero(c->ilen,c->mbs*sizeof(int));
139: C = *B;
140: } else {
141: MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);
142: MatSetType(C,A->type_name);
143: MatSeqSBAIJSetPreallocation(C,bs,0,lens);
144: }
145: c = (Mat_SeqSBAIJ *)(C->data);
146: for (i=0; i<nrows; i++) {
147: row = irow[i];
148: kstart = ai[row];
149: kend = kstart + a->ilen[row];
150: mat_i = c->i[i];
151: mat_j = c->j + mat_i;
152: mat_a = c->a + mat_i*bs2;
153: mat_ilen = c->ilen + i;
154: for (k=kstart; k<kend; k++) {
155: if ((tcol=ssmap[a->j[k]])) {
156: *mat_j++ = tcol - 1;
157: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
158: mat_a += bs2;
159: (*mat_ilen)++;
160: }
161: }
162: }
163:
164: /* Free work space */
165: PetscFree(smap);
166: PetscFree(lens);
167: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
168: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
169:
170: ISRestoreIndices(isrow,&irow);
171: *B = C;
172: return(0);
173: }
177: int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
178: {
179: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
180: IS is1;
181: int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
184: if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
185:
186: ISGetIndices(isrow,&irow);
187: ISGetSize(isrow,&nrows);
188:
189: /* Verify if the indices corespond to each element in a block
190: and form the IS with compressed IS */
191: PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);
192: iary = vary + a->mbs;
193: PetscMemzero(vary,(a->mbs)*sizeof(int));
194: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
195:
196: count = 0;
197: for (i=0; i<a->mbs; i++) {
198: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
199: if (vary[i]==bs) iary[count++] = i;
200: }
201: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
202:
203: ISRestoreIndices(isrow,&irow);
204: PetscFree(vary);
206: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
207: ISDestroy(is1);
208: return(0);
209: }
213: int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
214: {
215: int ierr,i;
218: if (scall == MAT_INITIAL_MATRIX) {
219: PetscMalloc((n+1)*sizeof(Mat),B);
220: }
222: for (i=0; i<n; i++) {
223: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
224: }
225: return(0);
226: }
228: /* -------------------------------------------------------*/
229: /* Should check that shapes of vectors and matrices match */
230: /* -------------------------------------------------------*/
231: #include petscblaslapack.h
235: int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
236: {
237: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
238: PetscScalar *x,*z,*xb,x1,zero=0.0;
239: MatScalar *v;
240: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
243: VecSet(&zero,zz);
244: VecGetArray(xx,&x);
245: VecGetArray(zz,&z);
247: v = a->a;
248: xb = x;
249:
250: for (i=0; i<mbs; i++) {
251: n = ai[1] - ai[0]; /* length of i_th row of A */
252: x1 = xb[0];
253: ib = aj + *ai;
254: jmin = 0;
255: if (*ib == i) { /* (diag of A)*x */
256: z[i] += *v++ * x[*ib++];
257: jmin++;
258: }
259: for (j=jmin; j<n; j++) {
260: cval = *ib;
261: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
262: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
263: }
264: xb++; ai++;
265: }
267: VecRestoreArray(xx,&x);
268: VecRestoreArray(zz,&z);
269: PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */
270: return(0);
271: }
275: int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
276: {
277: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
278: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
279: MatScalar *v;
280: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
284: VecSet(&zero,zz);
285: VecGetArray(xx,&x);
286: VecGetArray(zz,&z);
287:
288: v = a->a;
289: xb = x;
291: for (i=0; i<mbs; i++) {
292: n = ai[1] - ai[0]; /* length of i_th block row of A */
293: x1 = xb[0]; x2 = xb[1];
294: ib = aj + *ai;
295: jmin = 0;
296: if (*ib == i){ /* (diag of A)*x */
297: z[2*i] += v[0]*x1 + v[2]*x2;
298: z[2*i+1] += v[2]*x1 + v[3]*x2;
299: v += 4; jmin++;
300: }
301: for (j=jmin; j<n; j++) {
302: /* (strict lower triangular part of A)*x */
303: cval = ib[j]*2;
304: z[cval] += v[0]*x1 + v[1]*x2;
305: z[cval+1] += v[2]*x1 + v[3]*x2;
306: /* (strict upper triangular part of A)*x */
307: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
308: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
309: v += 4;
310: }
311: xb +=2; ai++;
312: }
314: VecRestoreArray(xx,&x);
315: VecRestoreArray(zz,&z);
316: PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
317: return(0);
318: }
322: int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
323: {
324: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
325: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
326: MatScalar *v;
327: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
331: VecSet(&zero,zz);
332: VecGetArray(xx,&x);
333: VecGetArray(zz,&z);
334:
335: v = a->a;
336: xb = x;
338: for (i=0; i<mbs; i++) {
339: n = ai[1] - ai[0]; /* length of i_th block row of A */
340: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
341: ib = aj + *ai;
342: jmin = 0;
343: if (*ib == i){ /* (diag of A)*x */
344: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
345: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
346: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
347: v += 9; jmin++;
348: }
349: for (j=jmin; j<n; j++) {
350: /* (strict lower triangular part of A)*x */
351: cval = ib[j]*3;
352: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
353: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
354: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
355: /* (strict upper triangular part of A)*x */
356: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
357: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
358: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
359: v += 9;
360: }
361: xb +=3; ai++;
362: }
364: VecRestoreArray(xx,&x);
365: VecRestoreArray(zz,&z);
366: PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
367: return(0);
368: }
372: int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
373: {
374: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
375: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
376: MatScalar *v;
377: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
380: VecSet(&zero,zz);
381: VecGetArray(xx,&x);
382: VecGetArray(zz,&z);
383:
384: v = a->a;
385: xb = x;
387: for (i=0; i<mbs; i++) {
388: n = ai[1] - ai[0]; /* length of i_th block row of A */
389: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
390: ib = aj + *ai;
391: jmin = 0;
392: if (*ib == i){ /* (diag of A)*x */
393: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
394: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
395: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
396: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
397: v += 16; jmin++;
398: }
399: for (j=jmin; j<n; j++) {
400: /* (strict lower triangular part of A)*x */
401: cval = ib[j]*4;
402: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
403: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
404: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
405: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
406: /* (strict upper triangular part of A)*x */
407: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
408: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
409: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
410: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
411: v += 16;
412: }
413: xb +=4; ai++;
414: }
416: VecRestoreArray(xx,&x);
417: VecRestoreArray(zz,&z);
418: PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
419: return(0);
420: }
424: int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
425: {
426: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
427: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
428: MatScalar *v;
429: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
432: VecSet(&zero,zz);
433: VecGetArray(xx,&x);
434: VecGetArray(zz,&z);
435:
436: v = a->a;
437: xb = x;
439: for (i=0; i<mbs; i++) {
440: n = ai[1] - ai[0]; /* length of i_th block row of A */
441: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
442: ib = aj + *ai;
443: jmin = 0;
444: if (*ib == i){ /* (diag of A)*x */
445: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
446: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
447: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
448: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
449: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
450: v += 25; jmin++;
451: }
452: for (j=jmin; j<n; j++) {
453: /* (strict lower triangular part of A)*x */
454: cval = ib[j]*5;
455: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
456: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
457: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
458: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
459: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
460: /* (strict upper triangular part of A)*x */
461: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
462: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
463: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
464: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
465: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
466: v += 25;
467: }
468: xb +=5; ai++;
469: }
471: VecRestoreArray(xx,&x);
472: VecRestoreArray(zz,&z);
473: PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
474: return(0);
475: }
480: int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
481: {
482: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
483: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
484: MatScalar *v;
485: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
488: VecSet(&zero,zz);
489: VecGetArray(xx,&x);
490: VecGetArray(zz,&z);
491:
492: v = a->a;
493: xb = x;
495: for (i=0; i<mbs; i++) {
496: n = ai[1] - ai[0]; /* length of i_th block row of A */
497: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
498: ib = aj + *ai;
499: jmin = 0;
500: if (*ib == i){ /* (diag of A)*x */
501: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
502: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
503: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
504: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
505: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
506: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
507: v += 36; jmin++;
508: }
509: for (j=jmin; j<n; j++) {
510: /* (strict lower triangular part of A)*x */
511: cval = ib[j]*6;
512: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
513: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
514: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
515: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
516: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
517: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
518: /* (strict upper triangular part of A)*x */
519: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
520: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
521: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
522: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
523: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
524: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
525: v += 36;
526: }
527: xb +=6; ai++;
528: }
530: VecRestoreArray(xx,&x);
531: VecRestoreArray(zz,&z);
532: PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
533: return(0);
534: }
537: int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
538: {
539: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
540: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
541: MatScalar *v;
542: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
545: VecSet(&zero,zz);
546: VecGetArray(xx,&x);
547: VecGetArray(zz,&z);
548:
549: v = a->a;
550: xb = x;
552: for (i=0; i<mbs; i++) {
553: n = ai[1] - ai[0]; /* length of i_th block row of A */
554: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
555: ib = aj + *ai;
556: jmin = 0;
557: if (*ib == i){ /* (diag of A)*x */
558: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
559: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
560: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
561: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
562: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
563: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
564: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
565: v += 49; jmin++;
566: }
567: for (j=jmin; j<n; j++) {
568: /* (strict lower triangular part of A)*x */
569: cval = ib[j]*7;
570: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
571: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
572: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
573: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
574: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
575: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
576: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
577: /* (strict upper triangular part of A)*x */
578: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
579: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
580: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
581: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
582: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
583: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
584: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
585: v += 49;
586: }
587: xb +=7; ai++;
588: }
589: VecRestoreArray(xx,&x);
590: VecRestoreArray(zz,&z);
591: PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
592: return(0);
593: }
595: /*
596: This will not work with MatScalar == float because it calls the BLAS
597: */
600: int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
601: {
602: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
603: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
604: MatScalar *v;
605: int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
606: int ncols,k;
609: VecSet(&zero,zz);
610: VecGetArray(xx,&x); x_ptr=x;
611: VecGetArray(zz,&z); z_ptr=z;
613: aj = a->j;
614: v = a->a;
615: ii = a->i;
617: if (!a->mult_work) {
618: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
619: }
620: work = a->mult_work;
621:
622: for (i=0; i<mbs; i++) {
623: n = ii[1] - ii[0]; ncols = n*bs;
624: workt = work; idx=aj+ii[0];
626: /* upper triangular part */
627: for (j=0; j<n; j++) {
628: xb = x_ptr + bs*(*idx++);
629: for (k=0; k<bs; k++) workt[k] = xb[k];
630: workt += bs;
631: }
632: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
633: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
634:
635: /* strict lower triangular part */
636: idx = aj+ii[0];
637: if (*idx == i){
638: ncols -= bs; v += bs2; idx++; n--;
639: }
640:
641: if (ncols > 0){
642: workt = work;
643: PetscMemzero(workt,ncols*sizeof(PetscScalar));
644: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
645: for (j=0; j<n; j++) {
646: zb = z_ptr + bs*(*idx++);
647: for (k=0; k<bs; k++) zb[k] += workt[k] ;
648: workt += bs;
649: }
650: }
651: x += bs; v += n*bs2; z += bs; ii++;
652: }
653:
654: VecRestoreArray(xx,&x);
655: VecRestoreArray(zz,&z);
656: PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
657: return(0);
658: }
662: int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
663: {
664: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
665: PetscScalar *x,*y,*z,*xb,x1;
666: MatScalar *v;
667: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
670: VecGetArray(xx,&x);
671: if (yy != xx) {
672: VecGetArray(yy,&y);
673: } else {
674: y = x;
675: }
676: if (zz != yy) {
677: /* VecCopy(yy,zz); */
678: VecGetArray(zz,&z);
679: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
680: } else {
681: z = y;
682: }
684: v = a->a;
685: xb = x;
687: for (i=0; i<mbs; i++) {
688: n = ai[1] - ai[0]; /* length of i_th row of A */
689: x1 = xb[0];
690: ib = aj + *ai;
691: jmin = 0;
692: if (*ib == i) { /* (diag of A)*x */
693: z[i] += *v++ * x[*ib++]; jmin++;
694: }
695: for (j=jmin; j<n; j++) {
696: cval = *ib;
697: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
698: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
699: }
700: xb++; ai++;
701: }
703: VecRestoreArray(xx,&x);
704: if (yy != xx) VecRestoreArray(yy,&y);
705: if (zz != yy) VecRestoreArray(zz,&z);
706:
707: PetscLogFlops(2*(a->nz*2 - A->m));
708: return(0);
709: }
713: int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
714: {
715: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
716: PetscScalar *x,*y,*z,*xb,x1,x2;
717: MatScalar *v;
718: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
721: VecGetArray(xx,&x);
722: if (yy != xx) {
723: VecGetArray(yy,&y);
724: } else {
725: y = x;
726: }
727: if (zz != yy) {
728: /* VecCopy(yy,zz); */
729: VecGetArray(zz,&z);
730: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
731: } else {
732: z = y;
733: }
735: v = a->a;
736: xb = x;
738: for (i=0; i<mbs; i++) {
739: n = ai[1] - ai[0]; /* length of i_th block row of A */
740: x1 = xb[0]; x2 = xb[1];
741: ib = aj + *ai;
742: jmin = 0;
743: if (*ib == i){ /* (diag of A)*x */
744: z[2*i] += v[0]*x1 + v[2]*x2;
745: z[2*i+1] += v[2]*x1 + v[3]*x2;
746: v += 4; jmin++;
747: }
748: for (j=jmin; j<n; j++) {
749: /* (strict lower triangular part of A)*x */
750: cval = ib[j]*2;
751: z[cval] += v[0]*x1 + v[1]*x2;
752: z[cval+1] += v[2]*x1 + v[3]*x2;
753: /* (strict upper triangular part of A)*x */
754: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
755: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
756: v += 4;
757: }
758: xb +=2; ai++;
759: }
761: VecRestoreArray(xx,&x);
762: if (yy != xx) VecRestoreArray(yy,&y);
763: if (zz != yy) VecRestoreArray(zz,&z);
765: PetscLogFlops(4*(a->nz*2 - A->m));
766: return(0);
767: }
771: int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
772: {
773: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
774: PetscScalar *x,*y,*z,*xb,x1,x2,x3;
775: MatScalar *v;
776: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
779: VecGetArray(xx,&x);
780: if (yy != xx) {
781: VecGetArray(yy,&y);
782: } else {
783: y = x;
784: }
785: if (zz != yy) {
786: /* VecCopy(yy,zz); */
787: VecGetArray(zz,&z);
788: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
789: } else {
790: z = y;
791: }
793: v = a->a;
794: xb = x;
796: for (i=0; i<mbs; i++) {
797: n = ai[1] - ai[0]; /* length of i_th block row of A */
798: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
799: ib = aj + *ai;
800: jmin = 0;
801: if (*ib == i){ /* (diag of A)*x */
802: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
803: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
804: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
805: v += 9; jmin++;
806: }
807: for (j=jmin; j<n; j++) {
808: /* (strict lower triangular part of A)*x */
809: cval = ib[j]*3;
810: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
811: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
812: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
813: /* (strict upper triangular part of A)*x */
814: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
815: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
816: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
817: v += 9;
818: }
819: xb +=3; ai++;
820: }
822: VecRestoreArray(xx,&x);
823: if (yy != xx) VecRestoreArray(yy,&y);
824: if (zz != yy) VecRestoreArray(zz,&z);
826: PetscLogFlops(18*(a->nz*2 - A->m));
827: return(0);
828: }
832: int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
833: {
834: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
835: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4;
836: MatScalar *v;
837: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
840: VecGetArray(xx,&x);
841: if (yy != xx) {
842: VecGetArray(yy,&y);
843: } else {
844: y = x;
845: }
846: if (zz != yy) {
847: /* VecCopy(yy,zz); */
848: VecGetArray(zz,&z);
849: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
850: } else {
851: z = y;
852: }
854: v = a->a;
855: xb = x;
857: for (i=0; i<mbs; i++) {
858: n = ai[1] - ai[0]; /* length of i_th block row of A */
859: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
860: ib = aj + *ai;
861: jmin = 0;
862: if (*ib == i){ /* (diag of A)*x */
863: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
864: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
865: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
866: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
867: v += 16; jmin++;
868: }
869: for (j=jmin; j<n; j++) {
870: /* (strict lower triangular part of A)*x */
871: cval = ib[j]*4;
872: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
873: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
874: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
875: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
876: /* (strict upper triangular part of A)*x */
877: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
878: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
879: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
880: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
881: v += 16;
882: }
883: xb +=4; ai++;
884: }
886: VecRestoreArray(xx,&x);
887: if (yy != xx) VecRestoreArray(yy,&y);
888: if (zz != yy) VecRestoreArray(zz,&z);
890: PetscLogFlops(32*(a->nz*2 - A->m));
891: return(0);
892: }
896: int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
897: {
898: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
899: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5;
900: MatScalar *v;
901: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
904: VecGetArray(xx,&x);
905: if (yy != xx) {
906: VecGetArray(yy,&y);
907: } else {
908: y = x;
909: }
910: if (zz != yy) {
911: /* VecCopy(yy,zz); */
912: VecGetArray(zz,&z);
913: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
914: } else {
915: z = y;
916: }
918: v = a->a;
919: xb = x;
921: for (i=0; i<mbs; i++) {
922: n = ai[1] - ai[0]; /* length of i_th block row of A */
923: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
924: ib = aj + *ai;
925: jmin = 0;
926: if (*ib == i){ /* (diag of A)*x */
927: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
928: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
929: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
930: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
931: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
932: v += 25; jmin++;
933: }
934: for (j=jmin; j<n; j++) {
935: /* (strict lower triangular part of A)*x */
936: cval = ib[j]*5;
937: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
938: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
939: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
940: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
941: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
942: /* (strict upper triangular part of A)*x */
943: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
944: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
945: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
946: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
947: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
948: v += 25;
949: }
950: xb +=5; ai++;
951: }
953: VecRestoreArray(xx,&x);
954: if (yy != xx) VecRestoreArray(yy,&y);
955: if (zz != yy) VecRestoreArray(zz,&z);
957: PetscLogFlops(50*(a->nz*2 - A->m));
958: return(0);
959: }
962: int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
963: {
964: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
965: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
966: MatScalar *v;
967: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
970: VecGetArray(xx,&x);
971: if (yy != xx) {
972: VecGetArray(yy,&y);
973: } else {
974: y = x;
975: }
976: if (zz != yy) {
977: /* VecCopy(yy,zz); */
978: VecGetArray(zz,&z);
979: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
980: } else {
981: z = y;
982: }
984: v = a->a;
985: xb = x;
987: for (i=0; i<mbs; i++) {
988: n = ai[1] - ai[0]; /* length of i_th block row of A */
989: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
990: ib = aj + *ai;
991: jmin = 0;
992: if (*ib == i){ /* (diag of A)*x */
993: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
994: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
995: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
996: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
997: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
998: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
999: v += 36; jmin++;
1000: }
1001: for (j=jmin; j<n; j++) {
1002: /* (strict lower triangular part of A)*x */
1003: cval = ib[j]*6;
1004: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1005: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1006: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1007: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1008: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1009: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1010: /* (strict upper triangular part of A)*x */
1011: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1012: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1013: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1014: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1015: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1016: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1017: v += 36;
1018: }
1019: xb +=6; ai++;
1020: }
1022: VecRestoreArray(xx,&x);
1023: if (yy != xx) VecRestoreArray(yy,&y);
1024: if (zz != yy) VecRestoreArray(zz,&z);
1026: PetscLogFlops(72*(a->nz*2 - A->m));
1027: return(0);
1028: }
/*
   MatMultAdd_SeqSBAIJ_7 - Computes zz = yy + A*xx with kernels unrolled
   for 7x7 blocks (49 scalars per stored block).

   Input:
     A  - matrix whose data is a Mat_SeqSBAIJ
     xx - vector that is multiplied
     yy - vector that is added
   Output:
     zz - result; may be the same Vec as yy (update in place)

   Each stored off-diagonal block is applied twice: once to the entries of
   z addressed by its block column, and once (transposed access pattern) to
   the entries of z belonging to the current block row.  The diagonal block,
   when it is the first block of the row, is applied once and only 28 of its
   49 entries are read, i.e. it is treated as symmetric.

   Always returns 0 in the code visible here.
*/
1032: int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1033: {
1034: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1035: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1036: MatScalar *v;
1037: int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
1040: VecGetArray(xx,&x);
/* when yy and xx are the same Vec the array is fetched only once and shared */
1041: if (yy != xx) {
1042: VecGetArray(yy,&y);
1043: } else {
1044: y = x;
1045: }
/* a distinct zz is seeded with the contents of yy; otherwise z aliases y */
1046: if (zz != yy) {
1047: /* VecCopy(yy,zz); */
1048: VecGetArray(zz,&z);
1049: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1050: } else {
1051: z = y;
1052: }
/* v walks through the block values, xb through x one block (7 entries) at a time */
1054: v = a->a;
1055: xb = x;
/* ai is advanced once per iteration, so ai[0]/ai[1] always bracket block row i */
1057: for (i=0; i<mbs; i++) {
1058: n = ai[1] - ai[0]; /* length of i_th block row of A */
1059: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
/* ib: block column indices of block row i */
1060: ib = aj + *ai;
1061: jmin = 0;
/* the diagonal block is recognised only if it is the first block of the row */
1062: if (*ib == i){ /* (diag of A)*x */
1063: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1064: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1065: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1066: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1067: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1068: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1069: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1070: v += 49; jmin++;
1071: }
/* remaining blocks of the row: each one updates two different slices of z */
1072: for (j=jmin; j<n; j++) {
1073: /* (strict lower triangular part of A)*x */
/* cval: scalar offset of block column ib[j] in x and z */
1074: cval = ib[j]*7;
1075: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1076: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1077: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1078: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1079: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1080: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1081: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1082: /* (strict upper triangular part of A)*x */
1083: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1084: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1085: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1086: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1087: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1088: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1089: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1090: v += 49;
1091: }
1092: xb +=7; ai++;
1093: }
/* hand back only the arrays that were actually fetched above */
1095: VecRestoreArray(xx,&x);
1096: if (yy != xx) VecRestoreArray(yy,&y);
1097: if (zz != yy) VecRestoreArray(zz,&z);
1099: PetscLogFlops(98*(a->nz*2 - A->m));
1100: return(0);
1101: }
/*
   MatMultAdd_SeqSBAIJ_N - Computes zz = yy + A*xx for an arbitrary block
   size bs, using the generic dense kernels instead of unrolled code.

   Input:
     A  - matrix whose data is a Mat_SeqSBAIJ
     xx - vector that is multiplied
     yy - vector that is added
   Output:
     zz - result; may be the same Vec as yy (update in place)

   x_ptr and z_ptr keep the base addresses of the x and z arrays because
   x and z themselves are advanced by bs after every block row, while the
   gather (from x) and scatter (into z) steps need absolute block offsets.

   a->mult_work is allocated on first use (A->m+1 scalars) and reused on
   later calls.

   Always returns 0 in the code visible here.
*/
1105: int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1106: {
1107: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1108: PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1109: MatScalar *v;
1110: int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1111: int ncols,k;
1114: VecGetArray(xx,&x); x_ptr=x;
/* when yy and xx are the same Vec the array is fetched only once and shared */
1115: if (yy != xx) {
1116: VecGetArray(yy,&y);
1117: } else {
1118: y = x;
1119: }
/* a distinct zz is seeded with the contents of yy; otherwise z aliases y */
1120: if (zz != yy) {
1121: /* VecCopy(yy,zz); */
1122: VecGetArray(zz,&z); z_ptr=z;
1123: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1124: } else {
/* z_ptr must be set here too: it is the base address for the scatter into z
   further down, and leaving it 0 makes that scatter write through a null base
   whenever zz and yy are the same Vec */
1125: z = y; z_ptr=z;
1126: }
1128: aj = a->j;
1129: v = a->a;
1130: ii = a->i;
/* lazily create the scratch buffer shared by both phases below */
1132: if (!a->mult_work) {
1133: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1134: }
1135: work = a->mult_work;
1136:
1137:
/* ii is advanced once per iteration, so ii[0]/ii[1] always bracket block row i */
1138: for (i=0; i<mbs; i++) {
1139: n = ii[1] - ii[0]; ncols = n*bs;
1140: workt = work; idx=aj+ii[0];
1142: /* upper triangular part */
/* gather the x entries of every block column of this row into contiguous work */
1143: for (j=0; j<n; j++) {
1144: xb = x_ptr + bs*(*idx++);
1145: for (k=0; k<bs; k++) workt[k] = xb[k];
1146: workt += bs;
1147: }
1148: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1149: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1151: /* strict lower triangular part */
1152: idx = aj+ii[0];
/* skip the diagonal block (only recognised as first block of the row) so it is
   not applied a second time; v, idx, n and ncols then describe the remainder */
1153: if (*idx == i){
1154: ncols -= bs; v += bs2; idx++; n--;
1155: }
1156: if (ncols > 0){
1157: workt = work;
1158: PetscMemzero(workt,ncols*sizeof(PetscScalar));
/* presumably workt += (remaining blocks)^T * x(i*bs:(i+1)*bs-1); confirm against
   the kernel macro definition */
1159: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
/* scatter-add each bs-sized piece of workt into z at its block column */
1160: for (j=0; j<n; j++) {
1161: zb = z_ptr + bs*(*idx++);
1162: /* idx++; */
1163: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1164: workt += bs;
1165: }
1166: }
/* move to the next block row; n no longer counts a skipped diagonal block */
1168: x += bs; v += n*bs2; z += bs; ii++;
1169: }
/* hand back only the arrays that were actually fetched above */
1171: VecRestoreArray(xx,&x);
1172: if (yy != xx) VecRestoreArray(yy,&y);
1173: if (zz != yy) VecRestoreArray(zz,&z);
1175: PetscLogFlops(2*(a->nz*2 - A->m));
1176: return(0);
1177: }
/*
   MatMultTranspose_SeqSBAIJ - Transpose product for the SBAIJ format.

   Simply forwards to MatMult(A,xx,zz); presumably this is valid because the
   format represents a symmetric matrix, so the transpose product and the
   plain product coincide.  Always returns 0 in the code visible here.
*/
1181: int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
1182: {
1186: MatMult(A,xx,zz);
1187: return(0);
1188: }
/*
   MatMultTransposeAdd_SeqSBAIJ - Transpose multiply-add for the SBAIJ format.

   Simply forwards to MatMultAdd(A,xx,yy,zz); presumably this is valid because
   the format represents a symmetric matrix.  Always returns 0 in the code
   visible here.
*/
1192: int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1194: {
1198: MatMultAdd(A,xx,yy,zz);
1199: return(0);
1200: }
/*
   MatScale_SeqSBAIJ - Multiplies every stored value of inA by *alpha.

   Input:
     alpha - pointer to the scale factor
     inA   - matrix whose data is a Mat_SeqSBAIJ; its value array is
             modified in place

   Always returns 0 in the code visible here.
*/
1204: int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
1205: {
1206: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
/* totalnz: number of stored scalars = (scalars per block) * (stored blocks) */
1207: int one = 1,totalnz = a->bs2*a->nz;
/* one call over the whole value array with stride 1 (presumably a BLAS scal
   wrapper); the cast drops const because the callee takes a non-const pointer */
1210: BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1211: PetscLogFlops(totalnz);
1212: return(0);
1213: }
/*
   MatNorm_SeqSBAIJ - Computes a norm of A from its stored blocks and writes
   it to *norm.

   Input:
     A    - matrix whose data is a Mat_SeqSBAIJ
     type - NORM_FROBENIUS or NORM_INFINITY
   Output:
     norm - the computed value

   NORM_FROBENIUS: sqrt(sum_diag + 2*sum_off), where sum_diag accumulates the
   squared magnitudes of the entries of diagonal blocks and sum_off those of
   every other stored block (each of which is counted twice).

   NORM_INFINITY: the largest sum of absolute values over the scalar rows.
   For block row k the sum has two parts: the blocks stored in row k itself,
   and blocks stored in other block rows that sit in block column k.  The
   latter are reached through linked lists kept in the work arrays il/jl.

   Any other type raises PETSC_ERR_SUP.
*/
1217: int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1218: {
1219: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1220: MatScalar *v = a->a;
1221: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1222: int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1223: int *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
1224:
1226: if (type == NORM_FROBENIUS) {
/* v advances sequentially through the value array, one block (bs2 scalars) at a time */
1227: for (k=0; k<mbs; k++){
1228: jmin = a->i[k]; jmax = a->i[k+1];
1229: col = aj + jmin;
/* the diagonal block is recognised only if it is the first block of the row */
1230: if (*col == k){ /* diagonal block */
1231: for (i=0; i<bs2; i++){
1232: #if defined(PETSC_USE_COMPLEX)
1233: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1234: #else
1235: sum_diag += (*v)*(*v); v++;
1236: #endif
1237: }
1238: jmin++;
1239: }
1240: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1241: for (i=0; i<bs2; i++){
1242: #if defined(PETSC_USE_COMPLEX)
1243: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1244: #else
1245: sum_off += (*v)*(*v); v++;
1246: #endif
1247: }
1248: }
1249: }
/* stored off-diagonal blocks contribute twice */
1250: *norm = sqrt(sum_diag + 2*sum_off);
1252: } else if (type == NORM_INFINITY) { /* maximum row sum */
/* il[i]: index (into a->j / blocks of a->a) of the block of row i that is next
          to be visited
   jl   : linked lists of block rows; jl[k] is the head of the list for block
          column k, jl[i] links row i to the next row, mbs marks the end
   sum  : absolute row sums of the bs scalar rows of the current block row */
1253: PetscMalloc(mbs*sizeof(int),&il);
1254: PetscMalloc(mbs*sizeof(int),&jl);
1255: PetscMalloc(bs*sizeof(PetscReal),&sum);
/* all lists start empty; il[0] is (redundantly) reset on every pass */
1256: for (i=0; i<mbs; i++) {
1257: jl[i] = mbs; il[0] = 0;
1258: }
1260: *norm = 0.0;
1261: for (k=0; k<mbs; k++) { /* k_th block row */
1262: for (j=0; j<bs; j++) sum[j]=0.0;
1264: /*-- col sum --*/
1265: i = jl[k]; /* first |A(i,k)| to be added */
1266: /* jl[k]=i: first nonzero element in row i for submatrix A(1:k,k:n) (active window)
1267: at step k */
1268: while (i<mbs){
1269: nexti = jl[i]; /* next block row to be added */
1270: ik = il[i]; /* block index of A(i,k) in the array a */
/* sum[j] collects the bs consecutive entries starting at offset j*bs of the block */
1271: for (j=0; j<bs; j++){
1272: v = a->a + ik*bs2 + j*bs;
1273: for (k1=0; k1<bs; k1++) {
1274: sum[j] += PetscAbsScalar(*v); v++;
1275: }
1276: }
1277: /* update il, jl */
1278: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1279: jmax = a->i[i+1];
/* if row i has another block, move row i to the head of that block column's list */
1280: if (jmin < jmax){
1281: il[i] = jmin;
1282: j = a->j[jmin];
1283: jl[i] = jl[j]; jl[j]=i;
1284: }
1285: i = nexti;
1286: }
1287:
1288: /*-- row sum --*/
1289: jmin = a->i[k]; jmax = a->i[k+1];
/* sum[j] collects the entries at offsets j, j+bs, j+2*bs, ... of every block stored in row k */
1290: for (i=jmin; i<jmax; i++) {
1291: for (j=0; j<bs; j++){
1292: v = a->a + i*bs2 + j;
1293: for (k1=0; k1<bs; k1++){
1294: sum[j] += PetscAbsScalar(*v);
1295: v += bs;
1296: }
1297: }
1298: }
1299: /* add k_th block row to il, jl */
1300: col = aj+jmin;
/* skip the diagonal block; the first remaining block decides which list row k joins */
1301: if (*col == k) jmin++;
1302: if (jmin < jmax){
1303: il[k] = jmin;
1304: j = a->j[jmin];
1305: jl[k] = jl[j]; jl[j] = k;
1306: }
/* keep the running maximum over all scalar rows seen so far */
1307: for (j=0; j<bs; j++){
1308: if (sum[j] > *norm) *norm = sum[j];
1309: }
1310: }
1311: PetscFree(il);
1312: PetscFree(jl);
1313: PetscFree(sum);
1314: } else {
1315: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1316: }
1317: return(0);
1318: }
/*
   MatEqual_SeqSBAIJ - Tests whether two SeqSBAIJ matrices hold identical data.

   Input:
     A, B - matrices whose data are Mat_SeqSBAIJ
   Output:
     flg  - PETSC_FALSE as soon as one of the checks below fails; otherwise
            the result of the last comparison

   Checks, in order: dimensions, block size and number of stored blocks;
   the row-pointer array a->i; the block-column array a->j; the value array
   a->a.  The arrays are compared bytewise with PetscMemcmp.

   Always returns 0 in the code visible here.
*/
1322: int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1323: {
1324: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1325: int ierr;
1329: /* If the matrix dimensions, block sizes or numbers of stored blocks differ */
1330: if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1331: *flg = PETSC_FALSE;
1332: return(0);
1333: }
1334:
1335: /* if the a->i are the same */
1336: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);
1337: if (*flg == PETSC_FALSE) {
1338: return(0);
1339: }
1340:
1341: /* if a->j are the same */
1342: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);
1343: if (*flg == PETSC_FALSE) {
1344: return(0);
1345: }
1346: /* if a->a are the same */
/* the value array holds MatScalar entries, so the byte count must use
   sizeof(MatScalar); sizeof(PetscScalar) would read past the end of both
   arrays whenever MatScalar is the narrower type */
1347: PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(MatScalar),flg);
1348:
1349: return(0);
1350: }
/*
   MatGetDiagonal_SeqSBAIJ - Copies the scalar diagonal of A into v.

   Input:
     A - matrix whose data is a Mat_SeqSBAIJ
   Output:
     v - first set to zero, then filled for every block row whose first
         stored block is the diagonal block; entries of block rows without
         such a block stay zero

   For a factored matrix with bs == 1 the reciprocal of the stored value is
   returned instead of the value itself.

   Errors:
     PETSC_ERR_ARG_WRONGSTATE - factored matrix with bs > 1
     PETSC_ERR_ARG_SIZ        - local size of v differs from A->m
*/
1354: int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1355: {
1356: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1357: int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1358: PetscScalar *x,zero = 0.0;
1359: MatScalar *aa,*aa_j;
1362: bs = a->bs;
1363: if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1364:
1365: aa = a->a;
1366: ai = a->i;
1367: aj = a->j;
1368: ambs = a->mbs;
1369: bs2 = a->bs2;
1371: VecSet(&zero,v);
1372: VecGetArray(v,&x);
1373: VecGetLocalSize(v,&n);
1374: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1375: for (i=0; i<ambs; i++) {
/* only the first stored block of the row is examined */
1376: j=ai[i];
1377: if (aj[j] == i) { /* if this is a diagonal element */
1378: row = i*bs;
1379: aa_j = aa + j*bs2;
/* stepping by bs+1 through the bs*bs block visits its diagonal entries */
1380: if (A->factor && bs==1){
1381: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1382: } else {
1383: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1384: }
1385: }
1386: }
1387:
1388: VecRestoreArray(v,&x);
1389: return(0);
1390: }
/*
   MatDiagonalScale_SeqSBAIJ - Scales the stored blocks of A in place by the
   entries of the scaling vectors.

   Input:
     A  - matrix whose data is a Mat_SeqSBAIJ
     ll - left scaling vector
     rr - right scaling vector; must be the same Vec as ll

   Because ll and rr have to be identical, a non-null vector is applied by
   both passes below; if both are null nothing is scaled.

   Errors:
     PETSC_ERR_ARG_OUTOFRANGE - ll and rr are different
     PETSC_ERR_ARG_SIZ        - local size of a scaling vector differs from A->m
*/
1394: int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1395: {
1396: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1397: PetscScalar *l,*r,x,*li,*ri;
1398: MatScalar *aa,*v;
1399: int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1402: ai = a->i;
1403: aj = a->j;
1404: aa = a->a;
1405: m = A->m;
1406: bs = a->bs;
1407: mbs = a->mbs;
1408: bs2 = a->bs2;
1410: if (ll != rr) {
1411: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1412: }
/* first pass: factor taken from the slice of l belonging to the current block row */
1413: if (ll) {
1414: VecGetArray(ll,&l);
1415: VecGetLocalSize(ll,&lm);
1416: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1417: for (i=0; i<mbs; i++) { /* for each block row */
1418: M = ai[i+1] - ai[i];
1419: li = l + i*bs;
1420: v = aa + bs2*ai[i];
1421: for (j=0; j<M; j++) { /* for each block */
/* entry k of the block is scaled by li[k%bs] */
1422: for (k=0; k<bs2; k++) {
1423: (*v++) *= li[k%bs];
1424: }
/* compiled only when CONT is defined */
1425: #ifdef CONT
1426: /* will be used to replace the above loop */
1427: ri = l + bs*aj[ai[i]+j];
1428: for (k=0; k<bs; k++) { /* column value */
1429: x = ri[k];
1430: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1431: }
1432: #endif
1434: }
1435: }
1436: VecRestoreArray(ll,&l);
1437: PetscLogFlops(2*a->nz);
1438: }
1439: /* will be deleted */
/* second pass: factor taken from the slice of r belonging to the block's column index */
1440: if (rr) {
1441: VecGetArray(rr,&r);
1442: VecGetLocalSize(rr,&rn);
1443: if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1444: for (i=0; i<mbs; i++) { /* for each block row */
1445: M = ai[i+1] - ai[i];
1446: v = aa + bs2*ai[i];
1447: for (j=0; j<M; j++) { /* for each block */
1448: ri = r + bs*aj[ai[i]+j];
/* each run of bs consecutive entries of the block is scaled by the same ri[k] */
1449: for (k=0; k<bs; k++) {
1450: x = ri[k];
1451: for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1452: }
1453: }
1454: }
1455: VecRestoreArray(rr,&r);
1456: PetscLogFlops(a->nz);
1457: }
1458: return(0);
1459: }
/*
   MatGetInfo_SeqSBAIJ - Fills a MatInfo record with size, storage and
   factorization statistics of A.

   Input:
     A    - matrix whose data is a Mat_SeqSBAIJ
     flag - not consulted by the code visible here
   Output:
     info - every field assigned below is overwritten

   Always returns 0 in the code visible here.
*/
1463: int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1464: {
1465: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
/* rows and columns, local and global, are all reported as A->m */
1468: info->rows_global = (double)A->m;
1469: info->columns_global = (double)A->m;
1470: info->rows_local = (double)A->m;
1471: info->columns_local = (double)A->m;
/* NOTE(review): block_size is set from bs2, not bs - confirm this is intended */
1472: info->block_size = a->bs2;
/* NOTE(review): nz_used is scaled by bs2 while nz_allocated is not; whether
   maxnz already counts scalars cannot be told from here - confirm */
1473: info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */
1474: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1475: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1476: info->assemblies = A->num_ass;
1477: info->mallocs = a->reallocs;
1478: info->memory = A->mem;
/* factorization statistics are copied only for a factored matrix, else zeroed */
1479: if (A->factor) {
1480: info->fill_ratio_given = A->info.fill_ratio_given;
1481: info->fill_ratio_needed = A->info.fill_ratio_needed;
1482: info->factor_mallocs = A->info.factor_mallocs;
1483: } else {
1484: info->fill_ratio_given = 0;
1485: info->fill_ratio_needed = 0;
1486: info->factor_mallocs = 0;
1487: }
1488: return(0);
1489: }
/*
   MatZeroEntries_SeqSBAIJ - Sets every stored value of A to zero; the index
   arrays a->i and a->j are not touched.

   Always returns 0 in the code visible here.
*/
1494: int MatZeroEntries_SeqSBAIJ(Mat A)
1495: {
1496: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1497: int ierr;
/* a->i[a->mbs] is the last row-pointer entry; times bs2 it gives the number of scalars cleared */
1500: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1501: return(0);
1502: }
/*
   MatGetRowMax_SeqSBAIJ - For every scalar row, stores in v the largest
   absolute value found among the entries of that row.

   Input:
     A - matrix whose data is a Mat_SeqSBAIJ; must not be factored
   Output:
     v - first set to zero, then raised entry by entry

   Every stored entry updates the slot of its own row; entries of blocks whose
   block column is larger than the block row additionally update the slot of
   their column, which accounts for the part of the matrix that is not stored.

   Errors:
     PETSC_ERR_ARG_WRONGSTATE - A is factored
     PETSC_ERR_ARG_SIZ        - local size of v differs from A->m
*/
1506: int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1507: {
1508: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1509: int ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1510: PetscReal atmp;
1511: MatScalar *aa;
1512: PetscScalar zero = 0.0,*x;
1513: int ncols,brow,bcol,krow,kcol;
1516: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1517: bs = a->bs;
1518: aa = a->a;
1519: ai = a->i;
1520: aj = a->j;
1521: mbs = a->mbs;
1523: VecSet(&zero,v);
1524: VecGetArray(v,&x);
1525: VecGetLocalSize(v,&n);
1526: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
/* ai, aj and aa are advanced as the loops proceed, so they always point at the current row / block / value */
1527: for (i=0; i<mbs; i++) {
1528: ncols = ai[1] - ai[0]; ai++;
1529: brow = bs*i;
1530: for (j=0; j<ncols; j++){
1531: bcol = bs*(*aj);
/* values are consumed in storage order: inner loop over krow, outer over kcol */
1532: for (kcol=0; kcol<bs; kcol++){
1533: col = bcol + kcol; /* col index */
1534: for (krow=0; krow<bs; krow++){
1535: atmp = PetscAbsScalar(*aa); aa++;
1536: row = brow + krow; /* row index */
1537: /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1538: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
/* blocks with block column > block row also count toward the slot of their column */
1539: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1540: }
1541: }
1542: aj++;
1543: }
1544: }
1545: VecRestoreArray(v,&x);
1546: return(0);
1547: }