Actual source code: sbaij2.c

  1: /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/inline/spops.h
 5:  #include src/inline/ilu.h
 6:  #include petscbt.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h

 11: int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov)
 12: {
 13:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
 14:   int          brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol,
 15:                start,end,*ai,*aj,bs,*nidx2;
 16:   PetscBT      table;
 17:   PetscBT      table0;

 20:   mbs = a->mbs;
 21:   ai  = a->i;
 22:   aj  = a->j;
 23:   bs  = a->bs;

 25:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");

 27:   PetscBTCreate(mbs,table);
 28:   PetscMalloc((mbs+1)*sizeof(int),&nidx);
 29:   PetscMalloc((A->m+1)*sizeof(int),&nidx2);
 30:   PetscBTCreate(mbs,table0);

 32:   for (i=0; i<is_max; i++) { /* for each is */
 33:     isz  = 0;
 34:     PetscBTMemzero(mbs,table);
 35: 
 36:     /* Extract the indices, assume there can be duplicate entries */
 37:     ISGetIndices(is[i],&idx);
 38:     ISGetLocalSize(is[i],&n);

 40:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 41:     for (j=0; j<n ; ++j){
 42:       brow = idx[j]/bs; /* convert the indices into block indices */
 43:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 44:       if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;}
 45:     }
 46:     ISRestoreIndices(is[i],&idx);
 47:     ISDestroy(is[i]);
 48: 
 49:     k = 0;
 50:     for (j=0; j<ov; j++){ /* for each overlap */
 51:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 52:       PetscBTMemzero(mbs,table0);
 53:       for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);

 55:       n = isz;  /* length of the updated is[i] */
 56:       for (brow=0; brow<mbs; brow++){
 57:         start = ai[brow]; end   = ai[brow+1];
 58:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 59:           for (l = start; l<end ; l++){
 60:             bcol = aj[l];
 61:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 62:           }
 63:           k++;
 64:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 65:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 66:           for (l = start; l<end ; l++){
 67:             bcol = aj[l];
 68:             if (PetscBTLookup(table0,bcol)){
 69:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 70:               break; /* for l = start; l<end ; l++) */
 71:             }
 72:           }
 73:         }
 74:       }
 75:     } /* for each overlap */

 77:     /* expand the Index Set */
 78:     for (j=0; j<isz; j++) {
 79:       for (k=0; k<bs; k++)
 80:         nidx2[j*bs+k] = nidx[j]*bs+k;
 81:     }
 82:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 83:   }
 84:   PetscBTDestroy(table);
 85:   PetscFree(nidx);
 86:   PetscFree(nidx2);
 87:   PetscBTDestroy(table0);
 88:   return(0);
 89: }

 93: int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 94: {
 95:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 96:   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
 97:   int          row,mat_i,*mat_j,tcol,*mat_ilen;
 98:   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
 99:   int          *aj = a->j,*ai = a->i;
100:   MatScalar    *mat_a;
101:   Mat          C;
102:   PetscTruth   flag;

105: 
106:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
107:   ISSorted(iscol,(PetscTruth*)&i);
108:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

110:   ISGetIndices(isrow,&irow);
111:   ISGetSize(isrow,&nrows);
112: 
113:   PetscMalloc((1+oldcols)*sizeof(int),&smap);
114:   ssmap = smap;
115:   PetscMalloc((1+nrows)*sizeof(int),&lens);
116:   PetscMemzero(smap,oldcols*sizeof(int));
117:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
118:   /* determine lens of each row */
119:   for (i=0; i<nrows; i++) {
120:     kstart  = ai[irow[i]];
121:     kend    = kstart + a->ilen[irow[i]];
122:     lens[i] = 0;
123:       for (k=kstart; k<kend; k++) {
124:         if (ssmap[aj[k]]) {
125:           lens[i]++;
126:         }
127:       }
128:     }
129:   /* Create and fill new matrix */
130:   if (scall == MAT_REUSE_MATRIX) {
131:     c = (Mat_SeqSBAIJ *)((*B)->data);

133:     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
134:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);
135:     if (flag == PETSC_FALSE) {
136:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
137:     }
138:     PetscMemzero(c->ilen,c->mbs*sizeof(int));
139:     C = *B;
140:   } else {
141:     MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);
142:     MatSetType(C,A->type_name);
143:     MatSeqSBAIJSetPreallocation(C,bs,0,lens);
144:   }
145:   c = (Mat_SeqSBAIJ *)(C->data);
146:   for (i=0; i<nrows; i++) {
147:     row    = irow[i];
148:     kstart = ai[row];
149:     kend   = kstart + a->ilen[row];
150:     mat_i  = c->i[i];
151:     mat_j  = c->j + mat_i;
152:     mat_a  = c->a + mat_i*bs2;
153:     mat_ilen = c->ilen + i;
154:     for (k=kstart; k<kend; k++) {
155:       if ((tcol=ssmap[a->j[k]])) {
156:         *mat_j++ = tcol - 1;
157:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
158:         mat_a   += bs2;
159:         (*mat_ilen)++;
160:       }
161:     }
162:   }
163: 
164:   /* Free work space */
165:   PetscFree(smap);
166:   PetscFree(lens);
167:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
168:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
169: 
170:   ISRestoreIndices(isrow,&irow);
171:   *B = C;
172:   return(0);
173: }

177: int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
178: {
179:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
180:   IS          is1;
181:   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;

184:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
185: 
186:   ISGetIndices(isrow,&irow);
187:   ISGetSize(isrow,&nrows);
188: 
189:   /* Verify if the indices corespond to each element in a block 
190:    and form the IS with compressed IS */
191:   PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);
192:   iary = vary + a->mbs;
193:   PetscMemzero(vary,(a->mbs)*sizeof(int));
194:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
195: 
196:   count = 0;
197:   for (i=0; i<a->mbs; i++) {
198:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
199:     if (vary[i]==bs) iary[count++] = i;
200:   }
201:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
202: 
203:   ISRestoreIndices(isrow,&irow);
204:   PetscFree(vary);

206:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
207:   ISDestroy(is1);
208:   return(0);
209: }

213: int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
214: {
215:   int ierr,i;

218:   if (scall == MAT_INITIAL_MATRIX) {
219:     PetscMalloc((n+1)*sizeof(Mat),B);
220:   }

222:   for (i=0; i<n; i++) {
223:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
224:   }
225:   return(0);
226: }

228: /* -------------------------------------------------------*/
229: /* Should check that shapes of vectors and matrices match */
230: /* -------------------------------------------------------*/
231:  #include petscblaslapack.h

235: int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
236: {
237:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
238:   PetscScalar     *x,*z,*xb,x1,zero=0.0;
239:   MatScalar       *v;
240:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

243:   VecSet(&zero,zz);
244:   VecGetArray(xx,&x);
245:   VecGetArray(zz,&z);

247:   v  = a->a;
248:   xb = x;
249: 
250:   for (i=0; i<mbs; i++) {
251:     n  = ai[1] - ai[0];  /* length of i_th row of A */
252:     x1 = xb[0];
253:     ib = aj + *ai;
254:     jmin = 0;
255:     if (*ib == i) {      /* (diag of A)*x */
256:       z[i] += *v++ * x[*ib++];
257:       jmin++;
258:     }
259:     for (j=jmin; j<n; j++) {
260:       cval    = *ib;
261:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
262:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
263:     }
264:     xb++; ai++;
265:   }

267:   VecRestoreArray(xx,&x);
268:   VecRestoreArray(zz,&z);
269:   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
270:   return(0);
271: }

275: int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
276: {
277:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
278:   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
279:   MatScalar        *v;
280:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


284:   VecSet(&zero,zz);
285:   VecGetArray(xx,&x);
286:   VecGetArray(zz,&z);
287: 
288:   v     = a->a;
289:   xb = x;

291:   for (i=0; i<mbs; i++) {
292:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
293:     x1 = xb[0]; x2 = xb[1];
294:     ib = aj + *ai;
295:     jmin = 0;
296:     if (*ib == i){     /* (diag of A)*x */
297:       z[2*i]   += v[0]*x1 + v[2]*x2;
298:       z[2*i+1] += v[2]*x1 + v[3]*x2;
299:       v += 4; jmin++;
300:     }
301:     for (j=jmin; j<n; j++) {
302:       /* (strict lower triangular part of A)*x  */
303:       cval       = ib[j]*2;
304:       z[cval]     += v[0]*x1 + v[1]*x2;
305:       z[cval+1]   += v[2]*x1 + v[3]*x2;
306:       /* (strict upper triangular part of A)*x  */
307:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
308:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
309:       v  += 4;
310:     }
311:     xb +=2; ai++;
312:   }

314:   VecRestoreArray(xx,&x);
315:   VecRestoreArray(zz,&z);
316:   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
317:   return(0);
318: }

322: int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
323: {
324:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
325:   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
326:   MatScalar     *v;
327:   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


331:   VecSet(&zero,zz);
332:   VecGetArray(xx,&x);
333:   VecGetArray(zz,&z);
334: 
335:   v     = a->a;
336:   xb = x;

338:   for (i=0; i<mbs; i++) {
339:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
340:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
341:     ib = aj + *ai;
342:     jmin = 0;
343:     if (*ib == i){     /* (diag of A)*x */
344:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
345:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
346:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
347:       v += 9; jmin++;
348:     }
349:     for (j=jmin; j<n; j++) {
350:       /* (strict lower triangular part of A)*x  */
351:       cval       = ib[j]*3;
352:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
353:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
354:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
355:       /* (strict upper triangular part of A)*x  */
356:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
357:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
358:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
359:       v  += 9;
360:     }
361:     xb +=3; ai++;
362:   }

364:   VecRestoreArray(xx,&x);
365:   VecRestoreArray(zz,&z);
366:   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
367:   return(0);
368: }

372: int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
373: {
374:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
375:   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
376:   MatScalar        *v;
377:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

380:   VecSet(&zero,zz);
381:   VecGetArray(xx,&x);
382:   VecGetArray(zz,&z);
383: 
384:   v     = a->a;
385:   xb = x;

387:   for (i=0; i<mbs; i++) {
388:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
389:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
390:     ib = aj + *ai;
391:     jmin = 0;
392:     if (*ib == i){     /* (diag of A)*x */
393:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
394:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
395:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
396:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
397:       v += 16; jmin++;
398:     }
399:     for (j=jmin; j<n; j++) {
400:       /* (strict lower triangular part of A)*x  */
401:       cval       = ib[j]*4;
402:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
403:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
404:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
405:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
406:       /* (strict upper triangular part of A)*x  */
407:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
408:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
409:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
410:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
411:       v  += 16;
412:     }
413:     xb +=4; ai++;
414:   }

416:   VecRestoreArray(xx,&x);
417:   VecRestoreArray(zz,&z);
418:   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
419:   return(0);
420: }

424: int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
425: {
426:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
427:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
428:   MatScalar       *v;
429:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

432:   VecSet(&zero,zz);
433:   VecGetArray(xx,&x);
434:   VecGetArray(zz,&z);
435: 
436:   v     = a->a;
437:   xb = x;

439:   for (i=0; i<mbs; i++) {
440:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
441:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
442:     ib = aj + *ai;
443:     jmin = 0;
444:     if (*ib == i){      /* (diag of A)*x */
445:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
446:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
447:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
448:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
449:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
450:       v += 25; jmin++;
451:     }
452:     for (j=jmin; j<n; j++) {
453:       /* (strict lower triangular part of A)*x  */
454:       cval       = ib[j]*5;
455:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
456:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
457:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
458:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
459:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
460:       /* (strict upper triangular part of A)*x  */
461:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
462:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
463:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
464:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
465:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
466:       v  += 25;
467:     }
468:     xb +=5; ai++;
469:   }

471:   VecRestoreArray(xx,&x);
472:   VecRestoreArray(zz,&z);
473:   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
474:   return(0);
475: }


480: int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
481: {
482:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
483:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
484:   MatScalar       *v;
485:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

488:   VecSet(&zero,zz);
489:   VecGetArray(xx,&x);
490:   VecGetArray(zz,&z);
491: 
492:   v     = a->a;
493:   xb = x;

495:   for (i=0; i<mbs; i++) {
496:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
497:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
498:     ib = aj + *ai;
499:     jmin = 0;
500:     if (*ib == i){      /* (diag of A)*x */
501:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
502:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
503:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
504:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
505:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
506:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
507:       v += 36; jmin++;
508:     }
509:     for (j=jmin; j<n; j++) {
510:       /* (strict lower triangular part of A)*x  */
511:       cval       = ib[j]*6;
512:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
513:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
514:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
515:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
516:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
517:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
518:       /* (strict upper triangular part of A)*x  */
519:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
520:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
521:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
522:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
523:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
524:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
525:       v  += 36;
526:     }
527:     xb +=6; ai++;
528:   }

530:   VecRestoreArray(xx,&x);
531:   VecRestoreArray(zz,&z);
532:   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
533:   return(0);
534: }
537: int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
538: {
539:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
540:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
541:   MatScalar       *v;
542:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

545:   VecSet(&zero,zz);
546:   VecGetArray(xx,&x);
547:   VecGetArray(zz,&z);
548: 
549:   v     = a->a;
550:   xb = x;

552:   for (i=0; i<mbs; i++) {
553:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
554:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
555:     ib = aj + *ai;
556:     jmin = 0;
557:     if (*ib == i){      /* (diag of A)*x */
558:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
559:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
560:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
561:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
562:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
563:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
564:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
565:       v += 49; jmin++;
566:     }
567:     for (j=jmin; j<n; j++) {
568:       /* (strict lower triangular part of A)*x  */
569:       cval       = ib[j]*7;
570:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
571:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
572:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
573:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
574:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
575:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
576:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
577:       /* (strict upper triangular part of A)*x  */
578:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
579:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
580:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
581:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
582:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
583:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
584:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
585:       v  += 49;
586:     }
587:     xb +=7; ai++;
588:   }
589:   VecRestoreArray(xx,&x);
590:   VecRestoreArray(zz,&z);
591:   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
592:   return(0);
593: }

595: /*
596:     This will not work with MatScalar == float because it calls the BLAS
597: */
600: int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
601: {
602:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
603:   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
604:   MatScalar       *v;
605:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
606:   int             ncols,k;

609:   VecSet(&zero,zz);
610:   VecGetArray(xx,&x); x_ptr=x;
611:   VecGetArray(zz,&z); z_ptr=z;

613:   aj   = a->j;
614:   v    = a->a;
615:   ii   = a->i;

617:   if (!a->mult_work) {
618:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
619:   }
620:   work = a->mult_work;
621: 
622:   for (i=0; i<mbs; i++) {
623:     n     = ii[1] - ii[0]; ncols = n*bs;
624:     workt = work; idx=aj+ii[0];

626:     /* upper triangular part */
627:     for (j=0; j<n; j++) {
628:       xb = x_ptr + bs*(*idx++);
629:       for (k=0; k<bs; k++) workt[k] = xb[k];
630:       workt += bs;
631:     }
632:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
633:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
634: 
635:     /* strict lower triangular part */
636:     idx = aj+ii[0];
637:     if (*idx == i){
638:       ncols -= bs; v += bs2; idx++; n--;
639:     }
640: 
641:     if (ncols > 0){
642:       workt = work;
643:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
644:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
645:       for (j=0; j<n; j++) {
646:         zb = z_ptr + bs*(*idx++);
647:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
648:         workt += bs;
649:       }
650:     }
651:     x += bs; v += n*bs2; z += bs; ii++;
652:   }
653: 
654:   VecRestoreArray(xx,&x);
655:   VecRestoreArray(zz,&z);
656:   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
657:   return(0);
658: }

662: int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
663: {
664:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
665:   PetscScalar     *x,*y,*z,*xb,x1;
666:   MatScalar       *v;
667:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

670:   VecGetArray(xx,&x);
671:   if (yy != xx) {
672:     VecGetArray(yy,&y);
673:   } else {
674:     y = x;
675:   }
676:   if (zz != yy) {
677:     /* VecCopy(yy,zz); */
678:     VecGetArray(zz,&z);
679:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
680:   } else {
681:     z = y;
682:   }

684:   v  = a->a;
685:   xb = x;

687:   for (i=0; i<mbs; i++) {
688:     n  = ai[1] - ai[0];  /* length of i_th row of A */
689:     x1 = xb[0];
690:     ib = aj + *ai;
691:     jmin = 0;
692:     if (*ib == i) {            /* (diag of A)*x */
693:       z[i] += *v++ * x[*ib++]; jmin++;
694:     }
695:     for (j=jmin; j<n; j++) {
696:       cval    = *ib;
697:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
698:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
699:     }
700:     xb++; ai++;
701:   }

703:   VecRestoreArray(xx,&x);
704:   if (yy != xx) VecRestoreArray(yy,&y);
705:   if (zz != yy) VecRestoreArray(zz,&z);
706: 
707:   PetscLogFlops(2*(a->nz*2 - A->m));
708:   return(0);
709: }

713: int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
714: {
715:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
716:   PetscScalar     *x,*y,*z,*xb,x1,x2;
717:   MatScalar       *v;
718:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

721:   VecGetArray(xx,&x);
722:   if (yy != xx) {
723:     VecGetArray(yy,&y);
724:   } else {
725:     y = x;
726:   }
727:   if (zz != yy) {
728:     /* VecCopy(yy,zz); */
729:     VecGetArray(zz,&z);
730:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
731:   } else {
732:     z = y;
733:   }

735:   v     = a->a;
736:   xb = x;

738:   for (i=0; i<mbs; i++) {
739:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
740:     x1 = xb[0]; x2 = xb[1];
741:     ib = aj + *ai;
742:     jmin = 0;
743:     if (*ib == i){      /* (diag of A)*x */
744:       z[2*i]   += v[0]*x1 + v[2]*x2;
745:       z[2*i+1] += v[2]*x1 + v[3]*x2;
746:       v += 4; jmin++;
747:     }
748:     for (j=jmin; j<n; j++) {
749:       /* (strict lower triangular part of A)*x  */
750:       cval       = ib[j]*2;
751:       z[cval]     += v[0]*x1 + v[1]*x2;
752:       z[cval+1]   += v[2]*x1 + v[3]*x2;
753:       /* (strict upper triangular part of A)*x  */
754:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
755:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
756:       v  += 4;
757:     }
758:     xb +=2; ai++;
759:   }

761:   VecRestoreArray(xx,&x);
762:   if (yy != xx) VecRestoreArray(yy,&y);
763:   if (zz != yy) VecRestoreArray(zz,&z);

765:   PetscLogFlops(4*(a->nz*2 - A->m));
766:   return(0);
767: }

771: int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
772: {
773:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
774:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
775:   MatScalar       *v;
776:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

779:   VecGetArray(xx,&x);
780:   if (yy != xx) {
781:     VecGetArray(yy,&y);
782:   } else {
783:     y = x;
784:   }
785:   if (zz != yy) {
786:     /* VecCopy(yy,zz); */
787:     VecGetArray(zz,&z);
788:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
789:   } else {
790:     z = y;
791:   }

793:   v     = a->a;
794:   xb = x;

796:   for (i=0; i<mbs; i++) {
797:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
798:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
799:     ib = aj + *ai;
800:     jmin = 0;
801:     if (*ib == i){     /* (diag of A)*x */
802:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
803:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
804:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
805:      v += 9; jmin++;
806:     }
807:     for (j=jmin; j<n; j++) {
808:       /* (strict lower triangular part of A)*x  */
809:       cval       = ib[j]*3;
810:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
811:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
812:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
813:       /* (strict upper triangular part of A)*x  */
814:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
815:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
816:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
817:       v  += 9;
818:     }
819:     xb +=3; ai++;
820:   }

822:   VecRestoreArray(xx,&x);
823:   if (yy != xx) VecRestoreArray(yy,&y);
824:   if (zz != yy) VecRestoreArray(zz,&z);

826:   PetscLogFlops(18*(a->nz*2 - A->m));
827:   return(0);
828: }

832: int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
833: {
834:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
835:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
836:   MatScalar       *v;
837:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

840:   VecGetArray(xx,&x);
841:   if (yy != xx) {
842:     VecGetArray(yy,&y);
843:   } else {
844:     y = x;
845:   }
846:   if (zz != yy) {
847:     /* VecCopy(yy,zz); */
848:     VecGetArray(zz,&z);
849:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
850:   } else {
851:     z = y;
852:   }

854:   v     = a->a;
855:   xb = x;

857:   for (i=0; i<mbs; i++) {
858:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
859:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
860:     ib = aj + *ai;
861:     jmin = 0;
862:     if (*ib == i){      /* (diag of A)*x */
863:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
864:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
865:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
866:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
867:       v += 16; jmin++;
868:     }
869:     for (j=jmin; j<n; j++) {
870:       /* (strict lower triangular part of A)*x  */
871:       cval       = ib[j]*4;
872:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
873:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
874:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
875:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
876:       /* (strict upper triangular part of A)*x  */
877:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
878:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
879:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
880:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
881:       v  += 16;
882:     }
883:     xb +=4; ai++;
884:   }

886:   VecRestoreArray(xx,&x);
887:   if (yy != xx) VecRestoreArray(yy,&y);
888:   if (zz != yy) VecRestoreArray(zz,&z);

890:   PetscLogFlops(32*(a->nz*2 - A->m));
891:   return(0);
892: }

/*
   MatMultAdd_SeqSBAIJ_5 - Computes zz = yy + A*xx with kernels unrolled for 5x5 blocks.

   Input Parameters:
+  A  - matrix whose data is a Mat_SeqSBAIJ (block row offsets a->i, block column
        indices a->j, block values a->a)
.  xx - vector multiplied by A
-  yy - vector added to the product

   Output Parameter:
.  zz - result; when zz != yy its array is first overwritten with the values of yy,
        when zz == yy the sums are accumulated directly into yy's array

   Notes:
   A block is read as v[r+5*c] (row r, column c) and v advances 25 scalars per block.
   A block whose column index equals the block row (only the first entry of the row is
   tested) is applied once, reading only entries with r <= c.  Every other stored block
   is applied twice: transposed to z[cval..cval+4] and as stored to z[5*i..5*i+4].

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   Vec and memory calls are not examined here - confirm against the original source.
*/
896: int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
897: {
898:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
899:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
900:   MatScalar       *v;
901:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

  /* obtain the raw arrays; aliased vectors share one array instead of being fetched twice */
904:   VecGetArray(xx,&x);
905:   if (yy != xx) {
906:     VecGetArray(yy,&y);
907:   } else {
908:     y = x;
909:   }
910:   if (zz != yy) {
911:     /* VecCopy(yy,zz); */
912:     VecGetArray(zz,&z);
913:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
914:   } else {
915:     z = y;
916:   }

918:   v     = a->a;
919:   xb = x;

921:   for (i=0; i<mbs; i++) {
922:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
923:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
924:     ib = aj + *ai;
925:     jmin = 0;
    /* NOTE(review): *ib is read before n is looked at; for n == 0 it is not an entry of this row */
926:     if (*ib == i){      /* (diag of A)*x */
927:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
928:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
929:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
930:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
931:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
932:       v += 25; jmin++;
933:     }
934:     for (j=jmin; j<n; j++) {
935:       /* (strict lower triangular part of A)*x  */
936:       cval       = ib[j]*5;
937:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
938:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
939:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
940:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
941:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
942:       /* (strict upper triangular part of A)*x  */
943:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
944:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
945:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
946:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
947:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
948:       v  += 25;
949:     }
950:     xb +=5; ai++;
951:   }

  /* release only the arrays that were actually fetched above */
953:   VecRestoreArray(xx,&x);
954:   if (yy != xx) VecRestoreArray(yy,&y);
955:   if (zz != yy) VecRestoreArray(zz,&z);

957:   PetscLogFlops(50*(a->nz*2 - A->m));
958:   return(0);
959: }
/*
   MatMultAdd_SeqSBAIJ_6 - Computes zz = yy + A*xx with kernels unrolled for 6x6 blocks.

   Input Parameters:
+  A  - matrix whose data is a Mat_SeqSBAIJ (block row offsets a->i, block column
        indices a->j, block values a->a)
.  xx - vector multiplied by A
-  yy - vector added to the product

   Output Parameter:
.  zz - result; when zz != yy its array is first overwritten with the values of yy,
        when zz == yy the sums are accumulated directly into yy's array

   Notes:
   A block is read as v[r+6*c] (row r, column c) and v advances 36 scalars per block.
   A block whose column index equals the block row (only the first entry of the row is
   tested) is applied once, reading only entries with r <= c.  Every other stored block
   is applied twice: transposed to z[cval..cval+5] and as stored to z[6*i..6*i+5].

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   Vec and memory calls are not examined here - confirm against the original source.
*/
962: int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
963: {
964:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
965:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
966:   MatScalar       *v;
967:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

  /* obtain the raw arrays; aliased vectors share one array instead of being fetched twice */
970:   VecGetArray(xx,&x);
971:   if (yy != xx) {
972:     VecGetArray(yy,&y);
973:   } else {
974:     y = x;
975:   }
976:   if (zz != yy) {
977:     /* VecCopy(yy,zz); */
978:     VecGetArray(zz,&z);
979:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
980:   } else {
981:     z = y;
982:   }

984:   v     = a->a;
985:   xb = x;

987:   for (i=0; i<mbs; i++) {
988:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
989:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
990:     ib = aj + *ai;
991:     jmin = 0;
    /* NOTE(review): *ib is read before n is looked at; for n == 0 it is not an entry of this row */
992:     if (*ib == i){     /* (diag of A)*x */
993:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
994:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
995:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
996:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
997:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
998:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
999:       v += 36; jmin++;
1000:     }
1001:     for (j=jmin; j<n; j++) {
1002:       /* (strict lower triangular part of A)*x  */
1003:       cval       = ib[j]*6;
1004:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1005:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1006:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1007:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1008:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1009:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1010:       /* (strict upper triangular part of A)*x  */
1011:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1012:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1013:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1014:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1015:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1016:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1017:       v  += 36;
1018:     }
1019:     xb +=6; ai++;
1020:   }

  /* release only the arrays that were actually fetched above */
1022:   VecRestoreArray(xx,&x);
1023:   if (yy != xx) VecRestoreArray(yy,&y);
1024:   if (zz != yy) VecRestoreArray(zz,&z);

1026:   PetscLogFlops(72*(a->nz*2 - A->m));
1027:   return(0);
1028: }

/*
   MatMultAdd_SeqSBAIJ_7 - Computes zz = yy + A*xx with kernels unrolled for 7x7 blocks.

   Input Parameters:
+  A  - matrix whose data is a Mat_SeqSBAIJ (block row offsets a->i, block column
        indices a->j, block values a->a)
.  xx - vector multiplied by A
-  yy - vector added to the product

   Output Parameter:
.  zz - result; when zz != yy its array is first overwritten with the values of yy,
        when zz == yy the sums are accumulated directly into yy's array

   Notes:
   A block is read as v[r+7*c] (row r, column c) and v advances 49 scalars per block.
   A block whose column index equals the block row (only the first entry of the row is
   tested) is applied once, reading only entries with r <= c.  Every other stored block
   is applied twice: transposed to z[cval..cval+6] and as stored to z[7*i..7*i+6].

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   Vec and memory calls are not examined here - confirm against the original source.
*/
1032: int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1033: {
1034:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1035:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1036:   MatScalar       *v;
1037:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

  /* obtain the raw arrays; aliased vectors share one array instead of being fetched twice */
1040:   VecGetArray(xx,&x);
1041:   if (yy != xx) {
1042:     VecGetArray(yy,&y);
1043:   } else {
1044:     y = x;
1045:   }
1046:   if (zz != yy) {
1047:     /* VecCopy(yy,zz); */
1048:     VecGetArray(zz,&z);
1049:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1050:   } else {
1051:     z = y;
1052:   }

1054:   v     = a->a;
1055:   xb = x;

1057:   for (i=0; i<mbs; i++) {
1058:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1059:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1060:     ib = aj + *ai;
1061:     jmin = 0;
    /* NOTE(review): *ib is read before n is looked at; for n == 0 it is not an entry of this row */
1062:     if (*ib == i){     /* (diag of A)*x */
1063:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1064:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1065:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1066:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1067:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1068:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1069:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1070:       v += 49; jmin++;
1071:     }
1072:     for (j=jmin; j<n; j++) {
1073:       /* (strict lower triangular part of A)*x  */
1074:       cval       = ib[j]*7;
1075:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1076:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1077:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1078:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1079:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1080:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1081:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1082:       /* (strict upper triangular part of A)*x  */
1083:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1084:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1085:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1086:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1087:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1088:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1089:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1090:       v  += 49;
1091:     }
1092:     xb +=7; ai++;
1093:   }

  /* release only the arrays that were actually fetched above */
1095:   VecRestoreArray(xx,&x);
1096:   if (yy != xx) VecRestoreArray(yy,&y);
1097:   if (zz != yy) VecRestoreArray(zz,&z);

1099:   PetscLogFlops(98*(a->nz*2 - A->m));
1100:   return(0);
1101: }

/*
   MatMultAdd_SeqSBAIJ_N - Computes zz = yy + A*xx for an arbitrary block size bs.

   Input Parameters:
+  A  - matrix whose data is a Mat_SeqSBAIJ (block row offsets a->i, block column
        indices a->j, block values a->a, bs2 values per block)
.  xx - vector multiplied by A
-  yy - vector added to the product

   Output Parameter:
.  zz - result; when zz != yy its array is first overwritten with the values of yy,
        when zz == yy the sums are accumulated directly into yy's array

   Notes:
   x and z are advanced by bs after every block row, while x_ptr and z_ptr keep the start
   of the arrays because the gather (from x_ptr) and the scatter-add (into z_ptr) are
   indexed by absolute block column.  z_ptr therefore has to be valid in both aliasing
   cases: when zz == yy it must point at yy's array, otherwise the scatter-add below
   would be applied to a null base pointer.

   a->mult_work is allocated on first use with A->m+1 scalars and kept on the matrix.
   Kernel_w_gets_w_plus_Ar_times_v and Kernel_w_gets_w_plus_trans_Ar_times_v are defined
   outside this part of the file; their effect is described by the comments at the calls.

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   Vec and memory calls are not examined here - confirm against the original source.
*/
1105: int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1106: {
1107:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1108:   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1109:   MatScalar       *v;
1110:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1111:   int             ncols,k;

1114:   VecGetArray(xx,&x); x_ptr=x;
1115:   if (yy != xx) {
1116:     VecGetArray(yy,&y);
1117:   } else {
1118:     y = x;
1119:   }
1120:   if (zz != yy) {
1121:     /* VecCopy(yy,zz); */
1122:     VecGetArray(zz,&z); z_ptr=z;
1123:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1124:   } else {
    /* accumulate in place; z_ptr must still mark the start of the result array */
1125:     z = y; z_ptr = z;
1126:   }

1128:   aj   = a->j;
1129:   v    = a->a;
1130:   ii   = a->i;

  /* gather buffer, created once and reused by later calls */
1132:   if (!a->mult_work) {
1133:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1134:   }
1135:   work = a->mult_work;
1136: 
1137: 
1138:   for (i=0; i<mbs; i++) {
1139:     n     = ii[1] - ii[0]; ncols = n*bs;
1140:     workt = work; idx=aj+ii[0];

1142:     /* upper triangular part */
    /* copy the bs entries of x belonging to every block column of this row into work */
1143:     for (j=0; j<n; j++) {
1144:       xb = x_ptr + bs*(*idx++);
1145:       for (k=0; k<bs; k++) workt[k] = xb[k];
1146:       workt += bs;
1147:     }
1148:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1149:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1151:     /* strict lower triangular part */
1152:     idx = aj+ii[0];
    /* a leading block on the diagonal was fully handled above: step over it */
1153:     if (*idx == i){
1154:       ncols -= bs; v += bs2; idx++; n--;
1155:     }
1156:     if (ncols > 0){
1157:       workt = work;
1158:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
      /* work receives the transposed off-diagonal blocks applied to x(i*bs:(i+1)*bs-1) */
1159:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
      /* add each bs-long piece of work to the rows of z selected by its block column */
1160:       for (j=0; j<n; j++) {
1161:         zb = z_ptr + bs*(*idx++);
1162:         /* idx++; */
1163:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1164:         workt += bs;
1165:       }
1166:     }

    /* n no longer counts a skipped diagonal block, whose bs2 values were stepped over above */
1168:     x += bs; v += n*bs2; z += bs; ii++;
1169:   }

1171:   VecRestoreArray(xx,&x);
1172:   if (yy != xx) VecRestoreArray(yy,&y);
1173:   if (zz != yy) VecRestoreArray(zz,&z);

1175:   PetscLogFlops(2*(a->nz*2 - A->m));
1176:   return(0);
1177: }

/*
   MatMultTranspose_SeqSBAIJ - Transpose product for the symmetric format; it forwards
   its arguments unchanged to MatMult(A,xx,zz).

   NOTE(review): the value returned by MatMult is not propagated in this text; 0 is
   always returned - confirm against the original source.
*/
1181: int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
1182: {

1186:   MatMult(A,xx,zz);
1187:   return(0);
1188: }

/*
   MatMultTransposeAdd_SeqSBAIJ - Transpose multiply-add for the symmetric format; it
   forwards its arguments unchanged to MatMultAdd(A,xx,yy,zz).

   NOTE(review): the value returned by MatMultAdd is not propagated in this text; 0 is
   always returned - confirm against the original source.
*/
1192: int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)

1194: {

1198:   MatMultAdd(A,xx,yy,zz);
1199:   return(0);
1200: }

/*
   MatScale_SeqSBAIJ - Scales the stored values of inA by *alpha.

   Input Parameters:
+  alpha - pointer to the scale factor
-  inA   - matrix whose data is a Mat_SeqSBAIJ; its value array a->a is modified in place

   Notes:
   One BLscal_ call covers totalnz = a->bs2*a->nz values with increment 1, and totalnz
   is the number of flops logged.
   BLscal_ is defined outside this file; presumably a BLAS scal wrapper - TODO confirm.
*/
1204: int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
1205: {
1206:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1207:   int         one = 1,totalnz = a->bs2*a->nz;

  /* const is cast away because BLscal_ is called with a non-const PetscScalar pointer */
1210:   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1211:   PetscLogFlops(totalnz);
1212:   return(0);
1213: }

/*
   MatNorm_SeqSBAIJ - Computes a norm of A from the stored blocks only.

   Input Parameters:
+  A    - matrix whose data is a Mat_SeqSBAIJ
-  type - NORM_FROBENIUS or NORM_INFINITY; any other value raises PETSC_ERR_SUP

   Output Parameter:
.  norm - the computed value

   Notes:
   NORM_FROBENIUS walks a->a sequentially.  Values of a block whose column index equals
   the block row (only the first entry of a row is tested) are counted once, values of
   all other stored blocks twice.

   NORM_INFINITY builds, for each block row k, bs sums in sum[]: first from blocks of
   earlier block rows that are currently linked to k through jl/il ("col sum"), then
   from every block stored in block row k ("row sum").  norm is the largest sum seen.
   jl[] holds linked lists of block rows (the value mbs terminates a list) and il[i] is
   the position in a->j / a->a of the block of row i that is processed next.

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   PetscMalloc/PetscFree calls are not examined here - confirm against the original source.
*/
1217: int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1218: {
1219:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1220:   MatScalar   *v = a->a;
1221:   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1222:   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1223:   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
1224: 
1226:   if (type == NORM_FROBENIUS) {
1227:     for (k=0; k<mbs; k++){
1228:       jmin = a->i[k]; jmax = a->i[k+1];
1229:       col  = aj + jmin;
1230:       if (*col == k){         /* diagonal block */
1231:         for (i=0; i<bs2; i++){
1232: #if defined(PETSC_USE_COMPLEX)
1233:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1234: #else
1235:           sum_diag += (*v)*(*v); v++;
1236: #endif
1237:         }
1238:         jmin++;
1239:       }
1240:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1241:         for (i=0; i<bs2; i++){
1242: #if defined(PETSC_USE_COMPLEX)
1243:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1244: #else
1245:           sum_off += (*v)*(*v); v++;
1246: #endif  
1247:         }
1248:       }
1249:     }
    /* off-diagonal blocks are stored once but counted twice */
1250:     *norm = sqrt(sum_diag + 2*sum_off);

1252:   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1253:     PetscMalloc(mbs*sizeof(int),&il);
1254:     PetscMalloc(mbs*sizeof(int),&jl);
1255:     PetscMalloc(bs*sizeof(PetscReal),&sum);
    /* every list starts empty (mbs is the end marker); il[0] is rewritten on each pass */
1256:     for (i=0; i<mbs; i++) {
1257:       jl[i] = mbs; il[0] = 0;
1258:     }

1260:     *norm = 0.0;
1261:     for (k=0; k<mbs; k++) { /* k_th block row */
1262:       for (j=0; j<bs; j++) sum[j]=0.0;

1264:       /*-- col sum --*/
1265:       i = jl[k]; /* first |A(i,k)| to be added */
1266:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1267:                   at step k */
1268:       while (i<mbs){
1269:         nexti = jl[i];  /* next block row to be added */
1270:         ik    = il[i];  /* block index of A(i,k) in the array a */
        /* sum[j] gets the bs consecutive values starting at offset j*bs of block ik */
1271:         for (j=0; j<bs; j++){
1272:           v = a->a + ik*bs2 + j*bs;
1273:           for (k1=0; k1<bs; k1++) {
1274:             sum[j] += PetscAbsScalar(*v); v++;
1275:           }
1276:         }
1277:         /* update il, jl */
1278:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1279:         jmax = a->i[i+1];
        /* if row i has another block, move row i to the front of that block column's list */
1280:         if (jmin < jmax){
1281:           il[i] = jmin;
1282:           j   = a->j[jmin];
1283:           jl[i] = jl[j]; jl[j]=i;
1284:         }
1285:         i = nexti;
1286:       }
1287: 
1288:       /*-- row sum --*/
1289:       jmin = a->i[k]; jmax = a->i[k+1];
      /* sum[j] gets the values at offsets j, j+bs, j+2*bs, ... of every block in row k */
1290:       for (i=jmin; i<jmax; i++) {
1291:         for (j=0; j<bs; j++){
1292:           v = a->a + i*bs2 + j;
1293:           for (k1=0; k1<bs; k1++){
1294:             sum[j] += PetscAbsScalar(*v);
1295:             v   += bs;
1296:           }
1297:         }
1298:       }
1299:       /* add k_th block row to il, jl */
1300:       col = aj+jmin;
1301:       if (*col == k) jmin++;
1302:       if (jmin < jmax){
1303:         il[k] = jmin;
1304:         j   = a->j[jmin];
1305:         jl[k] = jl[j]; jl[j] = k;
1306:       }
1307:       for (j=0; j<bs; j++){
1308:         if (sum[j] > *norm) *norm = sum[j];
1309:       }
1310:     }
1311:     PetscFree(il);
1312:     PetscFree(jl);
1313:     PetscFree(sum);
1314:   } else {
1315:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1316:   }
1317:   return(0);
1318: }

/*
   MatEqual_SeqSBAIJ - Tests whether two SeqSBAIJ matrices hold identical stored data.

   Input Parameters:
+  A - first matrix (data is a Mat_SeqSBAIJ)
-  B - second matrix (data is a Mat_SeqSBAIJ)

   Output Parameter:
.  flg - PETSC_FALSE as soon as one comparison fails, otherwise the result of the
         final comparison of the value arrays

   Notes:
   The comparisons are made in order: sizes / block size / number of stored blocks,
   then the block row offsets a->i, the block column indices a->j and finally the values
   a->a.  The value array holds MatScalar entries, so the number of bytes compared is
   computed with sizeof(MatScalar), matching how a->a is sized elsewhere in this file.

   NOTE(review): ierr is declared but never assigned in this text, so the results of the
   PetscMemcmp calls are not examined here - confirm against the original source.
*/
1322: int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1323: {
1324:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1325:   int          ierr;


1329:   /* different matrix dimensions, block size or number of stored blocks: not equal */
1330:   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1331:     *flg = PETSC_FALSE;
1332:     return(0);
1333:   }
1334: 
1335:   /* if the a->i are the same */
1336:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);
1337:   if (*flg == PETSC_FALSE) {
1338:     return(0);
1339:   }
1340: 
1341:   /* if a->j are the same */
1342:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);
1343:   if (*flg == PETSC_FALSE) {
1344:     return(0);
1345:   }
1346:   /* if a->a are the same; a->a stores MatScalar, so size the comparison with that type */
1347:   PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(MatScalar),flg);
1348: 
1349:   return(0);
1350: }

/*
   MatGetDiagonal_SeqSBAIJ - Copies the diagonal of A into v.

   Input Parameter:
.  A - matrix whose data is a Mat_SeqSBAIJ

   Output Parameter:
.  v - vector of local length A->m; it is zeroed first, so rows whose block row does
       not start with a diagonal block keep the value 0

   Notes:
   Raises PETSC_ERR_ARG_WRONGSTATE for a factored matrix with bs > 1 and
   PETSC_ERR_ARG_SIZ when the local length of v differs from A->m.
   For a factored matrix with bs == 1 the reciprocal of the stored value is returned.

   NOTE(review): the length check is made after VecGetArray, and no VecRestoreArray is
   visible on that error path - confirm whether that matters for callers.
*/
1354: int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1355: {
1356:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1357:   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1358:   PetscScalar  *x,zero = 0.0;
1359:   MatScalar    *aa,*aa_j;

1362:   bs   = a->bs;
1363:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1364: 
1365:   aa   = a->a;
1366:   ai   = a->i;
1367:   aj   = a->j;
1368:   ambs = a->mbs;
1369:   bs2  = a->bs2;

1371:   VecSet(&zero,v);
1372:   VecGetArray(v,&x);
1373:   VecGetLocalSize(v,&n);
1374:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1375:   for (i=0; i<ambs; i++) {
    /* only the first stored block of block row i is tested for being the diagonal block */
1376:     j=ai[i];
1377:     if (aj[j] == i) {             /* if this is a diagonal element */
1378:       row  = i*bs;
1379:       aa_j = aa + j*bs2;
      /* a step of bs+1 visits offsets 0, bs+1, 2*(bs+1), ...: the diagonal of the bs x bs block */
1380:       if (A->factor && bs==1){
1381:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1382:       } else {
1383:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1384:       }
1385:     }
1386:   }
1387: 
1388:   VecRestoreArray(v,&x);
1389:   return(0);
1390: }

/*
   MatDiagonalScale_SeqSBAIJ - Scales the stored blocks of A with a diagonal scaling vector.

   Input Parameters:
+  A  - matrix whose data is a Mat_SeqSBAIJ; its values a->a are modified in place
.  ll - left scaling vector
-  rr - right scaling vector; must be the same object as ll, otherwise
        PETSC_ERR_ARG_OUTOFRANGE is raised

   Notes:
   When the (identical) vector is non-null two passes are made over the stored values.
   The first multiplies value k of every block in block row i by li[k%bs], where li is
   the piece of the vector belonging to block row i.  The second multiplies each run of
   bs consecutive values of a block by ri[k], where ri is the piece of the vector
   belonging to the block's column.  When both arguments are null nothing is changed.
   PETSC_ERR_ARG_SIZ is raised when the local vector length differs from A->m.

   The code between #ifdef CONT and #endif is compiled only when CONT is defined; its
   comment marks it as the intended replacement of the first pass.

   NOTE(review): ierr is declared but never assigned in this text - confirm against the
   original source.
*/
1394: int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1395: {
1396:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1397:   PetscScalar  *l,*r,x,*li,*ri;
1398:   MatScalar    *aa,*v;
1399:   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;

1402:   ai  = a->i;
1403:   aj  = a->j;
1404:   aa  = a->a;
1405:   m   = A->m;
1406:   bs  = a->bs;
1407:   mbs = a->mbs;
1408:   bs2 = a->bs2;

1410:   if (ll != rr) {
1411:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1412:   }
1413:   if (ll) {
1414:     VecGetArray(ll,&l);
1415:     VecGetLocalSize(ll,&lm);
1416:     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1417:     for (i=0; i<mbs; i++) { /* for each block row */
1418:       M  = ai[i+1] - ai[i];
1419:       li = l + i*bs;
1420:       v  = aa + bs2*ai[i];
1421:       for (j=0; j<M; j++) { /* for each block */
        /* value k of the block is scaled by entry k%bs of this block row's vector piece */
1422:         for (k=0; k<bs2; k++) {
1423:           (*v++) *= li[k%bs];
1424:         }
1425: #ifdef CONT
1426:         /* will be used to replace the above loop */
1427:         ri = l + bs*aj[ai[i]+j];
1428:         for (k=0; k<bs; k++) { /* column value */
1429:           x = ri[k];
1430:           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1431:         }
1432: #endif

1434:       }
1435:     }
1436:     VecRestoreArray(ll,&l);
1437:     PetscLogFlops(2*a->nz);
1438:   }
1439:   /* will be deleted */
1440:   if (rr) {
1441:     VecGetArray(rr,&r);
1442:     VecGetLocalSize(rr,&rn);
1443:     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1444:     for (i=0; i<mbs; i++) { /* for each block row */
1445:       M  = ai[i+1] - ai[i];
1446:       v  = aa + bs2*ai[i];
1447:       for (j=0; j<M; j++) { /* for each block */
        /* ri is the vector piece of this block's column; run k of bs values is scaled by ri[k] */
1448:         ri = r + bs*aj[ai[i]+j];
1449:         for (k=0; k<bs; k++) {
1450:           x = ri[k];
1451:           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1452:         }
1453:       }
1454:     }
1455:     VecRestoreArray(rr,&r);
1456:     PetscLogFlops(a->nz);
1457:   }
1458:   return(0);
1459: }

/*
   MatGetInfo_SeqSBAIJ - Fills a MatInfo structure from the matrix and its Mat_SeqSBAIJ data.

   Input Parameters:
+  A    - matrix whose data is a Mat_SeqSBAIJ
-  flag - not read by this routine

   Output Parameter:
.  info - all four row/column counts are set to A->m; the fill-ratio and factor-malloc
          fields are copied from A->info for a factored matrix and set to 0 otherwise

   Notes:
   NOTE(review): block_size is set to a->bs2 rather than a->bs, and nz_used is scaled by
   a->bs2 while nz_allocated is a->maxnz unscaled; whether a->maxnz already counts
   scalars cannot be seen from here - confirm, since nz_unneeded is their difference.
*/
1463: int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1464: {
1465:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1468:   info->rows_global    = (double)A->m;
1469:   info->columns_global = (double)A->m;
1470:   info->rows_local     = (double)A->m;
1471:   info->columns_local  = (double)A->m;
1472:   info->block_size     = a->bs2;
1473:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1474:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1475:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1476:   info->assemblies   = A->num_ass;
1477:   info->mallocs      = a->reallocs;
1478:   info->memory       = A->mem;
1479:   if (A->factor) {
1480:     info->fill_ratio_given  = A->info.fill_ratio_given;
1481:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1482:     info->factor_mallocs    = A->info.factor_mallocs;
1483:   } else {
1484:     info->fill_ratio_given  = 0;
1485:     info->fill_ratio_needed = 0;
1486:     info->factor_mallocs    = 0;
1487:   }
1488:   return(0);
1489: }


/*
   MatZeroEntries_SeqSBAIJ - Sets every stored value of A to zero.

   Input Parameter:
.  A - matrix whose data is a Mat_SeqSBAIJ; a->i and a->j are not touched

   Notes:
   The number of values cleared is a->bs2 times a->i[a->mbs], the last block row offset.

   NOTE(review): ierr is declared but never assigned in this text - confirm against the
   original source.
*/
1494: int MatZeroEntries_SeqSBAIJ(Mat A)
1495: {
1496:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1497:   int         ierr;

1500:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1501:   return(0);
1502: }

/*
   MatGetRowMax_SeqSBAIJ - For every row of A, stores in v the largest absolute value
   found among the stored entries.

   Input Parameter:
.  A - matrix whose data is a Mat_SeqSBAIJ; must not be factored
       (PETSC_ERR_ARG_WRONGSTATE otherwise)

   Output Parameter:
.  v - vector of local length A->m (PETSC_ERR_ARG_SIZ otherwise); it is zeroed first

   Notes:
   The values are read sequentially from a->a, with krow as the fastest index inside a
   block.  Each value updates the entry of v for its row; a value in a block whose
   column index is larger than the block row also updates the entry of v for its
   column, which accounts for the mirrored part that is not stored.

   NOTE(review): the length check is made after VecGetArray, and no VecRestoreArray is
   visible on that error path - confirm whether that matters for callers.
*/
1506: int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1507: {
1508:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1509:   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1510:   PetscReal    atmp;
1511:   MatScalar    *aa;
1512:   PetscScalar  zero = 0.0,*x;
1513:   int          ncols,brow,bcol,krow,kcol;

1516:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1517:   bs   = a->bs;
1518:   aa   = a->a;
1519:   ai   = a->i;
1520:   aj   = a->j;
1521:   mbs = a->mbs;

1523:   VecSet(&zero,v);
1524:   VecGetArray(v,&x);
1525:   VecGetLocalSize(v,&n);
1526:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1527:   for (i=0; i<mbs; i++) {
    /* ai, aj and aa are used as running cursors and advanced as entries are consumed */
1528:     ncols = ai[1] - ai[0]; ai++;
1529:     brow  = bs*i;
1530:     for (j=0; j<ncols; j++){
1531:       bcol = bs*(*aj);
1532:       for (kcol=0; kcol<bs; kcol++){
1533:         col = bcol + kcol;      /* col index */
1534:         for (krow=0; krow<bs; krow++){
1535:           atmp = PetscAbsScalar(*aa); aa++;
1536:           row = brow + krow;    /* row index */
1537:           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1538:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
          /* blocks right of the diagonal also stand for the mirrored entry in row col */
1539:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1540:         }
1541:       }
1542:       aj++;
1543:     }
1544:   }
1545:   VecRestoreArray(v,&x);
1546:   return(0);
1547: }
1547: }