Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 16: {
 17:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
 18:   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
 19:   PetscScalar *v = a->a,*odiag,*diag,*mdiag;

 22:   if (a->idiagvalid) return(0);
 23:   MatMarkDiagonal_SeqBAIJ(A);
 24:   diag_offset = a->diag;
 25:   if (!a->idiag) {
 26:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 27:   }
 28:   diag  = a->idiag;
 29:   mdiag = a->idiag+bs*bs*mbs;
 30:   /* factor and invert each block */
 31:   switch (a->bs){
 32:     case 2:
 33:       for (i=0; i<mbs; i++) {
 34:         odiag   = v + 4*diag_offset[i];
 35:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 36:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 37:         Kernel_A_gets_inverse_A_2(diag);
 38:         diag    += 4;
 39:         mdiag   += 4;
 40:       }
 41:       break;
 42:     case 3:
 43:       for (i=0; i<mbs; i++) {
 44:         odiag    = v + 9*diag_offset[i];
 45:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 46:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 47:         diag[8]  = odiag[8];
 48:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 49:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 50:         mdiag[8] = odiag[8];
 51:         Kernel_A_gets_inverse_A_3(diag);
 52:         diag    += 9;
 53:         mdiag   += 9;
 54:       }
 55:       break;
 56:     case 4:
 57:       for (i=0; i<mbs; i++) {
 58:         odiag  = v + 16*diag_offset[i];
 59:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 60:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 61:         Kernel_A_gets_inverse_A_4(diag);
 62:         diag  += 16;
 63:         mdiag += 16;
 64:       }
 65:       break;
 66:     case 5:
 67:       for (i=0; i<mbs; i++) {
 68:         odiag = v + 25*diag_offset[i];
 69:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 70:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 71:         Kernel_A_gets_inverse_A_5(diag);
 72:         diag  += 25;
 73:         mdiag += 25;
 74:       }
 75:       break;
 76:     default:
 77:       SETERRQ1(1,"not supported for block size %d",a->bs);
 78:   }
 79:   a->idiagvalid = PETSC_TRUE;
 80:   return(0);
 81: }

 85: int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
 86: {
 87:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
 88:   PetscScalar        *x,x1,x2,s1,s2;
 89:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
 90:   int                ierr,m = a->mbs,i,i2,nz,idx;
 91:   const int          *diag,*ai = a->i,*aj = a->j,*vi;

 94:   its = its*lits;
 95:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
 96:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
 97:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
 98:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
 99:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

101:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

103:   diag  = a->diag;
104:   idiag = a->idiag;
105:   VecGetArray(xx,&x);
106:   VecGetArray(bb,(PetscScalar**)&b);

108:   if (flag & SOR_ZERO_INITIAL_GUESS) {
109:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112:       i2     = 2;
113:       idiag += 4;
114:       for (i=1; i<m; i++) {
115:         v     = aa + 4*ai[i];
116:         vi    = aj + ai[i];
117:         nz    = diag[i] - ai[i];
118:         s1    = b[i2]; s2 = b[i2+1];
119:         while (nz--) {
120:           idx  = 2*(*vi++);
121:           x1   = x[idx]; x2 = x[1+idx];
122:           s1  -= v[0]*x1 + v[2]*x2;
123:           s2  -= v[1]*x1 + v[3]*x2;
124:           v   += 4;
125:         }
126:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128:         idiag   += 4;
129:         i2      += 2;
130:       }
131:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132:       PetscLogFlops(4*(a->nz));
133:     }
134:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136:       i2    = 0;
137:       mdiag = a->idiag+4*a->mbs;
138:       for (i=0; i<m; i++) {
139:         x1      = x[i2]; x2 = x[i2+1];
140:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142:         mdiag  += 4;
143:         i2     += 2;
144:       }
145:       PetscLogFlops(6*m);
146:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
148:     }
149:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150:       idiag   = a->idiag+4*a->mbs - 4;
151:       i2      = 2*m - 2;
152:       x1      = x[i2]; x2 = x[i2+1];
153:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155:       idiag -= 4;
156:       i2    -= 2;
157:       for (i=m-2; i>=0; i--) {
158:         v     = aa + 4*(diag[i]+1);
159:         vi    = aj + diag[i] + 1;
160:         nz    = ai[i+1] - diag[i] - 1;
161:         s1    = x[i2]; s2 = x[i2+1];
162:         while (nz--) {
163:           idx  = 2*(*vi++);
164:           x1   = x[idx]; x2 = x[1+idx];
165:           s1  -= v[0]*x1 + v[2]*x2;
166:           s2  -= v[1]*x1 + v[3]*x2;
167:           v   += 4;
168:         }
169:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171:         idiag   -= 4;
172:         i2      -= 2;
173:       }
174:       PetscLogFlops(4*(a->nz));
175:     }
176:   } else {
177:     SETERRQ(1,"Only supports point block SOR with zero initial guess");
178:   }
179:   VecRestoreArray(xx,&x);
180:   VecRestoreArray(bb,(PetscScalar**)&b);
181:   return(0);
182: }

186: int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
187: {
188:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191:   int                ierr,m = a->mbs,i,i2,nz,idx;
192:   const int          *diag,*ai = a->i,*aj = a->j,*vi;

195:   its = its*lits;
196:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
197:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
198:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
199:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
200:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

202:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

204:   diag  = a->diag;
205:   idiag = a->idiag;
206:   VecGetArray(xx,&x);
207:   VecGetArray(bb,(PetscScalar**)&b);

209:   if (flag & SOR_ZERO_INITIAL_GUESS) {
210:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
211:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
212:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
213:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
214:       i2     = 3;
215:       idiag += 9;
216:       for (i=1; i<m; i++) {
217:         v     = aa + 9*ai[i];
218:         vi    = aj + ai[i];
219:         nz    = diag[i] - ai[i];
220:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
221:         while (nz--) {
222:           idx  = 3*(*vi++);
223:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
224:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
225:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
226:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
227:           v   += 9;
228:         }
229:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
230:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
231:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
232:         idiag   += 9;
233:         i2      += 3;
234:       }
235:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
236:       PetscLogFlops(9*(a->nz));
237:     }
238:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
239:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
240:       i2    = 0;
241:       mdiag = a->idiag+9*a->mbs;
242:       for (i=0; i<m; i++) {
243:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
244:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
245:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
246:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
247:         mdiag  += 9;
248:         i2     += 3;
249:       }
250:       PetscLogFlops(15*m);
251:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
252:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
253:     }
254:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
255:       idiag   = a->idiag+9*a->mbs - 9;
256:       i2      = 3*m - 3;
257:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
258:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
259:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
260:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
261:       idiag -= 9;
262:       i2    -= 3;
263:       for (i=m-2; i>=0; i--) {
264:         v     = aa + 9*(diag[i]+1);
265:         vi    = aj + diag[i] + 1;
266:         nz    = ai[i+1] - diag[i] - 1;
267:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
268:         while (nz--) {
269:           idx  = 3*(*vi++);
270:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
271:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
272:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
273:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
274:           v   += 9;
275:         }
276:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
277:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
278:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
279:         idiag   -= 9;
280:         i2      -= 3;
281:       }
282:       PetscLogFlops(9*(a->nz));
283:     }
284:   } else {
285:     SETERRQ(1,"Only supports point block SOR with zero initial guess");
286:   }
287:   VecRestoreArray(xx,&x);
288:   VecRestoreArray(bb,(PetscScalar**)&b);
289:   return(0);
290: }

294: int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
295: {
296:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
297:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
298:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
299:   int                ierr,m = a->mbs,i,i2,nz,idx;
300:   const int          *diag,*ai = a->i,*aj = a->j,*vi;

303:   its = its*lits;
304:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
305:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
306:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
307:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
308:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

310:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

312:   diag  = a->diag;
313:   idiag = a->idiag;
314:   VecGetArray(xx,&x);
315:   VecGetArray(bb,(PetscScalar**)&b);

317:   if (flag & SOR_ZERO_INITIAL_GUESS) {
318:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
319:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
320:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
321:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
322:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
323:       i2     = 4;
324:       idiag += 16;
325:       for (i=1; i<m; i++) {
326:         v     = aa + 16*ai[i];
327:         vi    = aj + ai[i];
328:         nz    = diag[i] - ai[i];
329:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
330:         while (nz--) {
331:           idx  = 4*(*vi++);
332:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
333:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
334:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
335:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
336:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
337:           v   += 16;
338:         }
339:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
340:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
341:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
342:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
343:         idiag   += 16;
344:         i2      += 4;
345:       }
346:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
347:       PetscLogFlops(16*(a->nz));
348:     }
349:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
350:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
351:       i2    = 0;
352:       mdiag = a->idiag+16*a->mbs;
353:       for (i=0; i<m; i++) {
354:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
355:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
356:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
357:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
358:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
359:         mdiag  += 16;
360:         i2     += 4;
361:       }
362:       PetscLogFlops(28*m);
363:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
364:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
365:     }
366:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
367:       idiag   = a->idiag+16*a->mbs - 16;
368:       i2      = 4*m - 4;
369:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
370:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
371:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
372:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
373:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
374:       idiag -= 16;
375:       i2    -= 4;
376:       for (i=m-2; i>=0; i--) {
377:         v     = aa + 16*(diag[i]+1);
378:         vi    = aj + diag[i] + 1;
379:         nz    = ai[i+1] - diag[i] - 1;
380:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
381:         while (nz--) {
382:           idx  = 4*(*vi++);
383:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
384:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
385:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
386:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
387:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
388:           v   += 16;
389:         }
390:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
391:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
392:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
393:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
394:         idiag   -= 16;
395:         i2      -= 4;
396:       }
397:       PetscLogFlops(16*(a->nz));
398:     }
399:   } else {
400:     SETERRQ(1,"Only supports point block SOR with zero initial guess");
401:   }
402:   VecRestoreArray(xx,&x);
403:   VecRestoreArray(bb,(PetscScalar**)&b);
404:   return(0);
405: }

409: int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
410: {
411:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
412:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
413:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
414:   int                ierr,m = a->mbs,i,i2,nz,idx;
415:   const int          *diag,*ai = a->i,*aj = a->j,*vi;

418:   its = its*lits;
419:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
420:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
421:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
422:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
423:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

425:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

427:   diag  = a->diag;
428:   idiag = a->idiag;
429:   VecGetArray(xx,&x);
430:   VecGetArray(bb,(PetscScalar**)&b);

432:   if (flag & SOR_ZERO_INITIAL_GUESS) {
433:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
434:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
435:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
436:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
437:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
438:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
439:       i2     = 5;
440:       idiag += 25;
441:       for (i=1; i<m; i++) {
442:         v     = aa + 25*ai[i];
443:         vi    = aj + ai[i];
444:         nz    = diag[i] - ai[i];
445:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
446:         while (nz--) {
447:           idx  = 5*(*vi++);
448:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
449:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
450:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
451:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
452:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
453:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
454:           v   += 25;
455:         }
456:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
457:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
458:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
459:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
460:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
461:         idiag   += 25;
462:         i2      += 5;
463:       }
464:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465:       PetscLogFlops(25*(a->nz));
466:     }
467:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
468:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
469:       i2    = 0;
470:       mdiag = a->idiag+25*a->mbs;
471:       for (i=0; i<m; i++) {
472:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
473:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
474:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
475:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
476:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
477:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
478:         mdiag  += 25;
479:         i2     += 5;
480:       }
481:       PetscLogFlops(45*m);
482:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
483:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
484:     }
485:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
486:       idiag   = a->idiag+25*a->mbs - 25;
487:       i2      = 5*m - 5;
488:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
489:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
490:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
491:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
492:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
493:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
494:       idiag -= 25;
495:       i2    -= 5;
496:       for (i=m-2; i>=0; i--) {
497:         v     = aa + 25*(diag[i]+1);
498:         vi    = aj + diag[i] + 1;
499:         nz    = ai[i+1] - diag[i] - 1;
500:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
501:         while (nz--) {
502:           idx  = 5*(*vi++);
503:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
504:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
505:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
506:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
507:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
508:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
509:           v   += 25;
510:         }
511:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
512:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
513:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
514:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
515:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
516:         idiag   -= 25;
517:         i2      -= 5;
518:       }
519:       PetscLogFlops(25*(a->nz));
520:     }
521:   } else {
522:     SETERRQ(1,"Only supports point block SOR with zero initial guess");
523:   }
524:   VecRestoreArray(xx,&x);
525:   VecRestoreArray(bb,(PetscScalar**)&b);
526:   return(0);
527: }

529: /*
530:     Special version for Fun3d sequential benchmark
531: */
532: #if defined(PETSC_HAVE_FORTRAN_CAPS)
533: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
534: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
535: #define matsetvaluesblocked4_ matsetvaluesblocked4
536: #endif

538: EXTERN_C_BEGIN
541: void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
542: {
543:   Mat               A = *AA;
544:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
545:   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
546:   int               *ai=a->i,*ailen=a->ilen;
547:   int               *aj=a->j,stepval;
548:   const PetscScalar *value = v;
549:   MatScalar         *ap,*aa = a->a,*bap;

552:   stepval = (n-1)*4;
553:   for (k=0; k<m; k++) { /* loop over added rows */
554:     row  = im[k];
555:     rp   = aj + ai[row];
556:     ap   = aa + 16*ai[row];
557:     nrow = ailen[row];
558:     low  = 0;
559:     for (l=0; l<n; l++) { /* loop over added columns */
560:       col = in[l];
561:       value = v + k*(stepval+4)*4 + l*4;
562:       low = 0; high = nrow;
563:       while (high-low > 7) {
564:         t = (low+high)/2;
565:         if (rp[t] > col) high = t;
566:         else             low  = t;
567:       }
568:       for (i=low; i<high; i++) {
569:         if (rp[i] > col) break;
570:         if (rp[i] == col) {
571:           bap  = ap +  16*i;
572:           for (ii=0; ii<4; ii++,value+=stepval) {
573:             for (jj=ii; jj<16; jj+=4) {
574:               bap[jj] += *value++;
575:             }
576:           }
577:           goto noinsert2;
578:         }
579:       }
580:       N = nrow++ - 1;
581:       /* shift up all the later entries in this row */
582:       for (ii=N; ii>=i; ii--) {
583:         rp[ii+1] = rp[ii];
584:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
585:       }
586:       if (N >= i) {
587:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
588:       }
589:       rp[i] = col;
590:       bap   = ap +  16*i;
591:       for (ii=0; ii<4; ii++,value+=stepval) {
592:         for (jj=ii; jj<16; jj+=4) {
593:           bap[jj] = *value++;
594:         }
595:       }
596:       noinsert2:;
597:       low = i;
598:     }
599:     ailen[row] = nrow;
600:   }
601: }
602: EXTERN_C_END

604: #if defined(PETSC_HAVE_FORTRAN_CAPS)
605: #define matsetvalues4_ MATSETVALUES4
606: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
607: #define matsetvalues4_ matsetvalues4
608: #endif

610: EXTERN_C_BEGIN
613: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
614: {
615:   Mat         A = *AA;
616:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
617:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
618:   int         *ai=a->i,*ailen=a->ilen;
619:   int         *aj=a->j,brow,bcol;
620:   int         ridx,cidx;
621:   MatScalar   *ap,value,*aa=a->a,*bap;
622: 
624:   for (k=0; k<m; k++) { /* loop over added rows */
625:     row  = im[k]; brow = row/4;
626:     rp   = aj + ai[brow];
627:     ap   = aa + 16*ai[brow];
628:     nrow = ailen[brow];
629:     low  = 0;
630:     for (l=0; l<n; l++) { /* loop over added columns */
631:       col = in[l]; bcol = col/4;
632:       ridx = row % 4; cidx = col % 4;
633:       value = v[l + k*n];
634:       low = 0; high = nrow;
635:       while (high-low > 7) {
636:         t = (low+high)/2;
637:         if (rp[t] > bcol) high = t;
638:         else              low  = t;
639:       }
640:       for (i=low; i<high; i++) {
641:         if (rp[i] > bcol) break;
642:         if (rp[i] == bcol) {
643:           bap  = ap +  16*i + 4*cidx + ridx;
644:           *bap += value;
645:           goto noinsert1;
646:         }
647:       }
648:       N = nrow++ - 1;
649:       /* shift up all the later entries in this row */
650:       for (ii=N; ii>=i; ii--) {
651:         rp[ii+1] = rp[ii];
652:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
653:       }
654:       if (N>=i) {
655:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
656:       }
657:       rp[i]                    = bcol;
658:       ap[16*i + 4*cidx + ridx] = value;
659:       noinsert1:;
660:       low = i;
661:     }
662:     ailen[brow] = nrow;
663:   }
664: }
665: EXTERN_C_END

667: /*  UGLY, ugly, ugly
668:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
669:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
670:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
671:    converts the entries into single precision and then calls ..._MatScalar() to put them
672:    into the single precision data structures.
673: */
674: #if defined(PETSC_USE_MAT_SINGLE)
675: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
676: #else
677: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
678: #endif

680: #define CHUNKSIZE  10

682: /*
683:      Checks for missing diagonals
684: */
687: int MatMissingDiagonal_SeqBAIJ(Mat A)
688: {
689:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690:   int         *diag,*jj = a->j,i,ierr;

693:   MatMarkDiagonal_SeqBAIJ(A);
694:   diag = a->diag;
695:   for (i=0; i<a->mbs; i++) {
696:     if (jj[diag[i]] != i) {
697:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
698:     }
699:   }
700:   return(0);
701: }

705: int MatMarkDiagonal_SeqBAIJ(Mat A)
706: {
707:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
708:   int         i,j,*diag,m = a->mbs,ierr;

711:   if (a->diag) return(0);

713:   PetscMalloc((m+1)*sizeof(int),&diag);
714:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
715:   for (i=0; i<m; i++) {
716:     diag[i] = a->i[i+1];
717:     for (j=a->i[i]; j<a->i[i+1]; j++) {
718:       if (a->j[j] == i) {
719:         diag[i] = j;
720:         break;
721:       }
722:     }
723:   }
724:   a->diag = diag;
725:   return(0);
726: }


729: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

733: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
734: {
735:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
736:   int         ierr,n = a->mbs,i;

739:   *nn = n;
740:   if (!ia) return(0);
741:   if (symmetric) {
742:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
743:   } else if (oshift == 1) {
744:     /* temporarily add 1 to i and j indices */
745:     int nz = a->i[n];
746:     for (i=0; i<nz; i++) a->j[i]++;
747:     for (i=0; i<n+1; i++) a->i[i]++;
748:     *ia = a->i; *ja = a->j;
749:   } else {
750:     *ia = a->i; *ja = a->j;
751:   }

753:   return(0);
754: }

758: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
759: {
760:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
761:   int         i,n = a->mbs,ierr;

764:   if (!ia) return(0);
765:   if (symmetric) {
766:     PetscFree(*ia);
767:     PetscFree(*ja);
768:   } else if (oshift == 1) {
769:     int nz = a->i[n]-1;
770:     for (i=0; i<nz; i++) a->j[i]--;
771:     for (i=0; i<n+1; i++) a->i[i]--;
772:   }
773:   return(0);
774: }

778: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
779: {
780:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

783:   *bs = baij->bs;
784:   return(0);
785: }


790: int MatDestroy_SeqBAIJ(Mat A)
791: {
792:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793:   int         ierr;

796: #if defined(PETSC_USE_LOG)
797:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
798: #endif
799:   PetscFree(a->a);
800:   if (!a->singlemalloc) {
801:     PetscFree(a->i);
802:     PetscFree(a->j);
803:   }
804:   if (a->row) {
805:     ISDestroy(a->row);
806:   }
807:   if (a->col) {
808:     ISDestroy(a->col);
809:   }
810:   if (a->diag) {PetscFree(a->diag);}
811:   if (a->idiag) {PetscFree(a->idiag);}
812:   if (a->ilen) {PetscFree(a->ilen);}
813:   if (a->imax) {PetscFree(a->imax);}
814:   if (a->solve_work) {PetscFree(a->solve_work);}
815:   if (a->mult_work) {PetscFree(a->mult_work);}
816:   if (a->icol) {ISDestroy(a->icol);}
817:   if (a->saved_values) {PetscFree(a->saved_values);}
818: #if defined(PETSC_USE_MAT_SINGLE)
819:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
820: #endif
821:   if (a->xtoy) {PetscFree(a->xtoy);}

823:   PetscFree(a);
824:   return(0);
825: }

829: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
830: {
831:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

834:   switch (op) {
835:   case MAT_ROW_ORIENTED:
836:     a->roworiented    = PETSC_TRUE;
837:     break;
838:   case MAT_COLUMN_ORIENTED:
839:     a->roworiented    = PETSC_FALSE;
840:     break;
841:   case MAT_COLUMNS_SORTED:
842:     a->sorted         = PETSC_TRUE;
843:     break;
844:   case MAT_COLUMNS_UNSORTED:
845:     a->sorted         = PETSC_FALSE;
846:     break;
847:   case MAT_KEEP_ZEROED_ROWS:
848:     a->keepzeroedrows = PETSC_TRUE;
849:     break;
850:   case MAT_NO_NEW_NONZERO_LOCATIONS:
851:     a->nonew          = 1;
852:     break;
853:   case MAT_NEW_NONZERO_LOCATION_ERR:
854:     a->nonew          = -1;
855:     break;
856:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
857:     a->nonew          = -2;
858:     break;
859:   case MAT_YES_NEW_NONZERO_LOCATIONS:
860:     a->nonew          = 0;
861:     break;
862:   case MAT_ROWS_SORTED:
863:   case MAT_ROWS_UNSORTED:
864:   case MAT_YES_NEW_DIAGONALS:
865:   case MAT_IGNORE_OFF_PROC_ENTRIES:
866:   case MAT_USE_HASH_TABLE:
867:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
868:     break;
869:   case MAT_NO_NEW_DIAGONALS:
870:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
871:   case MAT_SYMMETRIC:
872:   case MAT_STRUCTURALLY_SYMMETRIC:
873:   case MAT_NOT_SYMMETRIC:
874:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
875:   case MAT_HERMITIAN:
876:   case MAT_NOT_HERMITIAN:
877:   case MAT_SYMMETRY_ETERNAL:
878:   case MAT_NOT_SYMMETRY_ETERNAL:
879:     break;
880:   default:
881:     SETERRQ(PETSC_ERR_SUP,"unknown option");
882:   }
883:   return(0);
884: }

888: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
889: {
890:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
891:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
892:   MatScalar    *aa,*aa_i;
893:   PetscScalar  *v_i;

896:   bs  = a->bs;
897:   ai  = a->i;
898:   aj  = a->j;
899:   aa  = a->a;
900:   bs2 = a->bs2;
901: 
902:   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
903: 
904:   bn  = row/bs;   /* Block number */
905:   bp  = row % bs; /* Block Position */
906:   M   = ai[bn+1] - ai[bn];
907:   *nz = bs*M;
908: 
909:   if (v) {
910:     *v = 0;
911:     if (*nz) {
912:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
913:       for (i=0; i<M; i++) { /* for each block in the block row */
914:         v_i  = *v + i*bs;
915:         aa_i = aa + bs2*(ai[bn] + i);
916:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
917:       }
918:     }
919:   }

921:   if (idx) {
922:     *idx = 0;
923:     if (*nz) {
924:       PetscMalloc((*nz)*sizeof(int),idx);
925:       for (i=0; i<M; i++) { /* for each block in the block row */
926:         idx_i = *idx + i*bs;
927:         itmp  = bs*aj[ai[bn] + i];
928:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
929:       }
930:     }
931:   }
932:   return(0);
933: }

937: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
938: {

942:   if (idx) {if (*idx) {PetscFree(*idx);}}
943:   if (v)   {if (*v)   {PetscFree(*v);}}
944:   return(0);
945: }

949: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
950: {
951:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
952:   Mat         C;
953:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
954:   int         *rows,*cols,bs2=a->bs2;
955:   PetscScalar *array;

958:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
959:   PetscMalloc((1+nbs)*sizeof(int),&col);
960:   PetscMemzero(col,(1+nbs)*sizeof(int));

962: #if defined(PETSC_USE_MAT_SINGLE)
963:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
964:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
965: #else
966:   array = a->a;
967: #endif

969:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
970:   MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);
971:   MatSetType(C,A->type_name);
972:   MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);
973:   PetscFree(col);
974:   PetscMalloc(2*bs*sizeof(int),&rows);
975:   cols = rows + bs;
976:   for (i=0; i<mbs; i++) {
977:     cols[0] = i*bs;
978:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
979:     len = ai[i+1] - ai[i];
980:     for (j=0; j<len; j++) {
981:       rows[0] = (*aj++)*bs;
982:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
983:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
984:       array += bs2;
985:     }
986:   }
987:   PetscFree(rows);
988: #if defined(PETSC_USE_MAT_SINGLE)
989:   PetscFree(array);
990: #endif
991: 
992:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
993:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
994: 
995:   if (B) {
996:     *B = C;
997:   } else {
998:     MatHeaderCopy(A,C);
999:   }
1000:   return(0);
1001: }

1005: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1006: {
1007:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1008:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1009:   PetscScalar *aa;
1010:   FILE        *file;

1013:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1014:   PetscMalloc((4+A->m)*sizeof(int),&col_lens);
1015:   col_lens[0] = MAT_FILE_COOKIE;

1017:   col_lens[1] = A->m;
1018:   col_lens[2] = A->n;
1019:   col_lens[3] = a->nz*bs2;

1021:   /* store lengths of each row and write (including header) to file */
1022:   count = 0;
1023:   for (i=0; i<a->mbs; i++) {
1024:     for (j=0; j<bs; j++) {
1025:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1026:     }
1027:   }
1028:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
1029:   PetscFree(col_lens);

1031:   /* store column indices (zero start index) */
1032:   PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
1033:   count = 0;
1034:   for (i=0; i<a->mbs; i++) {
1035:     for (j=0; j<bs; j++) {
1036:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1037:         for (l=0; l<bs; l++) {
1038:           jj[count++] = bs*a->j[k] + l;
1039:         }
1040:       }
1041:     }
1042:   }
1043:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
1044:   PetscFree(jj);

1046:   /* store nonzero values */
1047:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1048:   count = 0;
1049:   for (i=0; i<a->mbs; i++) {
1050:     for (j=0; j<bs; j++) {
1051:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1052:         for (l=0; l<bs; l++) {
1053:           aa[count++] = a->a[bs2*k + l*bs + j];
1054:         }
1055:       }
1056:     }
1057:   }
1058:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
1059:   PetscFree(aa);

1061:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1062:   if (file) {
1063:     fprintf(file,"-matload_block_size %d\n",a->bs);
1064:   }
1065:   return(0);
1066: }

1070: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1071: {
1072:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1073:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
1074:   PetscViewerFormat format;

1077:   PetscViewerGetFormat(viewer,&format);
1078:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1079:     PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
1080:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1081:     Mat aij;
1082:     MatConvert(A,MATSEQAIJ,&aij);
1083:     MatView(aij,viewer);
1084:     MatDestroy(aij);
1085:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086:      return(0);
1087:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1088:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1089:     for (i=0; i<a->mbs; i++) {
1090:       for (j=0; j<bs; j++) {
1091:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
1092:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1093:           for (l=0; l<bs; l++) {
1094: #if defined(PETSC_USE_COMPLEX)
1095:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1096:               PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1097:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1098:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1099:               PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1100:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1101:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1102:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1103:             }
1104: #else
1105:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1106:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1107:             }
1108: #endif
1109:           }
1110:         }
1111:         PetscViewerASCIIPrintf(viewer,"\n");
1112:       }
1113:     }
1114:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1115:   } else {
1116:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1117:     for (i=0; i<a->mbs; i++) {
1118:       for (j=0; j<bs; j++) {
1119:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
1120:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1121:           for (l=0; l<bs; l++) {
1122: #if defined(PETSC_USE_COMPLEX)
1123:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1124:               PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1125:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1126:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1127:               PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1128:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1129:             } else {
1130:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1131:             }
1132: #else
1133:             PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1134: #endif
1135:           }
1136:         }
1137:         PetscViewerASCIIPrintf(viewer,"\n");
1138:       }
1139:     }
1140:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1141:   }
1142:   PetscViewerFlush(viewer);
1143:   return(0);
1144: }

1148: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1149: {
1150:   Mat          A = (Mat) Aa;
1151:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1152:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1153:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1154:   MatScalar    *aa;
1155:   PetscViewer  viewer;


1159:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1160:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1162:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1164:   /* loop over matrix elements drawing boxes */
1165:   color = PETSC_DRAW_BLUE;
1166:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1167:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1168:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1169:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1170:       aa = a->a + j*bs2;
1171:       for (k=0; k<bs; k++) {
1172:         for (l=0; l<bs; l++) {
1173:           if (PetscRealPart(*aa++) >=  0.) continue;
1174:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1175:         }
1176:       }
1177:     }
1178:   }
1179:   color = PETSC_DRAW_CYAN;
1180:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1181:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1182:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1183:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1184:       aa = a->a + j*bs2;
1185:       for (k=0; k<bs; k++) {
1186:         for (l=0; l<bs; l++) {
1187:           if (PetscRealPart(*aa++) != 0.) continue;
1188:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1189:         }
1190:       }
1191:     }
1192:   }

1194:   color = PETSC_DRAW_RED;
1195:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1196:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1197:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1198:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1199:       aa = a->a + j*bs2;
1200:       for (k=0; k<bs; k++) {
1201:         for (l=0; l<bs; l++) {
1202:           if (PetscRealPart(*aa++) <= 0.) continue;
1203:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1204:         }
1205:       }
1206:     }
1207:   }
1208:   return(0);
1209: }

1213: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1214: {
1215:   int          ierr;
1216:   PetscReal    xl,yl,xr,yr,w,h;
1217:   PetscDraw    draw;
1218:   PetscTruth   isnull;


1222:   PetscViewerDrawGetDraw(viewer,0,&draw);
1223:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1225:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1226:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1227:   xr += w;    yr += h;  xl = -w;     yl = -h;
1228:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1229:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1230:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1231:   return(0);
1232: }

1236: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1237: {
1238:   int        ierr;
1239:   PetscTruth isascii,isbinary,isdraw;

1242:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1243:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1244:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1245:   if (isascii){
1246:     MatView_SeqBAIJ_ASCII(A,viewer);
1247:   } else if (isbinary) {
1248:     MatView_SeqBAIJ_Binary(A,viewer);
1249:   } else if (isdraw) {
1250:     MatView_SeqBAIJ_Draw(A,viewer);
1251:   } else {
1252:     Mat B;
1253:     MatConvert(A,MATSEQAIJ,&B);
1254:     MatView(B,viewer);
1255:     MatDestroy(B);
1256:   }
1257:   return(0);
1258: }


1263: int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1264: {
1265:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1266:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1267:   int        *ai = a->i,*ailen = a->ilen;
1268:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1269:   MatScalar  *ap,*aa = a->a,zero = 0.0;

1272:   for (k=0; k<m; k++) { /* loop over rows */
1273:     row  = im[k]; brow = row/bs;
1274:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1275:     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1276:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1277:     nrow = ailen[brow];
1278:     for (l=0; l<n; l++) { /* loop over columns */
1279:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1280:       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1281:       col  = in[l] ;
1282:       bcol = col/bs;
1283:       cidx = col%bs;
1284:       ridx = row%bs;
1285:       high = nrow;
1286:       low  = 0; /* assume unsorted */
1287:       while (high-low > 5) {
1288:         t = (low+high)/2;
1289:         if (rp[t] > bcol) high = t;
1290:         else             low  = t;
1291:       }
1292:       for (i=low; i<high; i++) {
1293:         if (rp[i] > bcol) break;
1294:         if (rp[i] == bcol) {
1295:           *v++ = ap[bs2*i+bs*cidx+ridx];
1296:           goto finished;
1297:         }
1298:       }
1299:       *v++ = zero;
1300:       finished:;
1301:     }
1302:   }
1303:   return(0);
1304: }

1306: #if defined(PETSC_USE_MAT_SINGLE)
1309: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1310: {
1311:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1312:   int         ierr,i,N = m*n*b->bs2;
1313:   MatScalar   *vsingle;

1316:   if (N > b->setvalueslen) {
1317:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
1318:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1319:     b->setvalueslen  = N;
1320:   }
1321:   vsingle = b->setvaluescopy;
1322:   for (i=0; i<N; i++) {
1323:     vsingle[i] = v[i];
1324:   }
1325:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1326:   return(0);
1327: }
1328: #endif


1333: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1334: {
1335:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1336:   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1337:   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1338:   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1339:   PetscTruth        roworiented=a->roworiented;
1340:   const MatScalar   *value = v;
1341:   MatScalar         *ap,*aa = a->a,*bap;

1344:   if (roworiented) {
1345:     stepval = (n-1)*bs;
1346:   } else {
1347:     stepval = (m-1)*bs;
1348:   }
1349:   for (k=0; k<m; k++) { /* loop over added rows */
1350:     row  = im[k];
1351:     if (row < 0) continue;
1352: #if defined(PETSC_USE_BOPT_g)  
1353:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1354: #endif
1355:     rp   = aj + ai[row];
1356:     ap   = aa + bs2*ai[row];
1357:     rmax = imax[row];
1358:     nrow = ailen[row];
1359:     low  = 0;
1360:     for (l=0; l<n; l++) { /* loop over added columns */
1361:       if (in[l] < 0) continue;
1362: #if defined(PETSC_USE_BOPT_g)  
1363:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1364: #endif
1365:       col = in[l];
1366:       if (roworiented) {
1367:         value = v + k*(stepval+bs)*bs + l*bs;
1368:       } else {
1369:         value = v + l*(stepval+bs)*bs + k*bs;
1370:       }
1371:       if (!sorted) low = 0; high = nrow;
1372:       while (high-low > 7) {
1373:         t = (low+high)/2;
1374:         if (rp[t] > col) high = t;
1375:         else             low  = t;
1376:       }
1377:       for (i=low; i<high; i++) {
1378:         if (rp[i] > col) break;
1379:         if (rp[i] == col) {
1380:           bap  = ap +  bs2*i;
1381:           if (roworiented) {
1382:             if (is == ADD_VALUES) {
1383:               for (ii=0; ii<bs; ii++,value+=stepval) {
1384:                 for (jj=ii; jj<bs2; jj+=bs) {
1385:                   bap[jj] += *value++;
1386:                 }
1387:               }
1388:             } else {
1389:               for (ii=0; ii<bs; ii++,value+=stepval) {
1390:                 for (jj=ii; jj<bs2; jj+=bs) {
1391:                   bap[jj] = *value++;
1392:                 }
1393:               }
1394:             }
1395:           } else {
1396:             if (is == ADD_VALUES) {
1397:               for (ii=0; ii<bs; ii++,value+=stepval) {
1398:                 for (jj=0; jj<bs; jj++) {
1399:                   *bap++ += *value++;
1400:                 }
1401:               }
1402:             } else {
1403:               for (ii=0; ii<bs; ii++,value+=stepval) {
1404:                 for (jj=0; jj<bs; jj++) {
1405:                   *bap++  = *value++;
1406:                 }
1407:               }
1408:             }
1409:           }
1410:           goto noinsert2;
1411:         }
1412:       }
1413:       if (nonew == 1) goto noinsert2;
1414:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1415:       if (nrow >= rmax) {
1416:         /* there is no extra room in row, therefore enlarge */
1417:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1418:         MatScalar *new_a;

1420:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);

1422:         /* malloc new storage space */
1423:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1424:         PetscMalloc(len,&new_a);
1425:         new_j   = (int*)(new_a + bs2*new_nz);
1426:         new_i   = new_j + new_nz;

1428:         /* copy over old data into new slots */
1429:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1430:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1431:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
1432:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1433:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
1434:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
1435:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1436:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
1437:         /* free up old matrix storage */
1438:         PetscFree(a->a);
1439:         if (!a->singlemalloc) {
1440:           PetscFree(a->i);
1441:           PetscFree(a->j);
1442:         }
1443:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1444:         a->singlemalloc = PETSC_TRUE;

1446:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1447:         rmax = imax[row] = imax[row] + CHUNKSIZE;
1448:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1449:         a->maxnz += bs2*CHUNKSIZE;
1450:         a->reallocs++;
1451:         a->nz++;
1452:       }
1453:       N = nrow++ - 1;
1454:       /* shift up all the later entries in this row */
1455:       for (ii=N; ii>=i; ii--) {
1456:         rp[ii+1] = rp[ii];
1457:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1458:       }
1459:       if (N >= i) {
1460:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1461:       }
1462:       rp[i] = col;
1463:       bap   = ap +  bs2*i;
1464:       if (roworiented) {
1465:         for (ii=0; ii<bs; ii++,value+=stepval) {
1466:           for (jj=ii; jj<bs2; jj+=bs) {
1467:             bap[jj] = *value++;
1468:           }
1469:         }
1470:       } else {
1471:         for (ii=0; ii<bs; ii++,value+=stepval) {
1472:           for (jj=0; jj<bs; jj++) {
1473:             *bap++  = *value++;
1474:           }
1475:         }
1476:       }
1477:       noinsert2:;
1478:       low = i;
1479:     }
1480:     ailen[row] = nrow;
1481:   }
1482:   return(0);
1483: }

1487: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1488: {
1489:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1490:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1491:   int        m = A->m,*ip,N,*ailen = a->ilen;
1492:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1493:   MatScalar  *aa = a->a,*ap;

1496:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1498:   if (m) rmax = ailen[0];
1499:   for (i=1; i<mbs; i++) {
1500:     /* move each row back by the amount of empty slots (fshift) before it*/
1501:     fshift += imax[i-1] - ailen[i-1];
1502:     rmax   = PetscMax(rmax,ailen[i]);
1503:     if (fshift) {
1504:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1505:       N = ailen[i];
1506:       for (j=0; j<N; j++) {
1507:         ip[j-fshift] = ip[j];
1508:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1509:       }
1510:     }
1511:     ai[i] = ai[i-1] + ailen[i-1];
1512:   }
1513:   if (mbs) {
1514:     fshift += imax[mbs-1] - ailen[mbs-1];
1515:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1516:   }
1517:   /* reset ilen and imax for each row */
1518:   for (i=0; i<mbs; i++) {
1519:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1520:   }
1521:   a->nz = ai[mbs];

1523:   /* diagonals may have moved, so kill the diagonal pointers */
1524:   a->idiagvalid = PETSC_FALSE;
1525:   if (fshift && a->diag) {
1526:     PetscFree(a->diag);
1527:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1528:     a->diag = 0;
1529:   }
1530:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1531:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1532:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1533:   a->reallocs          = 0;
1534:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1536:   return(0);
1537: }



1541: /* 
1542:    This function returns an array of flags which indicate the locations of contiguous
1543:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1544:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1545:    Assume: sizes should be long enough to hold all the values.
1546: */
1549: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1550: {
1551:   int        i,j,k,row;
1552:   PetscTruth flg;

1555:   for (i=0,j=0; i<n; j++) {
1556:     row = idx[i];
1557:     if (row%bs!=0) { /* Not the begining of a block */
1558:       sizes[j] = 1;
1559:       i++;
1560:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1561:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1562:       i++;
1563:     } else { /* Begining of the block, so check if the complete block exists */
1564:       flg = PETSC_TRUE;
1565:       for (k=1; k<bs; k++) {
1566:         if (row+k != idx[i+k]) { /* break in the block */
1567:           flg = PETSC_FALSE;
1568:           break;
1569:         }
1570:       }
1571:       if (flg == PETSC_TRUE) { /* No break in the bs */
1572:         sizes[j] = bs;
1573:         i+= bs;
1574:       } else {
1575:         sizes[j] = 1;
1576:         i++;
1577:       }
1578:     }
1579:   }
1580:   *bs_max = j;
1581:   return(0);
1582: }
1583: 
1586: int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1587: {
1588:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1589:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1590:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1591:   PetscScalar zero = 0.0;
1592:   MatScalar   *aa;

1595:   /* Make a copy of the IS and  sort it */
1596:   ISGetLocalSize(is,&is_n);
1597:   ISGetIndices(is,&is_idx);

1599:   /* allocate memory for rows,sizes */
1600:   PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1601:   sizes = rows + is_n;

1603:   /* copy IS values to rows, and sort them */
1604:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1605:   PetscSortInt(is_n,rows);
1606:   if (baij->keepzeroedrows) {
1607:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1608:     bs_max = is_n;
1609:   } else {
1610:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1611:   }
1612:   ISRestoreIndices(is,&is_idx);

1614:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1615:     row   = rows[j];
1616:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1617:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1618:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1619:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1620:       if (diag) {
1621:         if (baij->ilen[row/bs] > 0) {
1622:           baij->ilen[row/bs]       = 1;
1623:           baij->j[baij->i[row/bs]] = row/bs;
1624:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1625:         }
1626:         /* Now insert all the diagonal values for this bs */
1627:         for (k=0; k<bs; k++) {
1628:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1629:         }
1630:       } else { /* (!diag) */
1631:         baij->ilen[row/bs] = 0;
1632:       } /* end (!diag) */
1633:     } else { /* (sizes[i] != bs) */
1634: #if defined (PETSC_USE_DEBUG)
1635:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1636: #endif
1637:       for (k=0; k<count; k++) {
1638:         aa[0] =  zero;
1639:         aa    += bs;
1640:       }
1641:       if (diag) {
1642:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1643:       }
1644:     }
1645:   }

1647:   PetscFree(rows);
1648:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1649:   return(0);
1650: }

1654: int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1655: {
1656:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1657:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1658:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1659:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1660:   int         ridx,cidx,bs2=a->bs2,ierr;
1661:   PetscTruth  roworiented=a->roworiented;
1662:   MatScalar   *ap,value,*aa=a->a,*bap;

1665:   for (k=0; k<m; k++) { /* loop over added rows */
1666:     row  = im[k]; brow = row/bs;
1667:     if (row < 0) continue;
1668: #if defined(PETSC_USE_BOPT_g)  
1669:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1670: #endif
1671:     rp   = aj + ai[brow];
1672:     ap   = aa + bs2*ai[brow];
1673:     rmax = imax[brow];
1674:     nrow = ailen[brow];
1675:     low  = 0;
1676:     for (l=0; l<n; l++) { /* loop over added columns */
1677:       if (in[l] < 0) continue;
1678: #if defined(PETSC_USE_BOPT_g)  
1679:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1680: #endif
1681:       col = in[l]; bcol = col/bs;
1682:       ridx = row % bs; cidx = col % bs;
1683:       if (roworiented) {
1684:         value = v[l + k*n];
1685:       } else {
1686:         value = v[k + l*m];
1687:       }
1688:       if (!sorted) low = 0; high = nrow;
1689:       while (high-low > 7) {
1690:         t = (low+high)/2;
1691:         if (rp[t] > bcol) high = t;
1692:         else              low  = t;
1693:       }
1694:       for (i=low; i<high; i++) {
1695:         if (rp[i] > bcol) break;
1696:         if (rp[i] == bcol) {
1697:           bap  = ap +  bs2*i + bs*cidx + ridx;
1698:           if (is == ADD_VALUES) *bap += value;
1699:           else                  *bap  = value;
1700:           goto noinsert1;
1701:         }
1702:       }
1703:       if (nonew == 1) goto noinsert1;
1704:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1705:       if (nrow >= rmax) {
1706:         /* there is no extra room in row, therefore enlarge */
1707:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1708:         MatScalar *new_a;

1710:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);

1712:         /* Malloc new storage space */
1713:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1714:         PetscMalloc(len,&new_a);
1715:         new_j   = (int*)(new_a + bs2*new_nz);
1716:         new_i   = new_j + new_nz;

1718:         /* copy over old data into new slots */
1719:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1720:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1721:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1722:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1723:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1724:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1725:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1726:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1727:         /* free up old matrix storage */
1728:         PetscFree(a->a);
1729:         if (!a->singlemalloc) {
1730:           PetscFree(a->i);
1731:           PetscFree(a->j);
1732:         }
1733:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1734:         a->singlemalloc = PETSC_TRUE;

1736:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1737:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1738:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1739:         a->maxnz += bs2*CHUNKSIZE;
1740:         a->reallocs++;
1741:         a->nz++;
1742:       }
1743:       N = nrow++ - 1;
1744:       /* shift up all the later entries in this row */
1745:       for (ii=N; ii>=i; ii--) {
1746:         rp[ii+1] = rp[ii];
1747:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1748:       }
1749:       if (N>=i) {
1750:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1751:       }
1752:       rp[i]                      = bcol;
1753:       ap[bs2*i + bs*cidx + ridx] = value;
1754:       noinsert1:;
1755:       low = i;
1756:     }
1757:     ailen[brow] = nrow;
1758:   }
1759:   return(0);
1760: }


1765: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1766: {
1767:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1768:   Mat         outA;
1769:   int         ierr;
1770:   PetscTruth  row_identity,col_identity;

1773:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1774:   ISIdentity(row,&row_identity);
1775:   ISIdentity(col,&col_identity);
1776:   if (!row_identity || !col_identity) {
1777:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1778:   }

1780:   outA          = inA;
1781:   inA->factor   = FACTOR_LU;

1783:   if (!a->diag) {
1784:     MatMarkDiagonal_SeqBAIJ(inA);
1785:   }

1787:   a->row        = row;
1788:   a->col        = col;
1789:   PetscObjectReference((PetscObject)row);
1790:   PetscObjectReference((PetscObject)col);
1791: 
1792:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1793:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1794:   PetscLogObjectParent(inA,a->icol);
1795: 
1796:   /*
1797:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1798:       for ILU(0) factorization with natural ordering
1799:   */
1800:   if (a->bs < 8) {
1801:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1802:   } else {
1803:     if (!a->solve_work) {
1804:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1805:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1806:     }
1807:   }

1809:   MatLUFactorNumeric(inA,&outA);

1811:   return(0);
1812: }
1815: int MatPrintHelp_SeqBAIJ(Mat A)
1816: {
1817:   static PetscTruth called = PETSC_FALSE;
1818:   MPI_Comm          comm = A->comm;
1819:   int               ierr;

1822:   if (called) {return(0);} else called = PETSC_TRUE;
1823:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1824:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1825:   return(0);
1826: }

1828: EXTERN_C_BEGIN
1831: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1832: {
1833:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1834:   int         i,nz,nbs;

1837:   nz  = baij->maxnz/baij->bs2;
1838:   nbs = baij->nbs;
1839:   for (i=0; i<nz; i++) {
1840:     baij->j[i] = indices[i];
1841:   }
1842:   baij->nz = nz;
1843:   for (i=0; i<nbs; i++) {
1844:     baij->ilen[i] = baij->imax[i];
1845:   }

1847:   return(0);
1848: }
1849: EXTERN_C_END

1853: /*@
1854:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1855:        in the matrix.

1857:   Input Parameters:
1858: +  mat - the SeqBAIJ matrix
1859: -  indices - the column indices

1861:   Level: advanced

1863:   Notes:
1864:     This can be called if you have precomputed the nonzero structure of the 
1865:   matrix and want to provide it to the matrix object to improve the performance
1866:   of the MatSetValues() operation.

1868:     You MUST have set the correct numbers of nonzeros per row in the call to 
1869:   MatCreateSeqBAIJ().

1871:     MUST be called before any calls to MatSetValues();

1873: @*/
1874: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1875: {
1876:   int ierr,(*f)(Mat,int *);

1881:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1882:   if (f) {
1883:     (*f)(mat,indices);
1884:   } else {
1885:     SETERRQ(1,"Wrong type of matrix to set column indices");
1886:   }
1887:   return(0);
1888: }

1892: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1893: {
1894:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1895:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1896:   PetscReal    atmp;
1897:   PetscScalar  *x,zero = 0.0;
1898:   MatScalar    *aa;
1899:   int          ncols,brow,krow,kcol;

1902:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1903:   bs   = a->bs;
1904:   aa   = a->a;
1905:   ai   = a->i;
1906:   aj   = a->j;
1907:   mbs = a->mbs;

1909:   VecSet(&zero,v);
1910:   VecGetArray(v,&x);
1911:   VecGetLocalSize(v,&n);
1912:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1913:   for (i=0; i<mbs; i++) {
1914:     ncols = ai[1] - ai[0]; ai++;
1915:     brow  = bs*i;
1916:     for (j=0; j<ncols; j++){
1917:       /* bcol = bs*(*aj); */
1918:       for (kcol=0; kcol<bs; kcol++){
1919:         for (krow=0; krow<bs; krow++){
1920:           atmp = PetscAbsScalar(*aa); aa++;
1921:           row = brow + krow;    /* row index */
1922:           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1923:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1924:         }
1925:       }
1926:       aj++;
1927:     }
1928:   }
1929:   VecRestoreArray(v,&x);
1930:   return(0);
1931: }

1935: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1936: {
1937:   int        ierr;

1940:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1941:   return(0);
1942: }

1946: int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1947: {
1948:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1950:   *array = a->a;
1951:   return(0);
1952: }

1956: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1957: {
1959:   return(0);
1960: }

1962:  #include petscblaslapack.h
1965: int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1966: {
1967:   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1968:   int          ierr,one=1,i,bs=y->bs,j,bs2;

1971:   if (str == SAME_NONZERO_PATTERN) {
1972:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1973:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974:     if (y->xtoy && y->XtoY != X) {
1975:       PetscFree(y->xtoy);
1976:       MatDestroy(y->XtoY);
1977:     }
1978:     if (!y->xtoy) { /* get xtoy */
1979:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1980:       y->XtoY = X;
1981:     }
1982:     bs2 = bs*bs;
1983:     for (i=0; i<x->nz; i++) {
1984:       j = 0;
1985:       while (j < bs2){
1986:         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1987:         j++;
1988:       }
1989:     }
1990:     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1991:   } else {
1992:     MatAXPY_Basic(a,X,Y,str);
1993:   }
1994:   return(0);
1995: }

1997: /* -------------------------------------------------------------------*/
1998: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1999:        MatGetRow_SeqBAIJ,
2000:        MatRestoreRow_SeqBAIJ,
2001:        MatMult_SeqBAIJ_N,
2002: /* 4*/ MatMultAdd_SeqBAIJ_N,
2003:        MatMultTranspose_SeqBAIJ,
2004:        MatMultTransposeAdd_SeqBAIJ,
2005:        MatSolve_SeqBAIJ_N,
2006:        0,
2007:        0,
2008: /*10*/ 0,
2009:        MatLUFactor_SeqBAIJ,
2010:        0,
2011:        0,
2012:        MatTranspose_SeqBAIJ,
2013: /*15*/ MatGetInfo_SeqBAIJ,
2014:        MatEqual_SeqBAIJ,
2015:        MatGetDiagonal_SeqBAIJ,
2016:        MatDiagonalScale_SeqBAIJ,
2017:        MatNorm_SeqBAIJ,
2018: /*20*/ 0,
2019:        MatAssemblyEnd_SeqBAIJ,
2020:        0,
2021:        MatSetOption_SeqBAIJ,
2022:        MatZeroEntries_SeqBAIJ,
2023: /*25*/ MatZeroRows_SeqBAIJ,
2024:        MatLUFactorSymbolic_SeqBAIJ,
2025:        MatLUFactorNumeric_SeqBAIJ_N,
2026:        0,
2027:        0,
2028: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2029:        MatILUFactorSymbolic_SeqBAIJ,
2030:        0,
2031:        MatGetArray_SeqBAIJ,
2032:        MatRestoreArray_SeqBAIJ,
2033: /*35*/ MatDuplicate_SeqBAIJ,
2034:        0,
2035:        0,
2036:        MatILUFactor_SeqBAIJ,
2037:        0,
2038: /*40*/ MatAXPY_SeqBAIJ,
2039:        MatGetSubMatrices_SeqBAIJ,
2040:        MatIncreaseOverlap_SeqBAIJ,
2041:        MatGetValues_SeqBAIJ,
2042:        0,
2043: /*45*/ MatPrintHelp_SeqBAIJ,
2044:        MatScale_SeqBAIJ,
2045:        0,
2046:        0,
2047:        0,
2048: /*50*/ MatGetBlockSize_SeqBAIJ,
2049:        MatGetRowIJ_SeqBAIJ,
2050:        MatRestoreRowIJ_SeqBAIJ,
2051:        0,
2052:        0,
2053: /*55*/ 0,
2054:        0,
2055:        0,
2056:        0,
2057:        MatSetValuesBlocked_SeqBAIJ,
2058: /*60*/ MatGetSubMatrix_SeqBAIJ,
2059:        MatDestroy_SeqBAIJ,
2060:        MatView_SeqBAIJ,
2061:        MatGetPetscMaps_Petsc,
2062:        0,
2063: /*65*/ 0,
2064:        0,
2065:        0,
2066:        0,
2067:        0,
2068: /*70*/ MatGetRowMax_SeqBAIJ,
2069:        MatConvert_Basic,
2070:        0,
2071:        0,
2072:        0,
2073: /*75*/ 0,
2074:        0,
2075:        0,
2076:        0,
2077:        0,
2078: /*80*/ 0,
2079:        0,
2080:        0,
2081:        0,
2082: /*85*/ MatLoad_SeqBAIJ,
2083:        0,
2084:        0,
2085:        0,
2086:        0
2087: };

2089: EXTERN_C_BEGIN
2092: int MatStoreValues_SeqBAIJ(Mat mat)
2093: {
2094:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2095:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2096:   int         ierr;

2099:   if (aij->nonew != 1) {
2100:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2101:   }

2103:   /* allocate space for values if not already there */
2104:   if (!aij->saved_values) {
2105:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2106:   }

2108:   /* copy values over */
2109:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2110:   return(0);
2111: }
2112: EXTERN_C_END

2114: EXTERN_C_BEGIN
2117: int MatRetrieveValues_SeqBAIJ(Mat mat)
2118: {
2119:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2120:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

2123:   if (aij->nonew != 1) {
2124:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2125:   }
2126:   if (!aij->saved_values) {
2127:     SETERRQ(1,"Must call MatStoreValues(A);first");
2128:   }

2130:   /* copy values over */
2131:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2132:   return(0);
2133: }
2134: EXTERN_C_END

2136: EXTERN_C_BEGIN
2137: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2138: extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2139: EXTERN_C_END

2141: EXTERN_C_BEGIN
2144: int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2145: {
2146:   Mat_SeqBAIJ *b;
2147:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
2148:   PetscTruth  flg;


2152:   B->preallocated = PETSC_TRUE;
2153:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
2154:   if (nnz && newbs != bs) {
2155:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
2156:   }
2157:   bs   = newbs;

2159:   mbs  = B->m/bs;
2160:   nbs  = B->n/bs;
2161:   bs2  = bs*bs;

2163:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2164:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2165:   }

2167:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2168:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2169:   if (nnz) {
2170:     for (i=0; i<mbs; i++) {
2171:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2172:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2173:     }
2174:   }

2176:   b       = (Mat_SeqBAIJ*)B->data;
2177:   PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
2178:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2179:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2180:   if (!flg) {
2181:     switch (bs) {
2182:     case 1:
2183:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2184:       B->ops->mult            = MatMult_SeqBAIJ_1;
2185:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2186:       break;
2187:     case 2:
2188:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2189:       B->ops->mult            = MatMult_SeqBAIJ_2;
2190:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2191:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2192:       break;
2193:     case 3:
2194:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2195:       B->ops->mult            = MatMult_SeqBAIJ_3;
2196:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2197:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2198:       break;
2199:     case 4:
2200:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2201:       B->ops->mult            = MatMult_SeqBAIJ_4;
2202:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2203:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2204:       break;
2205:     case 5:
2206:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2207:       B->ops->mult            = MatMult_SeqBAIJ_5;
2208:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2209:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2210:       break;
2211:     case 6:
2212:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2213:       B->ops->mult            = MatMult_SeqBAIJ_6;
2214:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2215:       break;
2216:     case 7:
2217:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2218:       B->ops->mult            = MatMult_SeqBAIJ_7;
2219:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2220:       break;
2221:     default:
2222:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2223:       B->ops->mult            = MatMult_SeqBAIJ_N;
2224:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2225:       break;
2226:     }
2227:   }
2228:   b->bs      = bs;
2229:   b->mbs     = mbs;
2230:   b->nbs     = nbs;
2231:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
2232:   if (!nnz) {
2233:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2234:     else if (nz <= 0)        nz = 1;
2235:     for (i=0; i<mbs; i++) b->imax[i] = nz;
2236:     nz = nz*mbs;
2237:   } else {
2238:     nz = 0;
2239:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2240:   }

2242:   /* allocate the matrix space */
2243:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2244:   PetscMalloc(len,&b->a);
2245:   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2246:   b->j  = (int*)(b->a + nz*bs2);
2247:   PetscMemzero(b->j,nz*sizeof(int));
2248:   b->i  = b->j + nz;
2249:   b->singlemalloc = PETSC_TRUE;

2251:   b->i[0] = 0;
2252:   for (i=1; i<mbs+1; i++) {
2253:     b->i[i] = b->i[i-1] + b->imax[i-1];
2254:   }

2256:   /* b->ilen will count nonzeros in each block row so far. */
2257:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
2258:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2259:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

2261:   b->bs               = bs;
2262:   b->bs2              = bs2;
2263:   b->mbs              = mbs;
2264:   b->nz               = 0;
2265:   b->maxnz            = nz*bs2;
2266:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2267:   return(0);
2268: }
2269: EXTERN_C_END

2271: /*MC
2272:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2273:    block sparse compressed row format.

2275:    Options Database Keys:
2276: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2278:   Level: beginner

2280: .seealso: MatCreateSeqBAIJ
2281: M*/

2283: EXTERN_C_BEGIN
2286: int MatCreate_SeqBAIJ(Mat B)
2287: {
2288:   int         ierr,size;
2289:   Mat_SeqBAIJ *b;

2292:   MPI_Comm_size(B->comm,&size);
2293:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2295:   B->m = B->M = PetscMax(B->m,B->M);
2296:   B->n = B->N = PetscMax(B->n,B->N);
2297:   PetscNew(Mat_SeqBAIJ,&b);
2298:   B->data = (void*)b;
2299:   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2300:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2301:   B->factor           = 0;
2302:   B->lupivotthreshold = 1.0;
2303:   B->mapping          = 0;
2304:   b->row              = 0;
2305:   b->col              = 0;
2306:   b->icol             = 0;
2307:   b->reallocs         = 0;
2308:   b->saved_values     = 0;
2309: #if defined(PETSC_USE_MAT_SINGLE)
2310:   b->setvalueslen     = 0;
2311:   b->setvaluescopy    = PETSC_NULL;
2312: #endif

2314:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
2315:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

2317:   b->sorted           = PETSC_FALSE;
2318:   b->roworiented      = PETSC_TRUE;
2319:   b->nonew            = 0;
2320:   b->diag             = 0;
2321:   b->solve_work       = 0;
2322:   b->mult_work        = 0;
2323:   B->spptr            = 0;
2324:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2325:   b->keepzeroedrows   = PETSC_FALSE;
2326:   b->xtoy              = 0;
2327:   b->XtoY              = 0;

2329:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2330:                                      "MatStoreValues_SeqBAIJ",
2331:                                       MatStoreValues_SeqBAIJ);
2332:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2333:                                      "MatRetrieveValues_SeqBAIJ",
2334:                                       MatRetrieveValues_SeqBAIJ);
2335:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2336:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2337:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2338:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2339:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2340:                                       MatConvert_SeqBAIJ_SeqAIJ);
2341: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2342:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2343:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2344:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2345:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2346:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2347:   return(0);
2348: }
2349: EXTERN_C_END

2353: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2354: {
2355:   Mat         C;
2356:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2357:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

2360:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2362:   *B = 0;
2363:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
2364:   MatSetType(C,A->type_name);
2365:   c    = (Mat_SeqBAIJ*)C->data;

2367:   C->M   = A->M;
2368:   C->N   = A->N;
2369:   c->bs  = a->bs;
2370:   c->bs2 = a->bs2;
2371:   c->mbs = a->mbs;
2372:   c->nbs = a->nbs;
2373:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

2375:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
2376:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
2377:   for (i=0; i<mbs; i++) {
2378:     c->imax[i] = a->imax[i];
2379:     c->ilen[i] = a->ilen[i];
2380:   }

2382:   /* allocate the matrix space */
2383:   c->singlemalloc = PETSC_TRUE;
2384:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2385:   PetscMalloc(len,&c->a);
2386:   c->j = (int*)(c->a + nz*bs2);
2387:   c->i = c->j + nz;
2388:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2389:   if (mbs > 0) {
2390:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2391:     if (cpvalues == MAT_COPY_VALUES) {
2392:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2393:     } else {
2394:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2395:     }
2396:   }

2398:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2399:   c->sorted      = a->sorted;
2400:   c->roworiented = a->roworiented;
2401:   c->nonew       = a->nonew;

2403:   if (a->diag) {
2404:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
2405:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2406:     for (i=0; i<mbs; i++) {
2407:       c->diag[i] = a->diag[i];
2408:     }
2409:   } else c->diag        = 0;
2410:   c->nz                 = a->nz;
2411:   c->maxnz              = a->maxnz;
2412:   c->solve_work         = 0;
2413:   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
2414:   c->mult_work          = 0;
2415:   C->preallocated       = PETSC_TRUE;
2416:   C->assembled          = PETSC_TRUE;
2417:   *B = C;
2418:   PetscFListDuplicate(A->qlist,&C->qlist);
2419:   return(0);
2420: }

2424: int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2425: {
2426:   Mat_SeqBAIJ  *a;
2427:   Mat          B;
2428:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2429:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2430:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2431:   int          *masked,nmask,tmp,bs2,ishift;
2432:   PetscScalar  *aa;
2433:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

2436:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2437:   bs2  = bs*bs;

2439:   MPI_Comm_size(comm,&size);
2440:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2441:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2442:   PetscBinaryRead(fd,header,4,PETSC_INT);
2443:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2444:   M = header[1]; N = header[2]; nz = header[3];

2446:   if (header[3] < 0) {
2447:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2448:   }

2450:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2452:   /* 
2453:      This code adds extra rows to make sure the number of rows is 
2454:     divisible by the blocksize
2455:   */
2456:   mbs        = M/bs;
2457:   extra_rows = bs - M + bs*(mbs);
2458:   if (extra_rows == bs) extra_rows = 0;
2459:   else                  mbs++;
2460:   if (extra_rows) {
2461:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2462:   }

2464:   /* read in row lengths */
2465:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
2466:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2467:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2469:   /* read in column indices */
2470:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
2471:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2472:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2474:   /* loop over row lengths determining block row lengths */
2475:   PetscMalloc(mbs*sizeof(int),&browlengths);
2476:   PetscMemzero(browlengths,mbs*sizeof(int));
2477:   PetscMalloc(2*mbs*sizeof(int),&mask);
2478:   PetscMemzero(mask,mbs*sizeof(int));
2479:   masked   = mask + mbs;
2480:   rowcount = 0; nzcount = 0;
2481:   for (i=0; i<mbs; i++) {
2482:     nmask = 0;
2483:     for (j=0; j<bs; j++) {
2484:       kmax = rowlengths[rowcount];
2485:       for (k=0; k<kmax; k++) {
2486:         tmp = jj[nzcount++]/bs;
2487:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2488:       }
2489:       rowcount++;
2490:     }
2491:     browlengths[i] += nmask;
2492:     /* zero out the mask elements we set */
2493:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2494:   }

2496:   /* create our matrix */
2497:   MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2498:   MatSetType(B,type);
2499:   MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
2500:   a = (Mat_SeqBAIJ*)B->data;

2502:   /* set matrix "i" values */
2503:   a->i[0] = 0;
2504:   for (i=1; i<= mbs; i++) {
2505:     a->i[i]      = a->i[i-1] + browlengths[i-1];
2506:     a->ilen[i-1] = browlengths[i-1];
2507:   }
2508:   a->nz         = 0;
2509:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

2511:   /* read in nonzero values */
2512:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2513:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2514:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

2516:   /* set "a" and "j" values into matrix */
2517:   nzcount = 0; jcount = 0;
2518:   for (i=0; i<mbs; i++) {
2519:     nzcountb = nzcount;
2520:     nmask    = 0;
2521:     for (j=0; j<bs; j++) {
2522:       kmax = rowlengths[i*bs+j];
2523:       for (k=0; k<kmax; k++) {
2524:         tmp = jj[nzcount++]/bs;
2525:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2526:       }
2527:     }
2528:     /* sort the masked values */
2529:     PetscSortInt(nmask,masked);

2531:     /* set "j" values into matrix */
2532:     maskcount = 1;
2533:     for (j=0; j<nmask; j++) {
2534:       a->j[jcount++]  = masked[j];
2535:       mask[masked[j]] = maskcount++;
2536:     }
2537:     /* set "a" values into matrix */
2538:     ishift = bs2*a->i[i];
2539:     for (j=0; j<bs; j++) {
2540:       kmax = rowlengths[i*bs+j];
2541:       for (k=0; k<kmax; k++) {
2542:         tmp       = jj[nzcountb]/bs ;
2543:         block     = mask[tmp] - 1;
2544:         point     = jj[nzcountb] - bs*tmp;
2545:         idx       = ishift + bs2*block + j + bs*point;
2546:         a->a[idx] = (MatScalar)aa[nzcountb++];
2547:       }
2548:     }
2549:     /* zero out the mask elements we set */
2550:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2551:   }
2552:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2554:   PetscFree(rowlengths);
2555:   PetscFree(browlengths);
2556:   PetscFree(aa);
2557:   PetscFree(jj);
2558:   PetscFree(mask);

2560:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2561:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2562:   MatView_Private(B);

2564:   *A = B;
2565:   return(0);
2566: }

2570: /*@C
2571:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2572:    compressed row) format.  For good matrix assembly performance the
2573:    user should preallocate the matrix storage by setting the parameter nz
2574:    (or the array nnz).  By setting these parameters accurately, performance
2575:    during matrix assembly can be increased by more than a factor of 50.

2577:    Collective on MPI_Comm

2579:    Input Parameters:
2580: +  comm - MPI communicator, set to PETSC_COMM_SELF
2581: .  bs - size of block
2582: .  m - number of rows
2583: .  n - number of columns
2584: .  nz - number of nonzero blocks  per block row (same for all rows)
2585: -  nnz - array containing the number of nonzero blocks in the various block rows 
2586:          (possibly different for each block row) or PETSC_NULL

2588:    Output Parameter:
2589: .  A - the matrix 

2591:    Options Database Keys:
2592: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2593:                      block calculations (much slower)
2594: .    -mat_block_size - size of the blocks to use

2596:    Level: intermediate

2598:    Notes:
2599:    A nonzero block is any block that as 1 or more nonzeros in it

2601:    The block AIJ format is fully compatible with standard Fortran 77
2602:    storage.  That is, the stored row and column indices can begin at
2603:    either one (as in Fortran) or zero.  See the users' manual for details.

2605:    Specify the preallocated storage with either nz or nnz (not both).
2606:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2607:    allocation.  For additional details, see the users manual chapter on
2608:    matrices.

2610: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2611: @*/
2612: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2613: {
2615: 
2617:   MatCreate(comm,m,n,m,n,A);
2618:   MatSetType(*A,MATSEQBAIJ);
2619:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2620:   return(0);
2621: }

2625: /*@C
2626:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2627:    per row in the matrix. For good matrix assembly performance the
2628:    user should preallocate the matrix storage by setting the parameter nz
2629:    (or the array nnz).  By setting these parameters accurately, performance
2630:    during matrix assembly can be increased by more than a factor of 50.

2632:    Collective on MPI_Comm

2634:    Input Parameters:
2635: +  A - the matrix
2636: .  bs - size of block
2637: .  nz - number of block nonzeros per block row (same for all rows)
2638: -  nnz - array containing the number of block nonzeros in the various block rows 
2639:          (possibly different for each block row) or PETSC_NULL

2641:    Options Database Keys:
2642: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2643:                      block calculations (much slower)
2644: .    -mat_block_size - size of the blocks to use

2646:    Level: intermediate

2648:    Notes:
2649:    The block AIJ format is fully compatible with standard Fortran 77
2650:    storage.  That is, the stored row and column indices can begin at
2651:    either one (as in Fortran) or zero.  See the users' manual for details.

2653:    Specify the preallocated storage with either nz or nnz (not both).
2654:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2655:    allocation.  For additional details, see the users manual chapter on
2656:    matrices.

2658: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2659: @*/
2660: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2661: {
2662:   int ierr,(*f)(Mat,int,int,const int[]);

2665:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2666:   if (f) {
2667:     (*f)(B,bs,nz,nnz);
2668:   }
2669:   return(0);
2670: }