Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
16: {
17: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
18: int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
19: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
22: if (a->idiagvalid) return(0);
23: MatMarkDiagonal_SeqBAIJ(A);
24: diag_offset = a->diag;
25: if (!a->idiag) {
26: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
27: }
28: diag = a->idiag;
29: mdiag = a->idiag+bs*bs*mbs;
30: /* factor and invert each block */
31: switch (a->bs){
32: case 2:
33: for (i=0; i<mbs; i++) {
34: odiag = v + 4*diag_offset[i];
35: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
36: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
37: Kernel_A_gets_inverse_A_2(diag);
38: diag += 4;
39: mdiag += 4;
40: }
41: break;
42: case 3:
43: for (i=0; i<mbs; i++) {
44: odiag = v + 9*diag_offset[i];
45: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
46: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
47: diag[8] = odiag[8];
48: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
49: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
50: mdiag[8] = odiag[8];
51: Kernel_A_gets_inverse_A_3(diag);
52: diag += 9;
53: mdiag += 9;
54: }
55: break;
56: case 4:
57: for (i=0; i<mbs; i++) {
58: odiag = v + 16*diag_offset[i];
59: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
60: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
61: Kernel_A_gets_inverse_A_4(diag);
62: diag += 16;
63: mdiag += 16;
64: }
65: break;
66: case 5:
67: for (i=0; i<mbs; i++) {
68: odiag = v + 25*diag_offset[i];
69: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
70: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
71: Kernel_A_gets_inverse_A_5(diag);
72: diag += 25;
73: mdiag += 25;
74: }
75: break;
76: default:
77: SETERRQ1(1,"not supported for block size %d",a->bs);
78: }
79: a->idiagvalid = PETSC_TRUE;
80: return(0);
81: }
85: int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
86: {
87: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
88: PetscScalar *x,x1,x2,s1,s2;
89: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
90: int ierr,m = a->mbs,i,i2,nz,idx;
91: const int *diag,*ai = a->i,*aj = a->j,*vi;
94: its = its*lits;
95: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
96: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
101: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
103: diag = a->diag;
104: idiag = a->idiag;
105: VecGetArray(xx,&x);
106: VecGetArray(bb,(PetscScalar**)&b);
108: if (flag & SOR_ZERO_INITIAL_GUESS) {
109: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112: i2 = 2;
113: idiag += 4;
114: for (i=1; i<m; i++) {
115: v = aa + 4*ai[i];
116: vi = aj + ai[i];
117: nz = diag[i] - ai[i];
118: s1 = b[i2]; s2 = b[i2+1];
119: while (nz--) {
120: idx = 2*(*vi++);
121: x1 = x[idx]; x2 = x[1+idx];
122: s1 -= v[0]*x1 + v[2]*x2;
123: s2 -= v[1]*x1 + v[3]*x2;
124: v += 4;
125: }
126: x[i2] = idiag[0]*s1 + idiag[2]*s2;
127: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128: idiag += 4;
129: i2 += 2;
130: }
131: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132: PetscLogFlops(4*(a->nz));
133: }
134: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136: i2 = 0;
137: mdiag = a->idiag+4*a->mbs;
138: for (i=0; i<m; i++) {
139: x1 = x[i2]; x2 = x[i2+1];
140: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
141: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142: mdiag += 4;
143: i2 += 2;
144: }
145: PetscLogFlops(6*m);
146: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
148: }
149: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150: idiag = a->idiag+4*a->mbs - 4;
151: i2 = 2*m - 2;
152: x1 = x[i2]; x2 = x[i2+1];
153: x[i2] = idiag[0]*x1 + idiag[2]*x2;
154: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155: idiag -= 4;
156: i2 -= 2;
157: for (i=m-2; i>=0; i--) {
158: v = aa + 4*(diag[i]+1);
159: vi = aj + diag[i] + 1;
160: nz = ai[i+1] - diag[i] - 1;
161: s1 = x[i2]; s2 = x[i2+1];
162: while (nz--) {
163: idx = 2*(*vi++);
164: x1 = x[idx]; x2 = x[1+idx];
165: s1 -= v[0]*x1 + v[2]*x2;
166: s2 -= v[1]*x1 + v[3]*x2;
167: v += 4;
168: }
169: x[i2] = idiag[0]*s1 + idiag[2]*s2;
170: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171: idiag -= 4;
172: i2 -= 2;
173: }
174: PetscLogFlops(4*(a->nz));
175: }
176: } else {
177: SETERRQ(1,"Only supports point block SOR with zero initial guess");
178: }
179: VecRestoreArray(xx,&x);
180: VecRestoreArray(bb,(PetscScalar**)&b);
181: return(0);
182: }
186: int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
187: {
188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
189: PetscScalar *x,x1,x2,x3,s1,s2,s3;
190: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
191: int ierr,m = a->mbs,i,i2,nz,idx;
192: const int *diag,*ai = a->i,*aj = a->j,*vi;
195: its = its*lits;
196: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
197: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
198: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
199: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
200: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
204: diag = a->diag;
205: idiag = a->idiag;
206: VecGetArray(xx,&x);
207: VecGetArray(bb,(PetscScalar**)&b);
209: if (flag & SOR_ZERO_INITIAL_GUESS) {
210: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
211: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
212: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
213: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
214: i2 = 3;
215: idiag += 9;
216: for (i=1; i<m; i++) {
217: v = aa + 9*ai[i];
218: vi = aj + ai[i];
219: nz = diag[i] - ai[i];
220: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
221: while (nz--) {
222: idx = 3*(*vi++);
223: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
224: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
225: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
226: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
227: v += 9;
228: }
229: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
230: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
231: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
232: idiag += 9;
233: i2 += 3;
234: }
235: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
236: PetscLogFlops(9*(a->nz));
237: }
238: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
239: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
240: i2 = 0;
241: mdiag = a->idiag+9*a->mbs;
242: for (i=0; i<m; i++) {
243: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
244: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
245: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
246: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
247: mdiag += 9;
248: i2 += 3;
249: }
250: PetscLogFlops(15*m);
251: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
252: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
253: }
254: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
255: idiag = a->idiag+9*a->mbs - 9;
256: i2 = 3*m - 3;
257: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
258: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
259: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
260: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
261: idiag -= 9;
262: i2 -= 3;
263: for (i=m-2; i>=0; i--) {
264: v = aa + 9*(diag[i]+1);
265: vi = aj + diag[i] + 1;
266: nz = ai[i+1] - diag[i] - 1;
267: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
268: while (nz--) {
269: idx = 3*(*vi++);
270: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
271: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
272: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
273: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
274: v += 9;
275: }
276: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
277: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
278: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
279: idiag -= 9;
280: i2 -= 3;
281: }
282: PetscLogFlops(9*(a->nz));
283: }
284: } else {
285: SETERRQ(1,"Only supports point block SOR with zero initial guess");
286: }
287: VecRestoreArray(xx,&x);
288: VecRestoreArray(bb,(PetscScalar**)&b);
289: return(0);
290: }
294: int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
295: {
296: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
297: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
298: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
299: int ierr,m = a->mbs,i,i2,nz,idx;
300: const int *diag,*ai = a->i,*aj = a->j,*vi;
303: its = its*lits;
304: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
305: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
306: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
307: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
308: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
310: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
312: diag = a->diag;
313: idiag = a->idiag;
314: VecGetArray(xx,&x);
315: VecGetArray(bb,(PetscScalar**)&b);
317: if (flag & SOR_ZERO_INITIAL_GUESS) {
318: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
319: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
320: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
321: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
322: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
323: i2 = 4;
324: idiag += 16;
325: for (i=1; i<m; i++) {
326: v = aa + 16*ai[i];
327: vi = aj + ai[i];
328: nz = diag[i] - ai[i];
329: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
330: while (nz--) {
331: idx = 4*(*vi++);
332: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
333: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
334: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
335: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
336: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
337: v += 16;
338: }
339: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
340: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
341: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
342: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
343: idiag += 16;
344: i2 += 4;
345: }
346: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
347: PetscLogFlops(16*(a->nz));
348: }
349: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
350: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
351: i2 = 0;
352: mdiag = a->idiag+16*a->mbs;
353: for (i=0; i<m; i++) {
354: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
355: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
356: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
357: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
358: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
359: mdiag += 16;
360: i2 += 4;
361: }
362: PetscLogFlops(28*m);
363: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
364: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
365: }
366: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
367: idiag = a->idiag+16*a->mbs - 16;
368: i2 = 4*m - 4;
369: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
370: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
371: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
372: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
373: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
374: idiag -= 16;
375: i2 -= 4;
376: for (i=m-2; i>=0; i--) {
377: v = aa + 16*(diag[i]+1);
378: vi = aj + diag[i] + 1;
379: nz = ai[i+1] - diag[i] - 1;
380: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
381: while (nz--) {
382: idx = 4*(*vi++);
383: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
384: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
385: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
386: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
387: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
388: v += 16;
389: }
390: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
391: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
392: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
393: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
394: idiag -= 16;
395: i2 -= 4;
396: }
397: PetscLogFlops(16*(a->nz));
398: }
399: } else {
400: SETERRQ(1,"Only supports point block SOR with zero initial guess");
401: }
402: VecRestoreArray(xx,&x);
403: VecRestoreArray(bb,(PetscScalar**)&b);
404: return(0);
405: }
409: int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
410: {
411: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
412: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
413: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
414: int ierr,m = a->mbs,i,i2,nz,idx;
415: const int *diag,*ai = a->i,*aj = a->j,*vi;
418: its = its*lits;
419: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
420: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
421: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
422: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
423: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
425: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
427: diag = a->diag;
428: idiag = a->idiag;
429: VecGetArray(xx,&x);
430: VecGetArray(bb,(PetscScalar**)&b);
432: if (flag & SOR_ZERO_INITIAL_GUESS) {
433: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
434: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
435: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
436: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
437: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
438: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
439: i2 = 5;
440: idiag += 25;
441: for (i=1; i<m; i++) {
442: v = aa + 25*ai[i];
443: vi = aj + ai[i];
444: nz = diag[i] - ai[i];
445: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
446: while (nz--) {
447: idx = 5*(*vi++);
448: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
449: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
450: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
451: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
452: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
453: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
454: v += 25;
455: }
456: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
457: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
458: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
459: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
460: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
461: idiag += 25;
462: i2 += 5;
463: }
464: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465: PetscLogFlops(25*(a->nz));
466: }
467: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
468: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
469: i2 = 0;
470: mdiag = a->idiag+25*a->mbs;
471: for (i=0; i<m; i++) {
472: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
473: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
474: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
475: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
476: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
477: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
478: mdiag += 25;
479: i2 += 5;
480: }
481: PetscLogFlops(45*m);
482: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
483: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
484: }
485: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
486: idiag = a->idiag+25*a->mbs - 25;
487: i2 = 5*m - 5;
488: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
489: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
490: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
491: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
492: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
493: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
494: idiag -= 25;
495: i2 -= 5;
496: for (i=m-2; i>=0; i--) {
497: v = aa + 25*(diag[i]+1);
498: vi = aj + diag[i] + 1;
499: nz = ai[i+1] - diag[i] - 1;
500: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
501: while (nz--) {
502: idx = 5*(*vi++);
503: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
504: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
505: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
506: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
507: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
508: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
509: v += 25;
510: }
511: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
512: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
513: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
514: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
515: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
516: idiag -= 25;
517: i2 -= 5;
518: }
519: PetscLogFlops(25*(a->nz));
520: }
521: } else {
522: SETERRQ(1,"Only supports point block SOR with zero initial guess");
523: }
524: VecRestoreArray(xx,&x);
525: VecRestoreArray(bb,(PetscScalar**)&b);
526: return(0);
527: }
529: /*
530: Special version for Fun3d sequential benchmark
531: */
532: #if defined(PETSC_HAVE_FORTRAN_CAPS)
533: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
534: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
535: #define matsetvaluesblocked4_ matsetvaluesblocked4
536: #endif
538: EXTERN_C_BEGIN
541: void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
542: {
543: Mat A = *AA;
544: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
545: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
546: int *ai=a->i,*ailen=a->ilen;
547: int *aj=a->j,stepval;
548: const PetscScalar *value = v;
549: MatScalar *ap,*aa = a->a,*bap;
552: stepval = (n-1)*4;
553: for (k=0; k<m; k++) { /* loop over added rows */
554: row = im[k];
555: rp = aj + ai[row];
556: ap = aa + 16*ai[row];
557: nrow = ailen[row];
558: low = 0;
559: for (l=0; l<n; l++) { /* loop over added columns */
560: col = in[l];
561: value = v + k*(stepval+4)*4 + l*4;
562: low = 0; high = nrow;
563: while (high-low > 7) {
564: t = (low+high)/2;
565: if (rp[t] > col) high = t;
566: else low = t;
567: }
568: for (i=low; i<high; i++) {
569: if (rp[i] > col) break;
570: if (rp[i] == col) {
571: bap = ap + 16*i;
572: for (ii=0; ii<4; ii++,value+=stepval) {
573: for (jj=ii; jj<16; jj+=4) {
574: bap[jj] += *value++;
575: }
576: }
577: goto noinsert2;
578: }
579: }
580: N = nrow++ - 1;
581: /* shift up all the later entries in this row */
582: for (ii=N; ii>=i; ii--) {
583: rp[ii+1] = rp[ii];
584: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
585: }
586: if (N >= i) {
587: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
588: }
589: rp[i] = col;
590: bap = ap + 16*i;
591: for (ii=0; ii<4; ii++,value+=stepval) {
592: for (jj=ii; jj<16; jj+=4) {
593: bap[jj] = *value++;
594: }
595: }
596: noinsert2:;
597: low = i;
598: }
599: ailen[row] = nrow;
600: }
601: }
602: EXTERN_C_END
604: #if defined(PETSC_HAVE_FORTRAN_CAPS)
605: #define matsetvalues4_ MATSETVALUES4
606: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
607: #define matsetvalues4_ matsetvalues4
608: #endif
610: EXTERN_C_BEGIN
613: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
614: {
615: Mat A = *AA;
616: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
617: int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
618: int *ai=a->i,*ailen=a->ilen;
619: int *aj=a->j,brow,bcol;
620: int ridx,cidx;
621: MatScalar *ap,value,*aa=a->a,*bap;
622:
624: for (k=0; k<m; k++) { /* loop over added rows */
625: row = im[k]; brow = row/4;
626: rp = aj + ai[brow];
627: ap = aa + 16*ai[brow];
628: nrow = ailen[brow];
629: low = 0;
630: for (l=0; l<n; l++) { /* loop over added columns */
631: col = in[l]; bcol = col/4;
632: ridx = row % 4; cidx = col % 4;
633: value = v[l + k*n];
634: low = 0; high = nrow;
635: while (high-low > 7) {
636: t = (low+high)/2;
637: if (rp[t] > bcol) high = t;
638: else low = t;
639: }
640: for (i=low; i<high; i++) {
641: if (rp[i] > bcol) break;
642: if (rp[i] == bcol) {
643: bap = ap + 16*i + 4*cidx + ridx;
644: *bap += value;
645: goto noinsert1;
646: }
647: }
648: N = nrow++ - 1;
649: /* shift up all the later entries in this row */
650: for (ii=N; ii>=i; ii--) {
651: rp[ii+1] = rp[ii];
652: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
653: }
654: if (N>=i) {
655: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
656: }
657: rp[i] = bcol;
658: ap[16*i + 4*cidx + ridx] = value;
659: noinsert1:;
660: low = i;
661: }
662: ailen[brow] = nrow;
663: }
664: }
665: EXTERN_C_END
667: /* UGLY, ugly, ugly
668: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
669: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
670: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
671: converts the entries into single precision and then calls ..._MatScalar() to put them
672: into the single precision data structures.
673: */
674: #if defined(PETSC_USE_MAT_SINGLE)
675: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
676: #else
677: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
678: #endif
680: #define CHUNKSIZE 10
682: /*
683: Checks for missing diagonals
684: */
687: int MatMissingDiagonal_SeqBAIJ(Mat A)
688: {
689: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690: int *diag,*jj = a->j,i,ierr;
693: MatMarkDiagonal_SeqBAIJ(A);
694: diag = a->diag;
695: for (i=0; i<a->mbs; i++) {
696: if (jj[diag[i]] != i) {
697: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
698: }
699: }
700: return(0);
701: }
705: int MatMarkDiagonal_SeqBAIJ(Mat A)
706: {
707: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
708: int i,j,*diag,m = a->mbs,ierr;
711: if (a->diag) return(0);
713: PetscMalloc((m+1)*sizeof(int),&diag);
714: PetscLogObjectMemory(A,(m+1)*sizeof(int));
715: for (i=0; i<m; i++) {
716: diag[i] = a->i[i+1];
717: for (j=a->i[i]; j<a->i[i+1]; j++) {
718: if (a->j[j] == i) {
719: diag[i] = j;
720: break;
721: }
722: }
723: }
724: a->diag = diag;
725: return(0);
726: }
729: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
733: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
734: {
735: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
736: int ierr,n = a->mbs,i;
739: *nn = n;
740: if (!ia) return(0);
741: if (symmetric) {
742: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
743: } else if (oshift == 1) {
744: /* temporarily add 1 to i and j indices */
745: int nz = a->i[n];
746: for (i=0; i<nz; i++) a->j[i]++;
747: for (i=0; i<n+1; i++) a->i[i]++;
748: *ia = a->i; *ja = a->j;
749: } else {
750: *ia = a->i; *ja = a->j;
751: }
753: return(0);
754: }
758: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
759: {
760: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
761: int i,n = a->mbs,ierr;
764: if (!ia) return(0);
765: if (symmetric) {
766: PetscFree(*ia);
767: PetscFree(*ja);
768: } else if (oshift == 1) {
769: int nz = a->i[n]-1;
770: for (i=0; i<nz; i++) a->j[i]--;
771: for (i=0; i<n+1; i++) a->i[i]--;
772: }
773: return(0);
774: }
778: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
779: {
780: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
783: *bs = baij->bs;
784: return(0);
785: }
790: int MatDestroy_SeqBAIJ(Mat A)
791: {
792: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793: int ierr;
796: #if defined(PETSC_USE_LOG)
797: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
798: #endif
799: PetscFree(a->a);
800: if (!a->singlemalloc) {
801: PetscFree(a->i);
802: PetscFree(a->j);
803: }
804: if (a->row) {
805: ISDestroy(a->row);
806: }
807: if (a->col) {
808: ISDestroy(a->col);
809: }
810: if (a->diag) {PetscFree(a->diag);}
811: if (a->idiag) {PetscFree(a->idiag);}
812: if (a->ilen) {PetscFree(a->ilen);}
813: if (a->imax) {PetscFree(a->imax);}
814: if (a->solve_work) {PetscFree(a->solve_work);}
815: if (a->mult_work) {PetscFree(a->mult_work);}
816: if (a->icol) {ISDestroy(a->icol);}
817: if (a->saved_values) {PetscFree(a->saved_values);}
818: #if defined(PETSC_USE_MAT_SINGLE)
819: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
820: #endif
821: if (a->xtoy) {PetscFree(a->xtoy);}
823: PetscFree(a);
824: return(0);
825: }
829: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
830: {
831: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
834: switch (op) {
835: case MAT_ROW_ORIENTED:
836: a->roworiented = PETSC_TRUE;
837: break;
838: case MAT_COLUMN_ORIENTED:
839: a->roworiented = PETSC_FALSE;
840: break;
841: case MAT_COLUMNS_SORTED:
842: a->sorted = PETSC_TRUE;
843: break;
844: case MAT_COLUMNS_UNSORTED:
845: a->sorted = PETSC_FALSE;
846: break;
847: case MAT_KEEP_ZEROED_ROWS:
848: a->keepzeroedrows = PETSC_TRUE;
849: break;
850: case MAT_NO_NEW_NONZERO_LOCATIONS:
851: a->nonew = 1;
852: break;
853: case MAT_NEW_NONZERO_LOCATION_ERR:
854: a->nonew = -1;
855: break;
856: case MAT_NEW_NONZERO_ALLOCATION_ERR:
857: a->nonew = -2;
858: break;
859: case MAT_YES_NEW_NONZERO_LOCATIONS:
860: a->nonew = 0;
861: break;
862: case MAT_ROWS_SORTED:
863: case MAT_ROWS_UNSORTED:
864: case MAT_YES_NEW_DIAGONALS:
865: case MAT_IGNORE_OFF_PROC_ENTRIES:
866: case MAT_USE_HASH_TABLE:
867: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
868: break;
869: case MAT_NO_NEW_DIAGONALS:
870: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
871: case MAT_SYMMETRIC:
872: case MAT_STRUCTURALLY_SYMMETRIC:
873: case MAT_NOT_SYMMETRIC:
874: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
875: case MAT_HERMITIAN:
876: case MAT_NOT_HERMITIAN:
877: case MAT_SYMMETRY_ETERNAL:
878: case MAT_NOT_SYMMETRY_ETERNAL:
879: break;
880: default:
881: SETERRQ(PETSC_ERR_SUP,"unknown option");
882: }
883: return(0);
884: }
888: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
889: {
890: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
891: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
892: MatScalar *aa,*aa_i;
893: PetscScalar *v_i;
896: bs = a->bs;
897: ai = a->i;
898: aj = a->j;
899: aa = a->a;
900: bs2 = a->bs2;
901:
902: if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
903:
904: bn = row/bs; /* Block number */
905: bp = row % bs; /* Block Position */
906: M = ai[bn+1] - ai[bn];
907: *nz = bs*M;
908:
909: if (v) {
910: *v = 0;
911: if (*nz) {
912: PetscMalloc((*nz)*sizeof(PetscScalar),v);
913: for (i=0; i<M; i++) { /* for each block in the block row */
914: v_i = *v + i*bs;
915: aa_i = aa + bs2*(ai[bn] + i);
916: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
917: }
918: }
919: }
921: if (idx) {
922: *idx = 0;
923: if (*nz) {
924: PetscMalloc((*nz)*sizeof(int),idx);
925: for (i=0; i<M; i++) { /* for each block in the block row */
926: idx_i = *idx + i*bs;
927: itmp = bs*aj[ai[bn] + i];
928: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
929: }
930: }
931: }
932: return(0);
933: }
937: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
938: {
942: if (idx) {if (*idx) {PetscFree(*idx);}}
943: if (v) {if (*v) {PetscFree(*v);}}
944: return(0);
945: }
949: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
950: {
951: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
952: Mat C;
953: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
954: int *rows,*cols,bs2=a->bs2;
955: PetscScalar *array;
958: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
959: PetscMalloc((1+nbs)*sizeof(int),&col);
960: PetscMemzero(col,(1+nbs)*sizeof(int));
962: #if defined(PETSC_USE_MAT_SINGLE)
963: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
964: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
965: #else
966: array = a->a;
967: #endif
969: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
970: MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);
971: MatSetType(C,A->type_name);
972: MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);
973: PetscFree(col);
974: PetscMalloc(2*bs*sizeof(int),&rows);
975: cols = rows + bs;
976: for (i=0; i<mbs; i++) {
977: cols[0] = i*bs;
978: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
979: len = ai[i+1] - ai[i];
980: for (j=0; j<len; j++) {
981: rows[0] = (*aj++)*bs;
982: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
983: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
984: array += bs2;
985: }
986: }
987: PetscFree(rows);
988: #if defined(PETSC_USE_MAT_SINGLE)
989: PetscFree(array);
990: #endif
991:
992: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
993: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
994:
995: if (B) {
996: *B = C;
997: } else {
998: MatHeaderCopy(A,C);
999: }
1000: return(0);
1001: }
1005: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1006: {
1007: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1008: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1009: PetscScalar *aa;
1010: FILE *file;
1013: PetscViewerBinaryGetDescriptor(viewer,&fd);
1014: PetscMalloc((4+A->m)*sizeof(int),&col_lens);
1015: col_lens[0] = MAT_FILE_COOKIE;
1017: col_lens[1] = A->m;
1018: col_lens[2] = A->n;
1019: col_lens[3] = a->nz*bs2;
1021: /* store lengths of each row and write (including header) to file */
1022: count = 0;
1023: for (i=0; i<a->mbs; i++) {
1024: for (j=0; j<bs; j++) {
1025: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1026: }
1027: }
1028: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
1029: PetscFree(col_lens);
1031: /* store column indices (zero start index) */
1032: PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
1033: count = 0;
1034: for (i=0; i<a->mbs; i++) {
1035: for (j=0; j<bs; j++) {
1036: for (k=a->i[i]; k<a->i[i+1]; k++) {
1037: for (l=0; l<bs; l++) {
1038: jj[count++] = bs*a->j[k] + l;
1039: }
1040: }
1041: }
1042: }
1043: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
1044: PetscFree(jj);
1046: /* store nonzero values */
1047: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1048: count = 0;
1049: for (i=0; i<a->mbs; i++) {
1050: for (j=0; j<bs; j++) {
1051: for (k=a->i[i]; k<a->i[i+1]; k++) {
1052: for (l=0; l<bs; l++) {
1053: aa[count++] = a->a[bs2*k + l*bs + j];
1054: }
1055: }
1056: }
1057: }
1058: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
1059: PetscFree(aa);
1061: PetscViewerBinaryGetInfoPointer(viewer,&file);
1062: if (file) {
1063: fprintf(file,"-matload_block_size %d\n",a->bs);
1064: }
1065: return(0);
1066: }
1070: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1071: {
1072: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1073: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
1074: PetscViewerFormat format;
1077: PetscViewerGetFormat(viewer,&format);
1078: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1079: PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);
1080: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1081: Mat aij;
1082: MatConvert(A,MATSEQAIJ,&aij);
1083: MatView(aij,viewer);
1084: MatDestroy(aij);
1085: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086: return(0);
1087: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1088: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1089: for (i=0; i<a->mbs; i++) {
1090: for (j=0; j<bs; j++) {
1091: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
1092: for (k=a->i[i]; k<a->i[i+1]; k++) {
1093: for (l=0; l<bs; l++) {
1094: #if defined(PETSC_USE_COMPLEX)
1095: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1096: PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1097: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1098: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1099: PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1100: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1101: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1102: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1103: }
1104: #else
1105: if (a->a[bs2*k + l*bs + j] != 0.0) {
1106: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1107: }
1108: #endif
1109: }
1110: }
1111: PetscViewerASCIIPrintf(viewer,"\n");
1112: }
1113: }
1114: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1115: } else {
1116: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1117: for (i=0; i<a->mbs; i++) {
1118: for (j=0; j<bs; j++) {
1119: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
1120: for (k=a->i[i]; k<a->i[i+1]; k++) {
1121: for (l=0; l<bs; l++) {
1122: #if defined(PETSC_USE_COMPLEX)
1123: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1124: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1125: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1126: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1127: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1128: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1129: } else {
1130: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1131: }
1132: #else
1133: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1134: #endif
1135: }
1136: }
1137: PetscViewerASCIIPrintf(viewer,"\n");
1138: }
1139: }
1140: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1141: }
1142: PetscViewerFlush(viewer);
1143: return(0);
1144: }
1148: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1149: {
1150: Mat A = (Mat) Aa;
1151: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1152: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1153: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1154: MatScalar *aa;
1155: PetscViewer viewer;
1159: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1160: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1162: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1164: /* loop over matrix elements drawing boxes */
1165: color = PETSC_DRAW_BLUE;
1166: for (i=0,row=0; i<mbs; i++,row+=bs) {
1167: for (j=a->i[i]; j<a->i[i+1]; j++) {
1168: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1169: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1170: aa = a->a + j*bs2;
1171: for (k=0; k<bs; k++) {
1172: for (l=0; l<bs; l++) {
1173: if (PetscRealPart(*aa++) >= 0.) continue;
1174: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1175: }
1176: }
1177: }
1178: }
1179: color = PETSC_DRAW_CYAN;
1180: for (i=0,row=0; i<mbs; i++,row+=bs) {
1181: for (j=a->i[i]; j<a->i[i+1]; j++) {
1182: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1183: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1184: aa = a->a + j*bs2;
1185: for (k=0; k<bs; k++) {
1186: for (l=0; l<bs; l++) {
1187: if (PetscRealPart(*aa++) != 0.) continue;
1188: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1189: }
1190: }
1191: }
1192: }
1194: color = PETSC_DRAW_RED;
1195: for (i=0,row=0; i<mbs; i++,row+=bs) {
1196: for (j=a->i[i]; j<a->i[i+1]; j++) {
1197: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1198: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1199: aa = a->a + j*bs2;
1200: for (k=0; k<bs; k++) {
1201: for (l=0; l<bs; l++) {
1202: if (PetscRealPart(*aa++) <= 0.) continue;
1203: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1204: }
1205: }
1206: }
1207: }
1208: return(0);
1209: }
1213: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1214: {
1215: int ierr;
1216: PetscReal xl,yl,xr,yr,w,h;
1217: PetscDraw draw;
1218: PetscTruth isnull;
1222: PetscViewerDrawGetDraw(viewer,0,&draw);
1223: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1225: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1226: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1227: xr += w; yr += h; xl = -w; yl = -h;
1228: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1229: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1230: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1231: return(0);
1232: }
1236: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1237: {
1238: int ierr;
1239: PetscTruth isascii,isbinary,isdraw;
1242: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1243: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1244: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1245: if (isascii){
1246: MatView_SeqBAIJ_ASCII(A,viewer);
1247: } else if (isbinary) {
1248: MatView_SeqBAIJ_Binary(A,viewer);
1249: } else if (isdraw) {
1250: MatView_SeqBAIJ_Draw(A,viewer);
1251: } else {
1252: Mat B;
1253: MatConvert(A,MATSEQAIJ,&B);
1254: MatView(B,viewer);
1255: MatDestroy(B);
1256: }
1257: return(0);
1258: }
1263: int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1264: {
1265: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1266: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1267: int *ai = a->i,*ailen = a->ilen;
1268: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1269: MatScalar *ap,*aa = a->a,zero = 0.0;
1272: for (k=0; k<m; k++) { /* loop over rows */
1273: row = im[k]; brow = row/bs;
1274: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1275: if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1276: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1277: nrow = ailen[brow];
1278: for (l=0; l<n; l++) { /* loop over columns */
1279: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1280: if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1281: col = in[l] ;
1282: bcol = col/bs;
1283: cidx = col%bs;
1284: ridx = row%bs;
1285: high = nrow;
1286: low = 0; /* assume unsorted */
1287: while (high-low > 5) {
1288: t = (low+high)/2;
1289: if (rp[t] > bcol) high = t;
1290: else low = t;
1291: }
1292: for (i=low; i<high; i++) {
1293: if (rp[i] > bcol) break;
1294: if (rp[i] == bcol) {
1295: *v++ = ap[bs2*i+bs*cidx+ridx];
1296: goto finished;
1297: }
1298: }
1299: *v++ = zero;
1300: finished:;
1301: }
1302: }
1303: return(0);
1304: }
1306: #if defined(PETSC_USE_MAT_SINGLE)
1309: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1310: {
1311: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1312: int ierr,i,N = m*n*b->bs2;
1313: MatScalar *vsingle;
1316: if (N > b->setvalueslen) {
1317: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
1318: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1319: b->setvalueslen = N;
1320: }
1321: vsingle = b->setvaluescopy;
1322: for (i=0; i<N; i++) {
1323: vsingle[i] = v[i];
1324: }
1325: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1326: return(0);
1327: }
1328: #endif
1333: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1334: {
1335: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1336: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1337: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1338: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1339: PetscTruth roworiented=a->roworiented;
1340: const MatScalar *value = v;
1341: MatScalar *ap,*aa = a->a,*bap;
1344: if (roworiented) {
1345: stepval = (n-1)*bs;
1346: } else {
1347: stepval = (m-1)*bs;
1348: }
1349: for (k=0; k<m; k++) { /* loop over added rows */
1350: row = im[k];
1351: if (row < 0) continue;
1352: #if defined(PETSC_USE_BOPT_g)
1353: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1354: #endif
1355: rp = aj + ai[row];
1356: ap = aa + bs2*ai[row];
1357: rmax = imax[row];
1358: nrow = ailen[row];
1359: low = 0;
1360: for (l=0; l<n; l++) { /* loop over added columns */
1361: if (in[l] < 0) continue;
1362: #if defined(PETSC_USE_BOPT_g)
1363: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1364: #endif
1365: col = in[l];
1366: if (roworiented) {
1367: value = v + k*(stepval+bs)*bs + l*bs;
1368: } else {
1369: value = v + l*(stepval+bs)*bs + k*bs;
1370: }
1371: if (!sorted) low = 0; high = nrow;
1372: while (high-low > 7) {
1373: t = (low+high)/2;
1374: if (rp[t] > col) high = t;
1375: else low = t;
1376: }
1377: for (i=low; i<high; i++) {
1378: if (rp[i] > col) break;
1379: if (rp[i] == col) {
1380: bap = ap + bs2*i;
1381: if (roworiented) {
1382: if (is == ADD_VALUES) {
1383: for (ii=0; ii<bs; ii++,value+=stepval) {
1384: for (jj=ii; jj<bs2; jj+=bs) {
1385: bap[jj] += *value++;
1386: }
1387: }
1388: } else {
1389: for (ii=0; ii<bs; ii++,value+=stepval) {
1390: for (jj=ii; jj<bs2; jj+=bs) {
1391: bap[jj] = *value++;
1392: }
1393: }
1394: }
1395: } else {
1396: if (is == ADD_VALUES) {
1397: for (ii=0; ii<bs; ii++,value+=stepval) {
1398: for (jj=0; jj<bs; jj++) {
1399: *bap++ += *value++;
1400: }
1401: }
1402: } else {
1403: for (ii=0; ii<bs; ii++,value+=stepval) {
1404: for (jj=0; jj<bs; jj++) {
1405: *bap++ = *value++;
1406: }
1407: }
1408: }
1409: }
1410: goto noinsert2;
1411: }
1412: }
1413: if (nonew == 1) goto noinsert2;
1414: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1415: if (nrow >= rmax) {
1416: /* there is no extra room in row, therefore enlarge */
1417: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1418: MatScalar *new_a;
1420: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1422: /* malloc new storage space */
1423: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1424: PetscMalloc(len,&new_a);
1425: new_j = (int*)(new_a + bs2*new_nz);
1426: new_i = new_j + new_nz;
1428: /* copy over old data into new slots */
1429: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1430: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1431: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
1432: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
1433: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
1434: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
1435: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1436: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
1437: /* free up old matrix storage */
1438: PetscFree(a->a);
1439: if (!a->singlemalloc) {
1440: PetscFree(a->i);
1441: PetscFree(a->j);
1442: }
1443: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1444: a->singlemalloc = PETSC_TRUE;
1446: rp = aj + ai[row]; ap = aa + bs2*ai[row];
1447: rmax = imax[row] = imax[row] + CHUNKSIZE;
1448: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1449: a->maxnz += bs2*CHUNKSIZE;
1450: a->reallocs++;
1451: a->nz++;
1452: }
1453: N = nrow++ - 1;
1454: /* shift up all the later entries in this row */
1455: for (ii=N; ii>=i; ii--) {
1456: rp[ii+1] = rp[ii];
1457: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1458: }
1459: if (N >= i) {
1460: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1461: }
1462: rp[i] = col;
1463: bap = ap + bs2*i;
1464: if (roworiented) {
1465: for (ii=0; ii<bs; ii++,value+=stepval) {
1466: for (jj=ii; jj<bs2; jj+=bs) {
1467: bap[jj] = *value++;
1468: }
1469: }
1470: } else {
1471: for (ii=0; ii<bs; ii++,value+=stepval) {
1472: for (jj=0; jj<bs; jj++) {
1473: *bap++ = *value++;
1474: }
1475: }
1476: }
1477: noinsert2:;
1478: low = i;
1479: }
1480: ailen[row] = nrow;
1481: }
1482: return(0);
1483: }
1487: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1488: {
1489: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1490: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1491: int m = A->m,*ip,N,*ailen = a->ilen;
1492: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1493: MatScalar *aa = a->a,*ap;
1496: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1498: if (m) rmax = ailen[0];
1499: for (i=1; i<mbs; i++) {
1500: /* move each row back by the amount of empty slots (fshift) before it*/
1501: fshift += imax[i-1] - ailen[i-1];
1502: rmax = PetscMax(rmax,ailen[i]);
1503: if (fshift) {
1504: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1505: N = ailen[i];
1506: for (j=0; j<N; j++) {
1507: ip[j-fshift] = ip[j];
1508: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1509: }
1510: }
1511: ai[i] = ai[i-1] + ailen[i-1];
1512: }
1513: if (mbs) {
1514: fshift += imax[mbs-1] - ailen[mbs-1];
1515: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1516: }
1517: /* reset ilen and imax for each row */
1518: for (i=0; i<mbs; i++) {
1519: ailen[i] = imax[i] = ai[i+1] - ai[i];
1520: }
1521: a->nz = ai[mbs];
1523: /* diagonals may have moved, so kill the diagonal pointers */
1524: a->idiagvalid = PETSC_FALSE;
1525: if (fshift && a->diag) {
1526: PetscFree(a->diag);
1527: PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1528: a->diag = 0;
1529: }
1530: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1531: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1532: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1533: a->reallocs = 0;
1534: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1536: return(0);
1537: }
1541: /*
1542: This function returns an array of flags which indicate the locations of contiguous
1543: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1544: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1545: Assume: sizes should be long enough to hold all the values.
1546: */
1549: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1550: {
1551: int i,j,k,row;
1552: PetscTruth flg;
1555: for (i=0,j=0; i<n; j++) {
1556: row = idx[i];
1557: if (row%bs!=0) { /* Not the begining of a block */
1558: sizes[j] = 1;
1559: i++;
1560: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1561: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1562: i++;
1563: } else { /* Begining of the block, so check if the complete block exists */
1564: flg = PETSC_TRUE;
1565: for (k=1; k<bs; k++) {
1566: if (row+k != idx[i+k]) { /* break in the block */
1567: flg = PETSC_FALSE;
1568: break;
1569: }
1570: }
1571: if (flg == PETSC_TRUE) { /* No break in the bs */
1572: sizes[j] = bs;
1573: i+= bs;
1574: } else {
1575: sizes[j] = 1;
1576: i++;
1577: }
1578: }
1579: }
1580: *bs_max = j;
1581: return(0);
1582: }
1583:
1586: int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1587: {
1588: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1589: int ierr,i,j,k,count,is_n,*is_idx,*rows;
1590: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1591: PetscScalar zero = 0.0;
1592: MatScalar *aa;
1595: /* Make a copy of the IS and sort it */
1596: ISGetLocalSize(is,&is_n);
1597: ISGetIndices(is,&is_idx);
1599: /* allocate memory for rows,sizes */
1600: PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1601: sizes = rows + is_n;
1603: /* copy IS values to rows, and sort them */
1604: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1605: PetscSortInt(is_n,rows);
1606: if (baij->keepzeroedrows) {
1607: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1608: bs_max = is_n;
1609: } else {
1610: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1611: }
1612: ISRestoreIndices(is,&is_idx);
1614: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1615: row = rows[j];
1616: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1617: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1618: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1619: if (sizes[i] == bs && !baij->keepzeroedrows) {
1620: if (diag) {
1621: if (baij->ilen[row/bs] > 0) {
1622: baij->ilen[row/bs] = 1;
1623: baij->j[baij->i[row/bs]] = row/bs;
1624: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1625: }
1626: /* Now insert all the diagonal values for this bs */
1627: for (k=0; k<bs; k++) {
1628: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1629: }
1630: } else { /* (!diag) */
1631: baij->ilen[row/bs] = 0;
1632: } /* end (!diag) */
1633: } else { /* (sizes[i] != bs) */
1634: #if defined (PETSC_USE_DEBUG)
1635: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1636: #endif
1637: for (k=0; k<count; k++) {
1638: aa[0] = zero;
1639: aa += bs;
1640: }
1641: if (diag) {
1642: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1643: }
1644: }
1645: }
1647: PetscFree(rows);
1648: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1649: return(0);
1650: }
1654: int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1655: {
1656: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1657: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1658: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1659: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1660: int ridx,cidx,bs2=a->bs2,ierr;
1661: PetscTruth roworiented=a->roworiented;
1662: MatScalar *ap,value,*aa=a->a,*bap;
1665: for (k=0; k<m; k++) { /* loop over added rows */
1666: row = im[k]; brow = row/bs;
1667: if (row < 0) continue;
1668: #if defined(PETSC_USE_BOPT_g)
1669: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1670: #endif
1671: rp = aj + ai[brow];
1672: ap = aa + bs2*ai[brow];
1673: rmax = imax[brow];
1674: nrow = ailen[brow];
1675: low = 0;
1676: for (l=0; l<n; l++) { /* loop over added columns */
1677: if (in[l] < 0) continue;
1678: #if defined(PETSC_USE_BOPT_g)
1679: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1680: #endif
1681: col = in[l]; bcol = col/bs;
1682: ridx = row % bs; cidx = col % bs;
1683: if (roworiented) {
1684: value = v[l + k*n];
1685: } else {
1686: value = v[k + l*m];
1687: }
1688: if (!sorted) low = 0; high = nrow;
1689: while (high-low > 7) {
1690: t = (low+high)/2;
1691: if (rp[t] > bcol) high = t;
1692: else low = t;
1693: }
1694: for (i=low; i<high; i++) {
1695: if (rp[i] > bcol) break;
1696: if (rp[i] == bcol) {
1697: bap = ap + bs2*i + bs*cidx + ridx;
1698: if (is == ADD_VALUES) *bap += value;
1699: else *bap = value;
1700: goto noinsert1;
1701: }
1702: }
1703: if (nonew == 1) goto noinsert1;
1704: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1705: if (nrow >= rmax) {
1706: /* there is no extra room in row, therefore enlarge */
1707: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1708: MatScalar *new_a;
1710: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1712: /* Malloc new storage space */
1713: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1714: PetscMalloc(len,&new_a);
1715: new_j = (int*)(new_a + bs2*new_nz);
1716: new_i = new_j + new_nz;
1718: /* copy over old data into new slots */
1719: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1720: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1721: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1722: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1723: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1724: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1725: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1726: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1727: /* free up old matrix storage */
1728: PetscFree(a->a);
1729: if (!a->singlemalloc) {
1730: PetscFree(a->i);
1731: PetscFree(a->j);
1732: }
1733: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1734: a->singlemalloc = PETSC_TRUE;
1736: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1737: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1738: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1739: a->maxnz += bs2*CHUNKSIZE;
1740: a->reallocs++;
1741: a->nz++;
1742: }
1743: N = nrow++ - 1;
1744: /* shift up all the later entries in this row */
1745: for (ii=N; ii>=i; ii--) {
1746: rp[ii+1] = rp[ii];
1747: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1748: }
1749: if (N>=i) {
1750: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1751: }
1752: rp[i] = bcol;
1753: ap[bs2*i + bs*cidx + ridx] = value;
1754: noinsert1:;
1755: low = i;
1756: }
1757: ailen[brow] = nrow;
1758: }
1759: return(0);
1760: }
1765: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1766: {
1767: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1768: Mat outA;
1769: int ierr;
1770: PetscTruth row_identity,col_identity;
1773: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1774: ISIdentity(row,&row_identity);
1775: ISIdentity(col,&col_identity);
1776: if (!row_identity || !col_identity) {
1777: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1778: }
1780: outA = inA;
1781: inA->factor = FACTOR_LU;
1783: if (!a->diag) {
1784: MatMarkDiagonal_SeqBAIJ(inA);
1785: }
1787: a->row = row;
1788: a->col = col;
1789: PetscObjectReference((PetscObject)row);
1790: PetscObjectReference((PetscObject)col);
1791:
1792: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1793: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1794: PetscLogObjectParent(inA,a->icol);
1795:
1796: /*
1797: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1798: for ILU(0) factorization with natural ordering
1799: */
1800: if (a->bs < 8) {
1801: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1802: } else {
1803: if (!a->solve_work) {
1804: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1805: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1806: }
1807: }
1809: MatLUFactorNumeric(inA,&outA);
1811: return(0);
1812: }
1815: int MatPrintHelp_SeqBAIJ(Mat A)
1816: {
1817: static PetscTruth called = PETSC_FALSE;
1818: MPI_Comm comm = A->comm;
1819: int ierr;
1822: if (called) {return(0);} else called = PETSC_TRUE;
1823: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1824: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");
1825: return(0);
1826: }
1828: EXTERN_C_BEGIN
1831: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1832: {
1833: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1834: int i,nz,nbs;
1837: nz = baij->maxnz/baij->bs2;
1838: nbs = baij->nbs;
1839: for (i=0; i<nz; i++) {
1840: baij->j[i] = indices[i];
1841: }
1842: baij->nz = nz;
1843: for (i=0; i<nbs; i++) {
1844: baij->ilen[i] = baij->imax[i];
1845: }
1847: return(0);
1848: }
1849: EXTERN_C_END
1853: /*@
1854: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1855: in the matrix.
1857: Input Parameters:
1858: + mat - the SeqBAIJ matrix
1859: - indices - the column indices
1861: Level: advanced
1863: Notes:
1864: This can be called if you have precomputed the nonzero structure of the
1865: matrix and want to provide it to the matrix object to improve the performance
1866: of the MatSetValues() operation.
1868: You MUST have set the correct numbers of nonzeros per row in the call to
1869: MatCreateSeqBAIJ().
1871: MUST be called before any calls to MatSetValues();
1873: @*/
1874: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1875: {
1876: int ierr,(*f)(Mat,int *);
1881: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1882: if (f) {
1883: (*f)(mat,indices);
1884: } else {
1885: SETERRQ(1,"Wrong type of matrix to set column indices");
1886: }
1887: return(0);
1888: }
1892: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1893: {
1894: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1895: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1896: PetscReal atmp;
1897: PetscScalar *x,zero = 0.0;
1898: MatScalar *aa;
1899: int ncols,brow,krow,kcol;
1902: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1903: bs = a->bs;
1904: aa = a->a;
1905: ai = a->i;
1906: aj = a->j;
1907: mbs = a->mbs;
1909: VecSet(&zero,v);
1910: VecGetArray(v,&x);
1911: VecGetLocalSize(v,&n);
1912: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1913: for (i=0; i<mbs; i++) {
1914: ncols = ai[1] - ai[0]; ai++;
1915: brow = bs*i;
1916: for (j=0; j<ncols; j++){
1917: /* bcol = bs*(*aj); */
1918: for (kcol=0; kcol<bs; kcol++){
1919: for (krow=0; krow<bs; krow++){
1920: atmp = PetscAbsScalar(*aa); aa++;
1921: row = brow + krow; /* row index */
1922: /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1923: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1924: }
1925: }
1926: aj++;
1927: }
1928: }
1929: VecRestoreArray(v,&x);
1930: return(0);
1931: }
1935: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1936: {
1937: int ierr;
1940: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1941: return(0);
1942: }
1946: int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1947: {
1948: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1950: *array = a->a;
1951: return(0);
1952: }
1956: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1957: {
1959: return(0);
1960: }
1962: #include petscblaslapack.h
1965: int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1966: {
1967: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1968: int ierr,one=1,i,bs=y->bs,j,bs2;
1971: if (str == SAME_NONZERO_PATTERN) {
1972: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1973: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974: if (y->xtoy && y->XtoY != X) {
1975: PetscFree(y->xtoy);
1976: MatDestroy(y->XtoY);
1977: }
1978: if (!y->xtoy) { /* get xtoy */
1979: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1980: y->XtoY = X;
1981: }
1982: bs2 = bs*bs;
1983: for (i=0; i<x->nz; i++) {
1984: j = 0;
1985: while (j < bs2){
1986: y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1987: j++;
1988: }
1989: }
1990: PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1991: } else {
1992: MatAXPY_Basic(a,X,Y,str);
1993: }
1994: return(0);
1995: }
1997: /* -------------------------------------------------------------------*/
1998: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1999: MatGetRow_SeqBAIJ,
2000: MatRestoreRow_SeqBAIJ,
2001: MatMult_SeqBAIJ_N,
2002: /* 4*/ MatMultAdd_SeqBAIJ_N,
2003: MatMultTranspose_SeqBAIJ,
2004: MatMultTransposeAdd_SeqBAIJ,
2005: MatSolve_SeqBAIJ_N,
2006: 0,
2007: 0,
2008: /*10*/ 0,
2009: MatLUFactor_SeqBAIJ,
2010: 0,
2011: 0,
2012: MatTranspose_SeqBAIJ,
2013: /*15*/ MatGetInfo_SeqBAIJ,
2014: MatEqual_SeqBAIJ,
2015: MatGetDiagonal_SeqBAIJ,
2016: MatDiagonalScale_SeqBAIJ,
2017: MatNorm_SeqBAIJ,
2018: /*20*/ 0,
2019: MatAssemblyEnd_SeqBAIJ,
2020: 0,
2021: MatSetOption_SeqBAIJ,
2022: MatZeroEntries_SeqBAIJ,
2023: /*25*/ MatZeroRows_SeqBAIJ,
2024: MatLUFactorSymbolic_SeqBAIJ,
2025: MatLUFactorNumeric_SeqBAIJ_N,
2026: 0,
2027: 0,
2028: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2029: MatILUFactorSymbolic_SeqBAIJ,
2030: 0,
2031: MatGetArray_SeqBAIJ,
2032: MatRestoreArray_SeqBAIJ,
2033: /*35*/ MatDuplicate_SeqBAIJ,
2034: 0,
2035: 0,
2036: MatILUFactor_SeqBAIJ,
2037: 0,
2038: /*40*/ MatAXPY_SeqBAIJ,
2039: MatGetSubMatrices_SeqBAIJ,
2040: MatIncreaseOverlap_SeqBAIJ,
2041: MatGetValues_SeqBAIJ,
2042: 0,
2043: /*45*/ MatPrintHelp_SeqBAIJ,
2044: MatScale_SeqBAIJ,
2045: 0,
2046: 0,
2047: 0,
2048: /*50*/ MatGetBlockSize_SeqBAIJ,
2049: MatGetRowIJ_SeqBAIJ,
2050: MatRestoreRowIJ_SeqBAIJ,
2051: 0,
2052: 0,
2053: /*55*/ 0,
2054: 0,
2055: 0,
2056: 0,
2057: MatSetValuesBlocked_SeqBAIJ,
2058: /*60*/ MatGetSubMatrix_SeqBAIJ,
2059: MatDestroy_SeqBAIJ,
2060: MatView_SeqBAIJ,
2061: MatGetPetscMaps_Petsc,
2062: 0,
2063: /*65*/ 0,
2064: 0,
2065: 0,
2066: 0,
2067: 0,
2068: /*70*/ MatGetRowMax_SeqBAIJ,
2069: MatConvert_Basic,
2070: 0,
2071: 0,
2072: 0,
2073: /*75*/ 0,
2074: 0,
2075: 0,
2076: 0,
2077: 0,
2078: /*80*/ 0,
2079: 0,
2080: 0,
2081: 0,
2082: /*85*/ MatLoad_SeqBAIJ,
2083: 0,
2084: 0,
2085: 0,
2086: 0
2087: };
2089: EXTERN_C_BEGIN
2092: int MatStoreValues_SeqBAIJ(Mat mat)
2093: {
2094: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2095: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
2096: int ierr;
2099: if (aij->nonew != 1) {
2100: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2101: }
2103: /* allocate space for values if not already there */
2104: if (!aij->saved_values) {
2105: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2106: }
2108: /* copy values over */
2109: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2110: return(0);
2111: }
2112: EXTERN_C_END
2114: EXTERN_C_BEGIN
2117: int MatRetrieveValues_SeqBAIJ(Mat mat)
2118: {
2119: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2120: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
2123: if (aij->nonew != 1) {
2124: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2125: }
2126: if (!aij->saved_values) {
2127: SETERRQ(1,"Must call MatStoreValues(A);first");
2128: }
2130: /* copy values over */
2131: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2132: return(0);
2133: }
2134: EXTERN_C_END
2136: EXTERN_C_BEGIN
2137: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2138: extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2139: EXTERN_C_END
2141: EXTERN_C_BEGIN
2144: int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2145: {
2146: Mat_SeqBAIJ *b;
2147: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
2148: PetscTruth flg;
2152: B->preallocated = PETSC_TRUE;
2153: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
2154: if (nnz && newbs != bs) {
2155: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
2156: }
2157: bs = newbs;
2159: mbs = B->m/bs;
2160: nbs = B->n/bs;
2161: bs2 = bs*bs;
2163: if (mbs*bs!=B->m || nbs*bs!=B->n) {
2164: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2165: }
2167: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2168: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2169: if (nnz) {
2170: for (i=0; i<mbs; i++) {
2171: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2172: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2173: }
2174: }
2176: b = (Mat_SeqBAIJ*)B->data;
2177: PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
2178: B->ops->solve = MatSolve_SeqBAIJ_Update;
2179: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2180: if (!flg) {
2181: switch (bs) {
2182: case 1:
2183: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2184: B->ops->mult = MatMult_SeqBAIJ_1;
2185: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2186: break;
2187: case 2:
2188: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2189: B->ops->mult = MatMult_SeqBAIJ_2;
2190: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2191: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2192: break;
2193: case 3:
2194: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2195: B->ops->mult = MatMult_SeqBAIJ_3;
2196: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2197: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2198: break;
2199: case 4:
2200: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2201: B->ops->mult = MatMult_SeqBAIJ_4;
2202: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2203: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2204: break;
2205: case 5:
2206: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2207: B->ops->mult = MatMult_SeqBAIJ_5;
2208: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2209: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2210: break;
2211: case 6:
2212: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2213: B->ops->mult = MatMult_SeqBAIJ_6;
2214: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2215: break;
2216: case 7:
2217: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2218: B->ops->mult = MatMult_SeqBAIJ_7;
2219: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2220: break;
2221: default:
2222: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2223: B->ops->mult = MatMult_SeqBAIJ_N;
2224: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2225: break;
2226: }
2227: }
2228: b->bs = bs;
2229: b->mbs = mbs;
2230: b->nbs = nbs;
2231: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
2232: if (!nnz) {
2233: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2234: else if (nz <= 0) nz = 1;
2235: for (i=0; i<mbs; i++) b->imax[i] = nz;
2236: nz = nz*mbs;
2237: } else {
2238: nz = 0;
2239: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2240: }
2242: /* allocate the matrix space */
2243: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2244: PetscMalloc(len,&b->a);
2245: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2246: b->j = (int*)(b->a + nz*bs2);
2247: PetscMemzero(b->j,nz*sizeof(int));
2248: b->i = b->j + nz;
2249: b->singlemalloc = PETSC_TRUE;
2251: b->i[0] = 0;
2252: for (i=1; i<mbs+1; i++) {
2253: b->i[i] = b->i[i-1] + b->imax[i-1];
2254: }
2256: /* b->ilen will count nonzeros in each block row so far. */
2257: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
2258: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2259: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2261: b->bs = bs;
2262: b->bs2 = bs2;
2263: b->mbs = mbs;
2264: b->nz = 0;
2265: b->maxnz = nz*bs2;
2266: B->info.nz_unneeded = (PetscReal)b->maxnz;
2267: return(0);
2268: }
2269: EXTERN_C_END
2271: /*MC
2272: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2273: block sparse compressed row format.
2275: Options Database Keys:
2276: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2278: Level: beginner
2280: .seealso: MatCreateSeqBAIJ
2281: M*/
2283: EXTERN_C_BEGIN
2286: int MatCreate_SeqBAIJ(Mat B)
2287: {
2288: int ierr,size;
2289: Mat_SeqBAIJ *b;
2292: MPI_Comm_size(B->comm,&size);
2293: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2295: B->m = B->M = PetscMax(B->m,B->M);
2296: B->n = B->N = PetscMax(B->n,B->N);
2297: PetscNew(Mat_SeqBAIJ,&b);
2298: B->data = (void*)b;
2299: PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2300: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2301: B->factor = 0;
2302: B->lupivotthreshold = 1.0;
2303: B->mapping = 0;
2304: b->row = 0;
2305: b->col = 0;
2306: b->icol = 0;
2307: b->reallocs = 0;
2308: b->saved_values = 0;
2309: #if defined(PETSC_USE_MAT_SINGLE)
2310: b->setvalueslen = 0;
2311: b->setvaluescopy = PETSC_NULL;
2312: #endif
2314: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
2315: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
2317: b->sorted = PETSC_FALSE;
2318: b->roworiented = PETSC_TRUE;
2319: b->nonew = 0;
2320: b->diag = 0;
2321: b->solve_work = 0;
2322: b->mult_work = 0;
2323: B->spptr = 0;
2324: B->info.nz_unneeded = (PetscReal)b->maxnz;
2325: b->keepzeroedrows = PETSC_FALSE;
2326: b->xtoy = 0;
2327: b->XtoY = 0;
2329: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2330: "MatStoreValues_SeqBAIJ",
2331: MatStoreValues_SeqBAIJ);
2332: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2333: "MatRetrieveValues_SeqBAIJ",
2334: MatRetrieveValues_SeqBAIJ);
2335: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2336: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2337: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2338: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2339: "MatConvert_SeqBAIJ_SeqAIJ",
2340: MatConvert_SeqBAIJ_SeqAIJ);
2341: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2342: "MatConvert_SeqBAIJ_SeqSBAIJ",
2343: MatConvert_SeqBAIJ_SeqSBAIJ);
2344: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2345: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2346: MatSeqBAIJSetPreallocation_SeqBAIJ);
2347: return(0);
2348: }
2349: EXTERN_C_END
2353: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2354: {
2355: Mat C;
2356: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2357: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
2360: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2362: *B = 0;
2363: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
2364: MatSetType(C,A->type_name);
2365: c = (Mat_SeqBAIJ*)C->data;
2367: C->M = A->M;
2368: C->N = A->N;
2369: c->bs = a->bs;
2370: c->bs2 = a->bs2;
2371: c->mbs = a->mbs;
2372: c->nbs = a->nbs;
2373: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2375: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
2376: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
2377: for (i=0; i<mbs; i++) {
2378: c->imax[i] = a->imax[i];
2379: c->ilen[i] = a->ilen[i];
2380: }
2382: /* allocate the matrix space */
2383: c->singlemalloc = PETSC_TRUE;
2384: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2385: PetscMalloc(len,&c->a);
2386: c->j = (int*)(c->a + nz*bs2);
2387: c->i = c->j + nz;
2388: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2389: if (mbs > 0) {
2390: PetscMemcpy(c->j,a->j,nz*sizeof(int));
2391: if (cpvalues == MAT_COPY_VALUES) {
2392: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2393: } else {
2394: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2395: }
2396: }
2398: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2399: c->sorted = a->sorted;
2400: c->roworiented = a->roworiented;
2401: c->nonew = a->nonew;
2403: if (a->diag) {
2404: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
2405: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2406: for (i=0; i<mbs; i++) {
2407: c->diag[i] = a->diag[i];
2408: }
2409: } else c->diag = 0;
2410: c->nz = a->nz;
2411: c->maxnz = a->maxnz;
2412: c->solve_work = 0;
2413: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
2414: c->mult_work = 0;
2415: C->preallocated = PETSC_TRUE;
2416: C->assembled = PETSC_TRUE;
2417: *B = C;
2418: PetscFListDuplicate(A->qlist,&C->qlist);
2419: return(0);
2420: }
2424: int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2425: {
2426: Mat_SeqBAIJ *a;
2427: Mat B;
2428: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2429: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2430: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
2431: int *masked,nmask,tmp,bs2,ishift;
2432: PetscScalar *aa;
2433: MPI_Comm comm = ((PetscObject)viewer)->comm;
2436: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2437: bs2 = bs*bs;
2439: MPI_Comm_size(comm,&size);
2440: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2441: PetscViewerBinaryGetDescriptor(viewer,&fd);
2442: PetscBinaryRead(fd,header,4,PETSC_INT);
2443: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2444: M = header[1]; N = header[2]; nz = header[3];
2446: if (header[3] < 0) {
2447: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2448: }
2450: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2452: /*
2453: This code adds extra rows to make sure the number of rows is
2454: divisible by the blocksize
2455: */
2456: mbs = M/bs;
2457: extra_rows = bs - M + bs*(mbs);
2458: if (extra_rows == bs) extra_rows = 0;
2459: else mbs++;
2460: if (extra_rows) {
2461: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2462: }
2464: /* read in row lengths */
2465: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
2466: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2467: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2469: /* read in column indices */
2470: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
2471: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2472: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2474: /* loop over row lengths determining block row lengths */
2475: PetscMalloc(mbs*sizeof(int),&browlengths);
2476: PetscMemzero(browlengths,mbs*sizeof(int));
2477: PetscMalloc(2*mbs*sizeof(int),&mask);
2478: PetscMemzero(mask,mbs*sizeof(int));
2479: masked = mask + mbs;
2480: rowcount = 0; nzcount = 0;
2481: for (i=0; i<mbs; i++) {
2482: nmask = 0;
2483: for (j=0; j<bs; j++) {
2484: kmax = rowlengths[rowcount];
2485: for (k=0; k<kmax; k++) {
2486: tmp = jj[nzcount++]/bs;
2487: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2488: }
2489: rowcount++;
2490: }
2491: browlengths[i] += nmask;
2492: /* zero out the mask elements we set */
2493: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2494: }
2496: /* create our matrix */
2497: MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2498: MatSetType(B,type);
2499: MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
2500: a = (Mat_SeqBAIJ*)B->data;
2502: /* set matrix "i" values */
2503: a->i[0] = 0;
2504: for (i=1; i<= mbs; i++) {
2505: a->i[i] = a->i[i-1] + browlengths[i-1];
2506: a->ilen[i-1] = browlengths[i-1];
2507: }
2508: a->nz = 0;
2509: for (i=0; i<mbs; i++) a->nz += browlengths[i];
2511: /* read in nonzero values */
2512: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2513: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2514: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2516: /* set "a" and "j" values into matrix */
2517: nzcount = 0; jcount = 0;
2518: for (i=0; i<mbs; i++) {
2519: nzcountb = nzcount;
2520: nmask = 0;
2521: for (j=0; j<bs; j++) {
2522: kmax = rowlengths[i*bs+j];
2523: for (k=0; k<kmax; k++) {
2524: tmp = jj[nzcount++]/bs;
2525: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2526: }
2527: }
2528: /* sort the masked values */
2529: PetscSortInt(nmask,masked);
2531: /* set "j" values into matrix */
2532: maskcount = 1;
2533: for (j=0; j<nmask; j++) {
2534: a->j[jcount++] = masked[j];
2535: mask[masked[j]] = maskcount++;
2536: }
2537: /* set "a" values into matrix */
2538: ishift = bs2*a->i[i];
2539: for (j=0; j<bs; j++) {
2540: kmax = rowlengths[i*bs+j];
2541: for (k=0; k<kmax; k++) {
2542: tmp = jj[nzcountb]/bs ;
2543: block = mask[tmp] - 1;
2544: point = jj[nzcountb] - bs*tmp;
2545: idx = ishift + bs2*block + j + bs*point;
2546: a->a[idx] = (MatScalar)aa[nzcountb++];
2547: }
2548: }
2549: /* zero out the mask elements we set */
2550: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2551: }
2552: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2554: PetscFree(rowlengths);
2555: PetscFree(browlengths);
2556: PetscFree(aa);
2557: PetscFree(jj);
2558: PetscFree(mask);
2560: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2561: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2562: MatView_Private(B);
2564: *A = B;
2565: return(0);
2566: }
2570: /*@C
2571: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2572: compressed row) format. For good matrix assembly performance the
2573: user should preallocate the matrix storage by setting the parameter nz
2574: (or the array nnz). By setting these parameters accurately, performance
2575: during matrix assembly can be increased by more than a factor of 50.
2577: Collective on MPI_Comm
2579: Input Parameters:
2580: + comm - MPI communicator, set to PETSC_COMM_SELF
2581: . bs - size of block
2582: . m - number of rows
2583: . n - number of columns
2584: . nz - number of nonzero blocks per block row (same for all rows)
2585: - nnz - array containing the number of nonzero blocks in the various block rows
2586: (possibly different for each block row) or PETSC_NULL
2588: Output Parameter:
2589: . A - the matrix
2591: Options Database Keys:
2592: . -mat_no_unroll - uses code that does not unroll the loops in the
2593: block calculations (much slower)
2594: . -mat_block_size - size of the blocks to use
2596: Level: intermediate
2598: Notes:
2599: A nonzero block is any block that as 1 or more nonzeros in it
2601: The block AIJ format is fully compatible with standard Fortran 77
2602: storage. That is, the stored row and column indices can begin at
2603: either one (as in Fortran) or zero. See the users' manual for details.
2605: Specify the preallocated storage with either nz or nnz (not both).
2606: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2607: allocation. For additional details, see the users manual chapter on
2608: matrices.
2610: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2611: @*/
2612: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2613: {
2615:
2617: MatCreate(comm,m,n,m,n,A);
2618: MatSetType(*A,MATSEQBAIJ);
2619: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2620: return(0);
2621: }
2625: /*@C
2626: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2627: per row in the matrix. For good matrix assembly performance the
2628: user should preallocate the matrix storage by setting the parameter nz
2629: (or the array nnz). By setting these parameters accurately, performance
2630: during matrix assembly can be increased by more than a factor of 50.
2632: Collective on MPI_Comm
2634: Input Parameters:
2635: + A - the matrix
2636: . bs - size of block
2637: . nz - number of block nonzeros per block row (same for all rows)
2638: - nnz - array containing the number of block nonzeros in the various block rows
2639: (possibly different for each block row) or PETSC_NULL
2641: Options Database Keys:
2642: . -mat_no_unroll - uses code that does not unroll the loops in the
2643: block calculations (much slower)
2644: . -mat_block_size - size of the blocks to use
2646: Level: intermediate
2648: Notes:
2649: The block AIJ format is fully compatible with standard Fortran 77
2650: storage. That is, the stored row and column indices can begin at
2651: either one (as in Fortran) or zero. See the users' manual for details.
2653: Specify the preallocated storage with either nz or nnz (not both).
2654: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2655: allocation. For additional details, see the users manual chapter on
2656: matrices.
2658: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2659: @*/
2660: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2661: {
2662: int ierr,(*f)(Mat,int,int,const int[]);
2665: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2666: if (f) {
2667: (*f)(B,bs,nz,nnz);
2668: }
2669: return(0);
2670: }