Actual source code: baijfact.c
1: /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include src/mat/impls/baij/seq/baij.h
6: #include src/inline/ilu.h
8: /* ------------------------------------------------------------*/
9: /*
10: Version for when blocks are 2 by 2
11: */
14: int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
15: {
16: Mat C = *B;
17: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
18: IS isrow = b->row,isicol = b->icol;
19: int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20: int *ajtmpold,*ajtmp,nz,row;
21: int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22: MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
23: MatScalar p1,p2,p3,p4;
24: MatScalar *ba = b->a,*aa = a->a;
27: ISGetIndices(isrow,&r);
28: ISGetIndices(isicol,&ic);
29: PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
31: for (i=0; i<n; i++) {
32: nz = bi[i+1] - bi[i];
33: ajtmp = bj + bi[i];
34: for (j=0; j<nz; j++) {
35: x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
36: }
37: /* load in initial (unfactored row) */
38: idx = r[i];
39: nz = ai[idx+1] - ai[idx];
40: ajtmpold = aj + ai[idx];
41: v = aa + 4*ai[idx];
42: for (j=0; j<nz; j++) {
43: x = rtmp+4*ic[ajtmpold[j]];
44: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
45: v += 4;
46: }
47: row = *ajtmp++;
48: while (row < i) {
49: pc = rtmp + 4*row;
50: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
51: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
52: pv = ba + 4*diag_offset[row];
53: pj = bj + diag_offset[row] + 1;
54: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
55: pc[0] = m1 = p1*x1 + p3*x2;
56: pc[1] = m2 = p2*x1 + p4*x2;
57: pc[2] = m3 = p1*x3 + p3*x4;
58: pc[3] = m4 = p2*x3 + p4*x4;
59: nz = bi[row+1] - diag_offset[row] - 1;
60: pv += 4;
61: for (j=0; j<nz; j++) {
62: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
63: x = rtmp + 4*pj[j];
64: x[0] -= m1*x1 + m3*x2;
65: x[1] -= m2*x1 + m4*x2;
66: x[2] -= m1*x3 + m3*x4;
67: x[3] -= m2*x3 + m4*x4;
68: pv += 4;
69: }
70: PetscLogFlops(16*nz+12);
71: }
72: row = *ajtmp++;
73: }
74: /* finished row so stick it into b->a */
75: pv = ba + 4*bi[i];
76: pj = bj + bi[i];
77: nz = bi[i+1] - bi[i];
78: for (j=0; j<nz; j++) {
79: x = rtmp+4*pj[j];
80: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
81: pv += 4;
82: }
83: /* invert diagonal block */
84: w = ba + 4*diag_offset[i];
85: Kernel_A_gets_inverse_A_2(w);
86: }
88: PetscFree(rtmp);
89: ISRestoreIndices(isicol,&ic);
90: ISRestoreIndices(isrow,&r);
91: C->factor = FACTOR_LU;
92: C->assembled = PETSC_TRUE;
93: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
94: return(0);
95: }
96: /*
97: Version for when blocks are 2 by 2 Using natural ordering
98: */
101: int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
102: {
103: Mat C = *B;
104: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105: int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
106: int *ajtmpold,*ajtmp,nz,row;
107: int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
108: MatScalar *pv,*v,*rtmp,*pc,*w,*x;
109: MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
110: MatScalar *ba = b->a,*aa = a->a;
113: PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
115: for (i=0; i<n; i++) {
116: nz = bi[i+1] - bi[i];
117: ajtmp = bj + bi[i];
118: for (j=0; j<nz; j++) {
119: x = rtmp+4*ajtmp[j];
120: x[0] = x[1] = x[2] = x[3] = 0.0;
121: }
122: /* load in initial (unfactored row) */
123: nz = ai[i+1] - ai[i];
124: ajtmpold = aj + ai[i];
125: v = aa + 4*ai[i];
126: for (j=0; j<nz; j++) {
127: x = rtmp+4*ajtmpold[j];
128: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
129: v += 4;
130: }
131: row = *ajtmp++;
132: while (row < i) {
133: pc = rtmp + 4*row;
134: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
135: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
136: pv = ba + 4*diag_offset[row];
137: pj = bj + diag_offset[row] + 1;
138: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
139: pc[0] = m1 = p1*x1 + p3*x2;
140: pc[1] = m2 = p2*x1 + p4*x2;
141: pc[2] = m3 = p1*x3 + p3*x4;
142: pc[3] = m4 = p2*x3 + p4*x4;
143: nz = bi[row+1] - diag_offset[row] - 1;
144: pv += 4;
145: for (j=0; j<nz; j++) {
146: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
147: x = rtmp + 4*pj[j];
148: x[0] -= m1*x1 + m3*x2;
149: x[1] -= m2*x1 + m4*x2;
150: x[2] -= m1*x3 + m3*x4;
151: x[3] -= m2*x3 + m4*x4;
152: pv += 4;
153: }
154: PetscLogFlops(16*nz+12);
155: }
156: row = *ajtmp++;
157: }
158: /* finished row so stick it into b->a */
159: pv = ba + 4*bi[i];
160: pj = bj + bi[i];
161: nz = bi[i+1] - bi[i];
162: for (j=0; j<nz; j++) {
163: x = rtmp+4*pj[j];
164: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
165: pv += 4;
166: }
167: /* invert diagonal block */
168: w = ba + 4*diag_offset[i];
169: Kernel_A_gets_inverse_A_2(w);
170: /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
171: }
173: PetscFree(rtmp);
174: C->factor = FACTOR_LU;
175: C->assembled = PETSC_TRUE;
176: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
177: return(0);
178: }
180: /* ----------------------------------------------------------- */
181: /*
182: Version for when blocks are 1 by 1.
183: */
186: int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
187: {
188: Mat C = *B;
189: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
190: IS isrow = b->row,isicol = b->icol;
191: int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
192: int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
193: int *diag_offset = b->diag,diag,*pj;
194: MatScalar *pv,*v,*rtmp,multiplier,*pc;
195: MatScalar *ba = b->a,*aa = a->a;
198: ISGetIndices(isrow,&r);
199: ISGetIndices(isicol,&ic);
200: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
202: for (i=0; i<n; i++) {
203: nz = bi[i+1] - bi[i];
204: ajtmp = bj + bi[i];
205: for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
207: /* load in initial (unfactored row) */
208: nz = ai[r[i]+1] - ai[r[i]];
209: ajtmpold = aj + ai[r[i]];
210: v = aa + ai[r[i]];
211: for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
213: row = *ajtmp++;
214: while (row < i) {
215: pc = rtmp + row;
216: if (*pc != 0.0) {
217: pv = ba + diag_offset[row];
218: pj = bj + diag_offset[row] + 1;
219: multiplier = *pc * *pv++;
220: *pc = multiplier;
221: nz = bi[row+1] - diag_offset[row] - 1;
222: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
223: PetscLogFlops(1+2*nz);
224: }
225: row = *ajtmp++;
226: }
227: /* finished row so stick it into b->a */
228: pv = ba + bi[i];
229: pj = bj + bi[i];
230: nz = bi[i+1] - bi[i];
231: for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
232: diag = diag_offset[i] - bi[i];
233: /* check pivot entry for current row */
234: if (pv[diag] == 0.0) {
235: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
236: }
237: pv[diag] = 1.0/pv[diag];
238: }
240: PetscFree(rtmp);
241: ISRestoreIndices(isicol,&ic);
242: ISRestoreIndices(isrow,&r);
243: C->factor = FACTOR_LU;
244: C->assembled = PETSC_TRUE;
245: PetscLogFlops(C->n);
246: return(0);
247: }
250: /* ----------------------------------------------------------- */
253: int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
254: {
255: int ierr;
256: Mat C;
259: MatLUFactorSymbolic(A,row,col,info,&C);
260: MatLUFactorNumeric(A,&C);
261: MatHeaderCopy(A,C);
262: PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
263: return(0);
264: }