Actual source code: cholbs.c

/*$Id: cholbs.c,v 1.64 2001/08/07 03:02:51 balay Exp $*/

#include "petsc.h"

/* We must define MLOG for BlockSolve logging */
#if defined(PETSC_USE_LOG)
#define MLOG
#endif

#include "src/mat/impls/rowbs/mpi/mpirowbs.h"

int MatCholeskyFactorNumeric_MPIRowbs(Mat mat,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif


  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_CHOLESKY) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete Cholesky factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo(mat,"MatCholeskyFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                 mbs->failures,mbs->ierr,mbs->alpha);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  mbs->factor = FACTOR_CHOLESKY;
  return(0);
}
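/*
   The factorization loop above is a simple shifted-retry strategy: when the
   incomplete factorization breaks down, the nonzeros are copied back in, the
   diagonal of the (already scaled) matrix is reset with a larger shift alpha,
   and the factorization is attempted again.  The fragment below is a minimal
   stand-alone sketch of that control flow only; it is not part of this file,
   and try_factor() is a hypothetical stand-in for the
   BScopy_nz/BSset_diag/BSfactor sequence, returning 0 on success and nonzero
   on breakdown.
*/
#if 0
static int factor_with_increasing_shift(void *A,int (*try_factor)(void *,double))
{
  double alpha    = 1.0;           /* initial diagonal shift, as above */
  int    failures = 0;

  while (try_factor(A,alpha)) {    /* breakdown: retry with a larger shift */
    failures++;
    alpha += 0.1;                  /* same increment as the loop above */
  }
  return failures;                 /* number of failed attempts */
}
#endif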

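/*
   Numeric ILU factorization via BlockSolve95.  Apart from using the FACTOR_LU
   tag and marking the factored matrix as assembled, this mirrors the Cholesky
   routine above.
*/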
int MatLUFactorNumeric_MPIRowbs(Mat mat,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_LU) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete LU factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo(mat,"MatLUFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                 mbs->failures,mbs->ierr,mbs->alpha);
  }
  mbs->factor = FACTOR_LU;
  (*factp)->assembled = PETSC_TRUE;
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
/* ------------------------------------------------------------------- */
int MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  int          ierr;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to the vector; mbs->diag holds D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(mbs->diag,mbs->xwork,y);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute; mbs->diag holds D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
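/*
   A rough sketch of the algebra behind MatSolve_MPIRowbs (the exact
   conventions are BlockSolve's, so this is indicative only): the factor was
   computed for the permuted, symmetrically scaled operator
       Ahat = D^{-1/2} P A P^T D^{-1/2} ~= L L^T,
   where P is the permutation in mbs->pA->perm and mbs->diag holds D^{-1/2}.
   When the vectors are not already permuted and scaled (!mbs->vecs_permscale),
   the sequence above therefore computes
       y = P^T D^{-1/2} (L L^T)^{-1} D^{-1/2} P x  ~=  A^{-1} x.
*/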

/* ------------------------------------------------------------------- */
int MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  int          ierr;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to the vector; mbs->diag holds D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(mbs->diag,mbs->xwork,y);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);
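  /* Note: unlike MatSolve_MPIRowbs, no unscaling/unpermuting is done here;
     the half-solved vector is left in BlockSolve's permuted, scaled ordering
     for MatBackwardSolve_MPIRowbs to finish. */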

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  return(0);
}

/* ------------------------------------------------------------------- */
int MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  int          ierr;
  PetscScalar  *ya,*xworka;

#if defined (PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  VecCopy(x,y);

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute; mbs->diag holds D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined (PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
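/*
   Together with MatForwardSolve_MPIRowbs above, this completes the work of
   MatSolve_MPIRowbs: the forward routine applies the permutation/scaling and
   the lower triangular solve, and this routine applies the upper triangular
   solve followed by the inverse scaling/permutation.  At the user level the
   pair should be usable roughly as (sketch only, error checking omitted)

       MatForwardSolve(fact,b,t);
       MatBackwardSolve(fact,t,x);

   which for this format ought to be equivalent to MatSolve(fact,b,x).
*/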


/*
    The logging variables required by BlockSolve.

    This is an ugly hack that allows PETSc to run properly with BlockSolve
  regardless of whether PETSc or BlockSolve is compiled with logging turned on.

    It is fragile because it relies on BlockSolve's logging internals not
  changing, but we have no choice; besides, it is unlikely that BlockSolve
  will see further development in the near future.
*/
double MLOG_flops;
double MLOG_event_flops;
double MLOG_time_stamp;
int    MLOG_sequence_num;
#if defined (MLOG_MAX_EVNTS)
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#else
typedef struct __MLOG_log_type {
  double time_stamp;
  double total_time;
  double flops;
  int    event_num;
} MLOG_log_type;
#define MLOG_MAX_EVNTS 1300
#define MLOG_MAX_ACCUM 75
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#endif