Actual source code: nn.c

  1: /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/

 3:  #include "src/ksp/pc/impls/is/nn/nn.h"

  5: /* -------------------------------------------------------------------------- */
  6: /*
  7:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  8:                     by setting data structures and options.   

 10:    Input Parameter:
 11: .  pc - the preconditioner context

 13:    Application Interface Routine: PCSetUp()

 15:    Notes:
 16:    The interface routine PCSetUp() is not usually called directly by
 17:    the user, but instead is called by PCApply() if necessary.
 18: */
 21: static int PCSetUp_NN(PC pc)
 22: {
 24: 
 26:   if (!pc->setupcalled) {
 27:     /* Set up all the "iterative substructuring" common block */
 28:     PCISSetUp(pc);
 29:     /* Create the coarse matrix. */
 30:     PCNNCreateCoarseMatrix(pc);
 31:   }
 32:   return(0);
 33: }

 35: /* -------------------------------------------------------------------------- */
 36: /*
 37:    PCApply_NN - Applies the NN preconditioner to a vector.

 39:    Input Parameters:
 40: .  pc - the preconditioner context
 41: .  r - input vector (global)

 43:    Output Parameter:
 44: .  z - output vector (global)

 46:    Application Interface Routine: PCApply()
 47:  */
 50: static int PCApply_NN(PC pc,Vec r,Vec z)
 51: {
 52:   PC_IS       *pcis = (PC_IS*)(pc->data);
 53:   int         ierr;
 54:   PetscScalar m_one = -1.0;
 55:   Vec         w = pcis->vec1_global;


 59:   /*
 60:     Dirichlet solvers.
 61:     Computing $ B_I^{(i)}r_I^{(i)} $ at each processor.
 62:     Storing the local results in vec2_D.
 63:   */
 64:   VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 65:   VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 66:   KSPSetRhs(pcis->ksp_D,pcis->vec1_D);
 67:   KSPSetSolution(pcis->ksp_D,pcis->vec2_D);
 68:   KSPSolve(pcis->ksp_D);
 69: 
 70:   /*
 71:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 72:     Storing the result in the interface portion of the global vector w.
 73:   */
 74:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 75:   VecScale(&m_one,pcis->vec1_B);
 76:   VecCopy(r,w);
 77:   VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
 78:   VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

 80:   /*
 81:     Apply the interface preconditioner
 82:   */
 83:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 84:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 86:   /*
 87:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 88:     The result is stored in vec1_D.
 89:   */
 90:   VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 91:   VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 92:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 94:   /*
 95:     Dirichlet solvers.
 96:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 97:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 98:   */
 99:   VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
100:   VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
101:   KSPSetRhs(pcis->ksp_D,pcis->vec1_D);
102:   KSPSetSolution(pcis->ksp_D,pcis->vec2_D);
103:   KSPSolve(pcis->ksp_D);
104:   VecScale(&m_one,pcis->vec2_D);
105:   VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
106:   VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);

108:   return(0);
109: }
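
/*
   Taken together, the steps above compute, roughly (in the notation of the
   comments),

      z_B       = M^{-1} ( r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} )
      z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B )

   where M^{-1} denotes the balanced Neumann-Neumann interface preconditioner
   applied by PCNNApplyInterfacePreconditioner() below.
*/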

111: /* -------------------------------------------------------------------------- */
112: /*
113:    PCDestroy_NN - Destroys the private context for the NN preconditioner
114:    that was created with PCCreate_NN().

116:    Input Parameter:
117: .  pc - the preconditioner context

119:    Application Interface Routine: PCDestroy()
120: */
123: static int PCDestroy_NN(PC pc)
124: {
125:   PC_NN *pcnn = (PC_NN*)pc->data;
126:   int   ierr;


130:   PCISDestroy(pc);

132:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
133:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
134:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
135:   if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
136:   if (pcnn->DZ_IN) {
137:     if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
138:     PetscFree(pcnn->DZ_IN);
139:   }

141:   /*
142:       Free the private data structure that was hanging off the PC
143:   */
144:   PetscFree(pcnn);
145:   return(0);
146: }

148: /* -------------------------------------------------------------------------- */
149: /*MC
150:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

152:    Options Database Keys:
153: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
154:                                        (this skips the first coarse grid solve in the preconditioner)
155: .    -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
156:                                        (this skips the second coarse grid solve in the preconditioner)
157: .    -pc_is_damp_fixed <fact> -
158: .    -pc_is_remove_nullspace_fixed -
159: .    -pc_is_set_damping_factor_floating <fact> -
160: .    -pc_is_not_damp_floating -
161: -    -pc_is_not_remove_nullspace_floating - 

163:    Level: intermediate

165:    Notes: The matrix used with this preconditioner must be of type MATIS 

167:           Unlike more 'conventional' Neumann-Neumann preconditioners, this iterates over ALL the
168:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
169:           on the subdomains, though in our experience approximate solvers are slower).

171:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
172:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
173:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

175:    Contributed by Paulo Goldfeld

177: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
178: M*/
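
/*
   A minimal usage sketch (assumes A is an already assembled matrix of type
   MATIS, and b and x are compatible global vectors created elsewhere):

      KSP ksp;
      PC  pc;
      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCNN);
      KSPSetFromOptions(ksp);
      KSPSetRhs(ksp,b);
      KSPSetSolution(ksp,x);
      KSPSolve(ksp);
      KSPDestroy(ksp);

   The coarse and local solvers can then be tuned from the command line using
   the prefixes listed above, for example (hypothetical choices):
   -nn_coarse_pc_type lu -is_localD_pc_type icc -is_localN_pc_type ilu
*/
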
179: EXTERN_C_BEGIN
182: int PCCreate_NN(PC pc)
183: {
184:   int   ierr;
185:   PC_NN *pcnn;


189:   /*
190:      Create the private data structure for this preconditioner and
191:      attach it to the PC object.
192:   */
193:   PetscNew(PC_NN,&pcnn);
194:   pc->data  = (void*)pcnn;

196:   /*
197:      Logs the memory usage; this is not needed but allows PETSc to 
198:      monitor how much memory is being used for various purposes.
199:   */
200:   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

202:   PCISCreate(pc);
203:   pcnn->coarse_mat  = 0;
204:   pcnn->coarse_x    = 0;
205:   pcnn->coarse_b    = 0;
206:   pcnn->ksp_coarse = 0;
207:   pcnn->DZ_IN       = 0;

209:   /*
210:       Set the pointers for the functions that are provided above.
211:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
212:       are called, they will automatically call these functions.  Note we
213:       choose not to provide a couple of these functions since they are
214:       not needed.
215:   */
216:   pc->ops->apply               = PCApply_NN;
217:   pc->ops->applytranspose      = 0;
218:   pc->ops->setup               = PCSetUp_NN;
219:   pc->ops->destroy             = PCDestroy_NN;
220:   pc->ops->view                = 0;
221:   pc->ops->applyrichardson     = 0;
222:   pc->ops->applysymmetricleft  = 0;
223:   pc->ops->applysymmetricright = 0;

225:   return(0);
226: }
227: EXTERN_C_END


230: /* -------------------------------------------------------------------------- */
231: /*
232:    PCNNCreateCoarseMatrix - 
233: */
236: int PCNNCreateCoarseMatrix (PC pc)
237: {
238:   MPI_Request    *send_request, *recv_request;
239:   int            i, j, k, ierr;

241:   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
242:   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

244:   /* aliasing some names */
245:   PC_IS*         pcis     = (PC_IS*)(pc->data);
246:   PC_NN*         pcnn     = (PC_NN*)pc->data;
247:   int            n_neigh  = pcis->n_neigh;
248:   int*           neigh    = pcis->neigh;
249:   int*           n_shared = pcis->n_shared;
250:   int**          shared   = pcis->shared;
251:   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */


255:   /* Allocate memory for mat (the +1 handles the case where n_neigh is zero) */
256:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

258:   /* Allocate memory for DZ */
259:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
260:   /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
261:   {
262:     int size_of_Z = 0;
263:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
264:     DZ_IN = pcnn->DZ_IN;
265:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
266:     for (i=0; i<n_neigh; i++) {
267:       size_of_Z += n_shared[i];
268:     }
269:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
270:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
271:   }
272:   for (i=1; i<n_neigh; i++) {
273:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
274:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
275:   }
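
/*
   For example (a hypothetical layout): with n_neigh == 3 and n_shared == {4,2,3},
   DZ_IN[0] points to one contiguous block of 4+2+3 (+1) scalars, and the row
   pointers are DZ_IN[1] = DZ_IN[0]+4 and DZ_IN[2] = DZ_IN[1]+2. DZ_OUT has
   exactly the same layout.
*/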

277:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
278:   /* First, set the auxiliary array pcis->work_N. */
279:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
280:   for (i=1; i<n_neigh; i++){
281:     for (j=0; j<n_shared[i]; j++) {
282:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
283:     }
284:   }

286:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
287:   /* Notice that send_request[] and recv_request[] could have one less element. */
288:   /* We make them longer to have request[i] corresponding to neigh[i].          */
289:   {
290:     int tag;
291:     PetscObjectGetNewTag((PetscObject)pc,&tag);
292:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
293:     recv_request = send_request + (n_neigh);
294:     for (i=1; i<n_neigh; i++) {
295:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
296:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
297:     }
298:   }

300:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
301:   for(j=0; j<n_shared[0]; j++) {
302:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
303:   }

305:   /* Start computing with the local D*Z while the communication goes on. */
306:   /* Apply the Schur complement. The result is stored in pcis->vec1_B    */
307:   /* and also scattered to pcis->work_N.                                 */
309:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
310:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

312:   /* Compute the first column, while completing the receives. */
313:   for (i=0; i<n_neigh; i++) {
314:     MPI_Status stat;
315:     int ind=0;
316:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
317:     mat[ind*n_neigh+0] = 0.0;
318:     for (k=0; k<n_shared[ind]; k++) {
319:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
320:     }
321:   }

323:   /* Compute the remaining columns */
324:   for (j=1; j<n_neigh; j++) {
325:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
326:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
327:     for (i=0; i<n_neigh; i++) {
328:       mat[i*n_neigh+j] = 0.0;
329:       for (k=0; k<n_shared[i]; k++) {
330:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
331:       }
332:     }
333:   }
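
/*
   The dense block assembled in the two loops above is, roughly,

      mat[i*n_neigh+j] = (D Z)_i^T S (D Z)_j ,    0 <= i,j < n_neigh,

   where (D Z)_k is the piece of the scaled null space shared with neigh[k]
   (stored in DZ_IN[k][]) and S is the local Schur complement applied through
   PCNNApplySchurToChunk(). The block is later accumulated with ADD_VALUES into
   rows and columns neigh[0..n_neigh-1] of the global coarse matrix.
*/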

335:   /* Complete the sending. */
336:   if (n_neigh>1) {
337:     MPI_Status *stat;
338:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
339:     MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
340:     PetscFree(stat);
341:   }

343:   /* Free the memory for the MPI requests */
344:   PetscFree(send_request);

346:   /* Free the memory for DZ_OUT */
347:   if (DZ_OUT) {
348:     if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
349:     PetscFree(DZ_OUT);
350:   }

352:   {
353:     int size,n_neigh_m1;
354:     MPI_Comm_size(pc->comm,&size);
355:     n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
356:     /* Create the global coarse vectors (rhs and solution). */
357:     VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
358:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
359:     /* Create and set the global coarse AIJ matrix. */
360:     MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));
361:     MatSetType(pcnn->coarse_mat,MATAIJ);
362:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
363:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
364:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
365:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
366:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
367:   }

369:   {
370:     int         rank;
371:     PetscScalar one = 1.0;
372:     IS          is;
373:     MPI_Comm_rank(pc->comm,&rank);
374:     /* "Zero out" rows of not-purely-Neumann subdomains */
375:     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
376:       ISCreateStride(pc->comm,0,0,0,&is);
377:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
378:       ISCreateStride(pc->comm,1,rank,0,&is);
379:     }
380:     MatZeroRows(pcnn->coarse_mat,is,&one);
381:     ISDestroy(is);
382:   }

384:   /* Create the coarse linear solver context */
385:   {
386:     PC  pc_ctx, inner_pc;
387:     KSPCreate(pc->comm,&pcnn->ksp_coarse);
388:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
389:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
390:     PCSetType(pc_ctx,PCREDUNDANT);
391:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
392:     PCRedundantGetPC(pc_ctx,&inner_pc);
393:     PCSetType(inner_pc,PCLU);
394:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
395:     KSPSetFromOptions(pcnn->ksp_coarse);
396:     /* the vectors in the following lines are dummy arguments, just telling the KSP the vector size. Values are not used */
397:     KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);
398:     KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);
399:     KSPSetUp(pcnn->ksp_coarse);
400:   }

402:   /* Free the memory for mat */
403:   PetscFree(mat);

405:   /* for DEBUGGING, save the coarse matrix to a file. */
406:   {
407:     PetscTruth flg;
408:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
409:     if (flg) {
410:       PetscViewer viewer;
411:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
412:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
413:       MatView(pcnn->coarse_mat,viewer);
414:       PetscViewerDestroy(viewer);
415:     }
416:   }

418:   /*  Set the variable pcnn->factor_coarse_rhs. */
419:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

421:   /* See historical note 02, at the bottom of this file. */

423:   return(0);
424: }

426: /* -------------------------------------------------------------------------- */
427: /*
428:    PCNNApplySchurToChunk - 

430:    Input parameters:
431: .  pc - the preconditioner context
432: .  n - size of chunk
433: .  idx - indices of chunk
434: .  chunk - values

436:    Output parameters:
437: .  array_N - result of Schur complement applied to chunk, scattered to big array
438: .  vec1_B  - result of Schur complement applied to chunk
439: .  vec2_B  - garbage (used as work space)
440: .  vec1_D  - garbage (used as work space)
441: .  vec2_D  - garbage (used as work space)

443: */
446: int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
447: {
448:   int   i, ierr;
449:   PC_IS *pcis = (PC_IS*)(pc->data);


453:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
454:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
455:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
456:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
457:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);

459:   return(0);
460: }
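
/*
   In short: the chunk is expanded to a full local interface vector, the local
   Schur complement is applied to it (via PCISApplySchur()), and the result is
   returned both in vec1_B and scattered back into array_N.
*/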

462: /* -------------------------------------------------------------------------- */
463: /*
464:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 
465:                                       the preconditioner for the Schur complement.

467:    Input parameter:
468: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

470:    Output parameters:
471: .  z - global vector of interior and interface nodes. The values on the interface are the result of
472:        the application of the interface preconditioner to the interface part of r. The values on the
473:        interior nodes are garbage.
474: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
475: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
476: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
477: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
478: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
479: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
480: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
481: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

483: */
486: int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
487:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
488: {
489:   int    ierr;
490:   PC_IS* pcis = (PC_IS*)(pc->data);


494:   /*
495:     First balancing step.
496:   */
497:   {
498:     PetscTruth flg;
499:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
500:     if (!flg) {
501:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
502:     } else {
503:       VecCopy(r,z);
504:     }
505:   }

507:   /*
508:     Extract the local interface part of z and scale it by D 
509:   */
510:   VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
511:   VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
512:   VecPointwiseMult(pcis->D,vec1_B,vec2_B);

514:   /* Neumann Solver */
515:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

517:   /*
518:     Second balancing step.
519:   */
520:   {
521:     PetscTruth flg;
522:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_second_balancing",&flg);
523:     if (!flg) {
524:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
525:     } else {
526:       PetscScalar zero = 0.0;
527:       VecPointwiseMult(pcis->D,vec1_B,vec2_B);
528:       VecSet(&zero,z);
529:       VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
530:       VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
531:     }
532:   }

534:   return(0);
535: }
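
/*
   Loosely summarizing the routine above: the interface part of z is

      z_B = balance( \sum_i \tilde R_i^T D_i S_i^{+} D_i \tilde R_i balance(r_B) ) ,

   i.e. the (optionally balanced) interface residual is scaled by the partition
   of unity D, passed through the local Neumann solvers (PCISApplyInvSchur),
   scaled by D again and assembled; PCNNBalancing() supplies the coarse-grid
   corrections unless they are turned off through the options tested above.
*/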

537: /* -------------------------------------------------------------------------- */
538: /*
539:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
540:                    input argument u is provided), or s, as given in equations
541:                    (12) and (13), if the input argument u is a null vector.
542:                    Notice that the input argument u plays the role of u_i in
543:                    equation (14). The equation numbers refer to [Man93].

545:    Input Parameters:
546: .  pc - NN preconditioner context.
547: .  r - MPI vector of all nodes (interior and interface). It's preserved.
548: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

550:    Output Parameters:
551: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
552: .  vec1_B - Sequential vector of local interface nodes. Workspace.
553: .  vec2_B - Sequential vector of local interface nodes. Workspace.
554: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
555: .  vec1_D - Sequential vector of local interior nodes. Workspace.
556: .  vec2_D - Sequential vector of local interior nodes. Workspace.
557: .  work_N - Array of all local nodes (interior and interface). Workspace.

559: */
562: int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
563:                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
564: {
565:   int            k, ierr;
566:   PetscScalar    zero     =  0.0;
567:   PetscScalar    m_one    = -1.0;
568:   PetscScalar    value;
569:   PetscScalar*   lambda;
570:   PC_NN*         pcnn     = (PC_NN*)(pc->data);
571:   PC_IS*         pcis     = (PC_IS*)(pc->data);

574:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);

576:   if (u) {
577:     if (!vec3_B) { vec3_B = u; }
578:     VecPointwiseMult(pcis->D,u,vec1_B);
579:     VecSet(&zero,z);
580:     VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
581:     VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
582:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
583:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
584:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
585:     VecScale(&m_one,vec3_B);
586:     VecCopy(r,z);
587:     VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
588:     VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
589:   } else {
590:     VecCopy(r,z);
591:   }
592:   VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
593:   VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
594:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
595:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
596:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in PCNNCreateCoarseMatrix(). */
597:   {
598:     int rank;
599:     MPI_Comm_rank(pc->comm,&rank);
600:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
601:     /*
602:        Since we are only inserting local values (one value actually) we don't need to do the 
603:        reduction that tells us there is no data that needs to be moved. Hence we comment out these
604:        VecAssemblyBegin(pcnn->coarse_b); 
605:        VecAssemblyEnd  (pcnn->coarse_b);
606:     */
607:   }
608:   KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);
609:   KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);
610:   KSPSolve(pcnn->ksp_coarse);
611:   if (!u) { VecScale(&m_one,pcnn->coarse_x); }
612:   VecGetArray(pcnn->coarse_x,&lambda);
613:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
614:   VecRestoreArray(pcnn->coarse_x,&lambda);
615:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
616:   VecSet(&zero,z);
617:   VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
618:   VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
619:   if (!u) {
620:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
621:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
622:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
623:     VecCopy(r,z);
624:   }
625:   VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
626:   VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
627:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);

629:   return(0);
630: }
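
/*
   In outline: when u is given, z becomes the assembled D-scaled u plus the
   coarse correction lambda*(D Z), matching equations (15)-(16) of [Man93];
   when u is null, z becomes r minus S applied to the assembled coarse
   correction, matching equations (12)-(13). In both cases the coarse problem
   has one unknown per subdomain: its local right-hand-side entry is
   factor_coarse_rhs * sum_k DZ_IN[0][k]*work_N[shared[0][k]], it is solved
   with ksp_coarse, and the local coarse value lambda is expanded back onto
   the interface as lambda*DZ_IN[0][].
*/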




636: /*  -------   E N D   O F   T H E   C O D E   -------  */
637: /*                                                     */
638: /*  From now on, "footnotes" (or "historical notes").  */
639: /*                                                     */
640: /*  -------------------------------------------------  */


643: #ifdef __HISTORICAL_NOTES___do_not_compile__

645: /* --------------------------------------------------------------------------
646:    Historical note 01 
647:    -------------------------------------------------------------------------- */
648: /*
649:    We considered the possibility of an alternative D_i that would still
650:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
651:    The basic principle was still the pseudo-inverse of the counting
652:    function; the difference was that we would not count subdomains
653:    that do not contribute to the coarse space (i.e., not pure-Neumann
654:    subdomains).

656:    This turned out to be a bad idea:  we would solve trivial Neumann
657:    problems in the non-pure-Neumann subdomains, since we would be scaling
658:    the balanced residual by zero.
659: */

661:     {
662:       PetscTruth flg;
663:       PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);
664:       if (flg) {
665:         Vec    counter;
666:         PetscScalar one=1.0, zero=0.0;
667:         VecDuplicate(pc->vec,&counter);
668:         VecSet(&zero,counter);
669:         if (pcnn->pure_neumann) {
670:           VecSet(&one,pcnn->D);
671:         } else {
672:           VecSet(&zero,pcnn->D);
673:         }
674:         VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
675:         VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
676:         VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
677:         VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
678:         VecDestroy(counter);
679:         if (pcnn->pure_neumann) {
680:           VecReciprocal(pcnn->D);
681:         } else {
682:           VecSet(&zero,pcnn->D);
683:         }
684:       }
685:     }



689: /* --------------------------------------------------------------------------
690:    Historical note 02 
691:    -------------------------------------------------------------------------- */
692: /*
693:    We tried an alternative coarse problem that would eliminate a constant
694:    error exactly. It turned out not to improve the overall convergence.
695: */

697:   /*  Set the variable pcnn->factor_coarse_rhs. */
698:   {
699:     PetscTruth flg;
700:     PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);
701:     if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
702:     else {
703:       PetscScalar zero = 0.0, one = 1.0;
704:       VecSet(&one,pcnn->vec1_B);
705:       ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);
706:       VecSet(&zero,pcnn->vec1_global);
707:       VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
708:       VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
709:       VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
710:       VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
711:       if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
712:       else {
713:         ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);
714:         for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
715:           pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
716:         }
717:         if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
718:         else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
719:       }
720:     }
721:   }

723: #endif /* __HISTORICAL_NOTES___do_not_compile__ */