Actual source code: nn.c
1: /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/
3:  #include "src/ksp/pc/impls/is/nn/nn.h"
5: /* -------------------------------------------------------------------------- */
6: /*
7: PCSetUp_NN - Prepares for the use of the NN preconditioner
8: by setting data structures and options.
10: Input Parameter:
11: . pc - the preconditioner context
13: Application Interface Routine: PCSetUp()
15: Notes:
16: The interface routine PCSetUp() is not usually called directly by
17: the user, but instead is called by PCApply() if necessary.
18: */
21: static int PCSetUp_NN(PC pc)
22: {
24:
26: if (!pc->setupcalled) {
27: /* Set up all the "iterative substructuring" common block */
28: PCISSetUp(pc);
29: /* Create the coarse matrix. */
30: PCNNCreateCoarseMatrix(pc);
31: }
32: return(0);
33: }
35: /* -------------------------------------------------------------------------- */
36: /*
37: PCApply_NN - Applies the NN preconditioner to a vector.
39: Input Parameters:
40: . pc - the preconditioner context
41: . r - input vector (global)
43: Output Parameter:
44: . z - output vector (global)
46: Application Interface Routine: PCApply()
47: */
50: static int PCApply_NN(PC pc,Vec r,Vec z)
51: {
52: PC_IS *pcis = (PC_IS*)(pc->data);
53: int ierr;
54: PetscScalar m_one = -1.0;
55: Vec w = pcis->vec1_global;
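  /*
     Overview of the steps below: the interior part of r is eliminated with local
     Dirichlet solves, the interface preconditioner (scaled local Neumann solves
     plus coarse balancing) is applied to the resulting condensed interface
     residual, and the interior values of z are then recovered with a second
     round of Dirichlet solves.
  */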
59: /*
60: Dirichlet solvers.
61: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
62: Storing the local results at vec2_D
63: */
64: VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
65: VecScatterEnd (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
66: KSPSetRhs(pcis->ksp_D,pcis->vec1_D);
67: KSPSetSolution(pcis->ksp_D,pcis->vec2_D);
68: KSPSolve(pcis->ksp_D);
69:
70: /*
71: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
72: Storing the result in the interface portion of the global vector w.
73: */
74: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
75: VecScale(&m_one,pcis->vec1_B);
76: VecCopy(r,w);
77: VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
78: VecScatterEnd (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
80: /*
81: Apply the interface preconditioner
82: */
83: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
84: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
86: /*
87: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
88: The result is stored in vec1_D.
89: */
90: VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
91: VecScatterEnd (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
92: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
94: /*
95: Dirichlet solvers.
96: Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
97: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
98: */
99: VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
100: VecScatterEnd (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
101: KSPSetRhs(pcis->ksp_D,pcis->vec1_D);
102: KSPSetSolution(pcis->ksp_D,pcis->vec2_D);
103: KSPSolve(pcis->ksp_D);
104: VecScale(&m_one,pcis->vec2_D);
105: VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
106: VecScatterEnd (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
108: return(0);
109: }
111: /* -------------------------------------------------------------------------- */
112: /*
113: PCDestroy_NN - Destroys the private context for the NN preconditioner
114: that was created with PCCreate_NN().
116: Input Parameter:
117: . pc - the preconditioner context
119: Application Interface Routine: PCDestroy()
120: */
123: static int PCDestroy_NN(PC pc)
124: {
125: PC_NN *pcnn = (PC_NN*)pc->data;
126: int ierr;
130: PCISDestroy(pc);
132: if (pcnn->coarse_mat) {MatDestroy(pcnn->coarse_mat);}
133: if (pcnn->coarse_x) {VecDestroy(pcnn->coarse_x);}
134: if (pcnn->coarse_b) {VecDestroy(pcnn->coarse_b);}
135: if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
136: if (pcnn->DZ_IN) {
137: if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
138: PetscFree(pcnn->DZ_IN);
139: }
141: /*
142: Free the private data structure that was hanging off the PC
143: */
144: PetscFree(pcnn);
145: return(0);
146: }
148: /* -------------------------------------------------------------------------- */
149: /*MC
150: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
152: Options Database Keys:
153: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
154: (this skips the first coarse grid solve in the preconditioner)
155: .  -pc_nn_turn_off_second_balancing - do not balance the solution obtained from the local Neumann solves
156: (this skips the second coarse grid solve in the preconditioner)
157: . -pc_is_damp_fixed <fact> -
158: . -pc_is_remove_nullspace_fixed -
159: . -pc_is_set_damping_factor_floating <fact> -
160: . -pc_is_not_damp_floating -
161: -  -pc_is_not_remove_nullspace_floating -
163: Level: intermediate
165: Notes: The matrix used with this preconditioner must be of type MATIS
167: Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
168: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
169:      on the subdomains, though in our experience using approximate solvers is slower).
171: Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
172: Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
173: Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
175: Contributed by Paulo Goldfeld
177: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MatIS
178: M*/
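/*
   A minimal usage sketch: it assumes that A is an assembled matrix of type MATIS
   and that b and x are compatible global vectors; error checking is omitted.

      KSP ksp;
      PC  pc;
      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCNN);
      KSPSetFromOptions(ksp);              (alternatively, use -pc_type nn at run time)
      KSPSetRhs(ksp,b);
      KSPSetSolution(ksp,x);
      KSPSolve(ksp);
      KSPDestroy(ksp);
*/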
179: EXTERN_C_BEGIN
182: int PCCreate_NN(PC pc)
183: {
184: int ierr;
185: PC_NN *pcnn;
189: /*
190: Creates the private data structure for this preconditioner and
191:      attaches it to the PC object.
192: */
193: PetscNew(PC_NN,&pcnn);
194: pc->data = (void*)pcnn;
196: /*
197: Logs the memory usage; this is not needed but allows PETSc to
198: monitor how much memory is being used for various purposes.
199: */
200: PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */
202: PCISCreate(pc);
203: pcnn->coarse_mat = 0;
204: pcnn->coarse_x = 0;
205: pcnn->coarse_b = 0;
206: pcnn->ksp_coarse = 0;
207: pcnn->DZ_IN = 0;
209: /*
210: Set the pointers for the functions that are provided above.
211: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
212: are called, they will automatically call these functions. Note we
213: choose not to provide a couple of these functions since they are
214: not needed.
215: */
216: pc->ops->apply = PCApply_NN;
217: pc->ops->applytranspose = 0;
218: pc->ops->setup = PCSetUp_NN;
219: pc->ops->destroy = PCDestroy_NN;
220: pc->ops->view = 0;
221: pc->ops->applyrichardson = 0;
222: pc->ops->applysymmetricleft = 0;
223: pc->ops->applysymmetricright = 0;
225: return(0);
226: }
227: EXTERN_C_END
230: /* -------------------------------------------------------------------------- */
231: /*
232:    PCNNCreateCoarseMatrix - Creates and assembles the coarse matrix (one row and one column per subdomain), the coarse vectors, and the coarse solver.
233: */
236: int PCNNCreateCoarseMatrix (PC pc)
237: {
238: MPI_Request *send_request, *recv_request;
239: int i, j, k, ierr;
241: PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
242: PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
244: /* aliasing some names */
245: PC_IS* pcis = (PC_IS*)(pc->data);
246: PC_NN* pcnn = (PC_NN*)pc->data;
247: int n_neigh = pcis->n_neigh;
248: int* neigh = pcis->neigh;
249: int* n_shared = pcis->n_shared;
250: int** shared = pcis->shared;
251: PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */
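  /*
     Outline of what follows: each subdomain contributes one row and one column to
     the coarse matrix. The local chunk of the scaled nullspace D*Z is exchanged
     with the neighbouring subdomains, the local Schur complement is applied to
     each chunk, and entry mat[i*n_neigh+j] is obtained as the dot product of
     DZ_IN[i] with the Schur complement applied to DZ_IN[j], taken over the shared
     nodes.
  */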
255: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
256: PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);
258: /* Allocate memory for DZ */
259: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
260:   /* This is just so that DZ_OUT and DZ_IN have exactly the same form.   */
261: {
262: int size_of_Z = 0;
263: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
264: DZ_IN = pcnn->DZ_IN;
265: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
266: for (i=0; i<n_neigh; i++) {
267: size_of_Z += n_shared[i];
268: }
269: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
270: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
271: }
272: for (i=1; i<n_neigh; i++) {
273: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
274: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
275: }
277: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
278: /* First, set the auxiliary array pcis->work_N. */
279: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
280: for (i=1; i<n_neigh; i++){
281: for (j=0; j<n_shared[i]; j++) {
282: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
283: }
284: }
286: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
287: /* Notice that send_request[] and recv_request[] could have one less element. */
288: /* We make them longer to have request[i] corresponding to neigh[i]. */
289: {
290: int tag;
291: PetscObjectGetNewTag((PetscObject)pc,&tag);
292: PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
293: recv_request = send_request + (n_neigh);
294: for (i=1; i<n_neigh; i++) {
295: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
296: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
297: }
298: }
300: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
301: for(j=0; j<n_shared[0]; j++) {
302: DZ_IN[0][j] = pcis->work_N[shared[0][j]];
303: }
305: /* Start computing with local D*Z while communication goes on. */
306:   /* Apply the local Schur complement. The result is stored in pcis->vec1_B */
307:   /* and also scattered to pcis->work_N.                                    */
309: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
310: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
312: /* Compute the first column, while completing the receiving. */
313: for (i=0; i<n_neigh; i++) {
314: MPI_Status stat;
315: int ind=0;
316: if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
317: mat[ind*n_neigh+0] = 0.0;
318: for (k=0; k<n_shared[ind]; k++) {
319: mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
320: }
321: }
323:   /* Compute the remaining columns. */
324: for (j=1; j<n_neigh; j++) {
325: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
326: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
327: for (i=0; i<n_neigh; i++) {
328: mat[i*n_neigh+j] = 0.0;
329: for (k=0; k<n_shared[i]; k++) {
330: mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
331: }
332: }
333: }
335: /* Complete the sending. */
336: if (n_neigh>1) {
337: MPI_Status *stat;
338: PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
339: MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
340: PetscFree(stat);
341: }
343: /* Free the memory for the MPI requests */
344: PetscFree(send_request);
346: /* Free the memory for DZ_OUT */
347: if (DZ_OUT) {
348: if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
349: PetscFree(DZ_OUT);
350: }
352: {
353: int size,n_neigh_m1;
354: MPI_Comm_size(pc->comm,&size);
355: n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
356: /* Create the global coarse vectors (rhs and solution). */
357: VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
358: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
359: /* Create and set the global coarse AIJ matrix. */
360: MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));
361: MatSetType(pcnn->coarse_mat,MATAIJ);
362: MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
363: MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
364: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
365: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
366: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
367: }
369: {
370: int rank;
371: PetscScalar one = 1.0;
372: IS is;
373: MPI_Comm_rank(pc->comm,&rank);
374: /* "Zero out" rows of not-purely-Neumann subdomains */
375:     if (pcis->pure_neumann) { /* does NOT zero the row; creates an empty index set instead, since MatZeroRows() is collective. */
376: ISCreateStride(pc->comm,0,0,0,&is);
377: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
378: ISCreateStride(pc->comm,1,rank,0,&is);
379: }
380: MatZeroRows(pcnn->coarse_mat,is,&one);
381: ISDestroy(is);
382: }
384: /* Create the coarse linear solver context */
385: {
386: PC pc_ctx, inner_pc;
387: KSPCreate(pc->comm,&pcnn->ksp_coarse);
388: KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
389: KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
390: PCSetType(pc_ctx,PCREDUNDANT);
391: KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
392: PCRedundantGetPC(pc_ctx,&inner_pc);
393: PCSetType(inner_pc,PCLU);
394: KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
395: KSPSetFromOptions(pcnn->ksp_coarse);
396:     /* the vectors in the following lines are dummy arguments, just telling the KSP the vector size; the values are not used */
397: KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);
398: KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);
399: KSPSetUp(pcnn->ksp_coarse);
400: }
402: /* Free the memory for mat */
403: PetscFree(mat);
405: /* for DEBUGGING, save the coarse matrix to a file. */
406: {
407: PetscTruth flg;
408: PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
409: if (flg) {
410: PetscViewer viewer;
411: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
412: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
413: MatView(pcnn->coarse_mat,viewer);
414: PetscViewerDestroy(viewer);
415: }
416: }
418: /* Set the variable pcnn->factor_coarse_rhs. */
419: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
421: /* See historical note 02, at the bottom of this file. */
423: return(0);
424: }
426: /* -------------------------------------------------------------------------- */
427: /*
428:    PCNNApplySchurToChunk - Applies the local Schur complement to a chunk of values defined on shared interface nodes.
430: Input parameters:
431: .  pc - the preconditioner context
432: . n - size of chunk
433: . idx - indices of chunk
434: . chunk - values
436: Output parameters:
437: . array_N - result of Schur complement applied to chunk, scattered to big array
438: . vec1_B - result of Schur complement applied to chunk
439: . vec2_B - garbage (used as work space)
440: . vec1_D - garbage (used as work space)
441: . vec2_D - garbage (used as work space)
443: */
446: int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
447: {
448: int i, ierr;
449: PC_IS *pcis = (PC_IS*)(pc->data);
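  /* Scatter the chunk into the full local array, move it to the interface vector,
     apply the local Schur complement, and scatter the result back to array_N.    */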
453: PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
454: for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
455: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
456: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
457: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
459: return(0);
460: }
462: /* -------------------------------------------------------------------------- */
463: /*
464: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
465: the preconditioner for the Schur complement.
467: Input parameter:
468: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
470: Output parameters:
471: . z - global vector of interior and interface nodes. The values on the interface are the result of
472: the application of the interface preconditioner to the interface part of r. The values on the
473: interior nodes are garbage.
474: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
475: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
476: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
477: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
478: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
479: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
480: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
481: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
483: */
486: int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
487: Vec vec2_D, Vec vec1_N, Vec vec2_N)
488: {
489: int ierr;
490: PC_IS* pcis = (PC_IS*)(pc->data);
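  /*
     The interface preconditioner applied below is the balanced Neumann-Neumann
     operator: an optional first balancing step (coarse solve), a scaling of the
     interface residual by the partition-of-unity weights D, a local Neumann solve
     (application of the inverse, or pseudo-inverse, of the local Schur complement),
     and an optional second balancing step.
  */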
494: /*
495: First balancing step.
496: */
497: {
498: PetscTruth flg;
499: PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
500: if (!flg) {
501: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
502: } else {
503: VecCopy(r,z);
504: }
505: }
507: /*
508: Extract the local interface part of z and scale it by D
509: */
510: VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
511: VecScatterEnd (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
512: VecPointwiseMult(pcis->D,vec1_B,vec2_B);
514: /* Neumann Solver */
515: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
517: /*
518: Second balancing step.
519: */
520: {
521: PetscTruth flg;
522: PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);
523: if (!flg) {
524: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
525: } else {
526: PetscScalar zero = 0.0;
527: VecPointwiseMult(pcis->D,vec1_B,vec2_B);
528: VecSet(&zero,z);
529: VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
530: VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
531: }
532: }
534: return(0);
535: }
537: /* -------------------------------------------------------------------------- */
538: /*
539: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
540: input argument u is provided), or s, as given in equations
541: (12) and (13), if the input argument u is a null vector.
542: Notice that the input argument u plays the role of u_i in
543: equation (14). The equation numbers refer to [Man93].
545: Input Parameters:
546: . pcnn - NN preconditioner context.
547: . r - MPI vector of all nodes (interior and interface). It's preserved.
548: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
550: Output Parameters:
551: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
552: . vec1_B - Sequential vector of local interface nodes. Workspace.
553: . vec2_B - Sequential vector of local interface nodes. Workspace.
554: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
555: . vec1_D - Sequential vector of local interior nodes. Workspace.
556: . vec2_D - Sequential vector of local interior nodes. Workspace.
557: . work_N - Array of all local nodes (interior and interface). Workspace.
559: */
562: int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
563: Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
564: {
565: int k, ierr;
566: PetscScalar zero = 0.0;
567: PetscScalar m_one = -1.0;
568: PetscScalar value;
569: PetscScalar* lambda;
570: PC_NN* pcnn = (PC_NN*)(pc->data);
571: PC_IS* pcis = (PC_IS*)(pc->data);
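  /*
     Summary of the balancing step below: the coarse right-hand side is formed as
     the dot product of the local scaled nullspace chunk DZ_IN[0] with the (possibly
     updated) residual on the shared nodes, the coarse problem is solved with
     ksp_coarse, and the resulting coarse correction (a multiple of D*Z on each
     subdomain) is scattered back and combined with the input to produce z.
  */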
574: PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
576: if (u) {
577: if (!vec3_B) { vec3_B = u; }
578: VecPointwiseMult(pcis->D,u,vec1_B);
579: VecSet(&zero,z);
580: VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
581: VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
582: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
583: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
584: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
585: VecScale(&m_one,vec3_B);
586: VecCopy(r,z);
587: VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
588: VecScatterEnd (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
589: } else {
590: VecCopy(r,z);
591: }
592: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
593: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
594: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
595: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
596:   value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
597: {
598: int rank;
599: MPI_Comm_rank(pc->comm,&rank);
600: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
601: /*
602:      Since we are only inserting local values (one value, actually), we don't need the
603:      reduction that would tell us there is no data to be moved; hence these calls are omitted:
604: VecAssemblyBegin(pcnn->coarse_b);
605: VecAssemblyEnd (pcnn->coarse_b);
606: */
607: }
608: KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);
609: KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);
610: KSPSolve(pcnn->ksp_coarse);
611: if (!u) { VecScale(&m_one,pcnn->coarse_x); }
612: VecGetArray(pcnn->coarse_x,&lambda);
613: for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
614: VecRestoreArray(pcnn->coarse_x,&lambda);
615: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
616: VecSet(&zero,z);
617: VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
618: VecScatterEnd (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
619: if (!u) {
620: VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
621: VecScatterEnd (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
622: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
623: VecCopy(r,z);
624: }
625: VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
626: VecScatterEnd (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
627: PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
629: return(0);
630: }
636: /* -------   E N D   O F   T H E   C O D E   ------- */
637: /* */
638: /* From now on, "footnotes" (or "historical notes"). */
639: /* */
640: /* ------------------------------------------------- */
643: #ifdef __HISTORICAL_NOTES___do_not_compile__
645: /* --------------------------------------------------------------------------
646: Historical note 01
647: -------------------------------------------------------------------------- */
648: /*
649: We considered the possibility of an alternative D_i that would still
650: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
651: The basic principle was still the pseudo-inverse of the counting
652: function; the difference was that we would not count subdomains
653: that do not contribute to the coarse space (i.e., not pure-Neumann
654: subdomains).
656: This turned out to be a bad idea: we would solve trivial Neumann
657:    problems in the non-pure-Neumann subdomains, since we would be scaling
658: the balanced residual by zero.
659: */
661: {
662: PetscTruth flg;
663: PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);
664: if (flg) {
665: Vec counter;
666: PetscScalar one=1.0, zero=0.0;
667: VecDuplicate(pc->vec,&counter);
668: VecSet(&zero,counter);
669: if (pcnn->pure_neumann) {
670: VecSet(&one,pcnn->D);
671: } else {
672: VecSet(&zero,pcnn->D);
673: }
674: VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
675: VecScatterEnd (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
676: VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
677: VecScatterEnd (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
678: VecDestroy(counter);
679: if (pcnn->pure_neumann) {
680: VecReciprocal(pcnn->D);
681: } else {
682: VecSet(&zero,pcnn->D);
683: }
684: }
685: }
689: /* --------------------------------------------------------------------------
690: Historical note 02
691: -------------------------------------------------------------------------- */
692: /*
693:    We tried an alternative coarse problem that would eliminate exactly a
694:    constant error. It turned out not to improve the overall convergence.
695: */
697: /* Set the variable pcnn->factor_coarse_rhs. */
698: {
699: PetscTruth flg;
700: PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);
701: if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
702: else {
703: PetscScalar zero = 0.0, one = 1.0;
704: VecSet(&one,pcnn->vec1_B);
705: ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);
706: VecSet(&zero,pcnn->vec1_global);
707: VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
708: VecScatterEnd (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
709: VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
710: VecScatterEnd (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
711: if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
712: else {
713: ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);
714: for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
715: pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
716: }
717: if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
718: else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
719: }
720: }
721: }
723: #endif /* __HISTORICAL_NOTES___do_not_compile */