Actual source code: vpscat.c
1: /*$Id: vpscat.c,v 1.165 2001/09/08 13:16:48 bsmith Exp $*/
2: /*
3: Defines parallel vector scatters.
4: */
6: #include "src/vec/is/isimpl.h"
7: #include "vecimpl.h"
8: #include "src/vec/impls/dvecimpl.h"
9: #include "src/vec/impls/mpi/pvecimpl.h"
10: #include "petscsys.h"
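/*
   A rough sketch (illustrative only, error checking omitted) of how the routines in this
   file are reached from the user-level VecScatter API; x, y, ix, and iy are assumed to be
   an existing pair of vectors and index sets.
*/
#if 0
static int ScatterUsageSketch(Vec x,IS ix,Vec y,IS iy)
{
  VecScatter ctx;

  VecScatterCreate(x,ix,y,iy,&ctx);                        /* builds the todata/fromdata contexts used below */
  VecScatterBegin(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);  /* posts sends/receives, does the local part */
  VecScatterEnd(x,y,INSERT_VALUES,SCATTER_FORWARD,ctx);    /* waits on the messages and unpacks them */
  VecScatterDestroy(ctx);
  return 0;
}
#endif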
14: int VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
15: {
16: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
17: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
18: int i,rank,ierr;
19: PetscViewerFormat format;
20: PetscTruth isascii;
23: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
24: if (isascii) {
25: MPI_Comm_rank(ctx->comm,&rank);
26: PetscViewerGetFormat(viewer,&format);
27: if (format == PETSC_VIEWER_ASCII_INFO) {
28: int nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
30: MPI_Reduce(&to->n,&nsend_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
31: MPI_Reduce(&from->n,&nrecv_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
32: itmp = to->starts[to->n];
33: MPI_Reduce(&itmp,&lensend_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
34: itmp = from->starts[from->n];
35: MPI_Reduce(&itmp,&lenrecv_max,1,MPI_INT,MPI_MAX,0,ctx->comm);
36: MPI_Reduce(&itmp,&alldata,1,MPI_INT,MPI_SUM,0,ctx->comm);
38: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
39: PetscViewerASCIIPrintf(viewer," Maximum number sends %d\n",nsend_max);
40: PetscViewerASCIIPrintf(viewer," Maximum number receives %d\n",nrecv_max);
41: PetscViewerASCIIPrintf(viewer," Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Maximum data received %d\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
43: PetscViewerASCIIPrintf(viewer," Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
45: } else {
46: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %d; Number to self = %d\n",rank,to->n,to->local.n);
47: if (to->n) {
48: for (i=0; i<to->n; i++){
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d length = %d to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
50: }
51: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
52: for (i=0; i<to->starts[to->n]; i++){
53: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%d \n",rank,to->indices[i]);
54: }
55: }
57: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %d; Number from self = %d\n",rank,from->n,from->local.n);
58: if (from->n) {
59: for (i=0; i<from->n; i++){
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d length %d from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
61: }
63: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
64: for (i=0; i<from->starts[from->n]; i++){
65: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%d \n",rank,from->indices[i]);
66: }
67: }
68: if (to->local.n) {
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
70: for (i=0; i<to->local.n; i++){
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %d to %d \n",rank,from->local.slots[i],to->local.slots[i]);
72: }
73: }
75: PetscViewerFlush(viewer);
76: }
77: } else {
78: SETERRQ1(1,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
79: }
80: return(0);
81: }
83: /* -----------------------------------------------------------------------------------*/
84: /*
85: The next routine determines which entries of the local part of the scatter
86: simply copy a value into the location it already occupies. Those entries are
87: flagged once here so that portion of the scatter can be skipped later.
88: */
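/*
   Illustrative example (hypothetical slot values): with

       to_slots   = {0, 5, 7, 9}
       from_slots = {0, 5, 8, 9}

   only the third entry differs, so slots_nonmatching becomes {7} and {8} with
   n_nonmatching = 1; when xin == yin the three matching entries would copy a
   value onto itself and are skipped.
*/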
91: int VecScatterLocalOptimize_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from)
92: {
93: int n = gen_to->n,n_nonmatching = 0,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
94: int *nto_slots,*nfrom_slots,j = 0,ierr;
95:
97: for (i=0; i<n; i++) {
98: if (to_slots[i] != from_slots[i]) n_nonmatching++;
99: }
101: if (!n_nonmatching) {
102: gen_to->nonmatching_computed = PETSC_TRUE;
103: gen_to->n_nonmatching = gen_from->n_nonmatching = 0;
104: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %d to 0\n", n);
105: } else if (n_nonmatching == n) {
106: gen_to->nonmatching_computed = PETSC_FALSE;
107: PetscLogInfo(0,"VecScatterLocalOptimize_Private:All values non-matching\n");
108: } else {
109: gen_to->nonmatching_computed= PETSC_TRUE;
110: gen_to->n_nonmatching = gen_from->n_nonmatching = n_nonmatching;
111: PetscMalloc(n_nonmatching*sizeof(int),&nto_slots);
112: gen_to->slots_nonmatching = nto_slots;
113: PetscMalloc(n_nonmatching*sizeof(int),&nfrom_slots);
114: gen_from->slots_nonmatching = nfrom_slots;
115: for (i=0; i<n; i++) {
116: if (to_slots[i] != from_slots[i]) {
117: nto_slots[j] = to_slots[i];
118: nfrom_slots[j] = from_slots[i];
119: j++;
120: }
121: }
122: PetscLogInfo(0,"VecScatterLocalOptimize_Private:Reduced %d to %d\n",n,n_nonmatching);
123: }
124: return(0);
125: }
127: /* --------------------------------------------------------------------------------------*/
130: int VecScatterCopy_PtoP(VecScatter in,VecScatter out)
131: {
132: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
133: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
134: int len,ny,ierr;
137: out->postrecvs = in->postrecvs;
138: out->begin = in->begin;
139: out->end = in->end;
140: out->copy = in->copy;
141: out->destroy = in->destroy;
142: out->view = in->view;
144: /* allocate entire send scatter context */
145: PetscNew(VecScatter_MPI_General,&out_to);
146: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
147: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
148: ny = in_to->starts[in_to->n];
149: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (in_to->n+1)*sizeof(int) +
150: (in_to->n)*(sizeof(int) + sizeof(MPI_Request));
151: out_to->n = in_to->n;
152: out_to->type = in_to->type;
153: out_to->sendfirst = in_to->sendfirst;
154: PetscMalloc(len,&out_to->values);
155: PetscLogObjectMemory(out,len);
156: out_to->requests = (MPI_Request*)(out_to->values + ny);
157: out_to->indices = (int*)(out_to->requests + out_to->n);
158: out_to->starts = (int*)(out_to->indices + ny);
159: out_to->procs = (int*)(out_to->starts + out_to->n + 1);
160: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(int));
161: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(int));
162: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(int));
163: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
164: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
165: PetscLogObjectMemory(out,2*(PetscMax(in_to->n,in_from->n) + 1)*sizeof(MPI_Status));
166: out->todata = (void*)out_to;
167: out_to->local.n = in_to->local.n;
168: out_to->local.nonmatching_computed = PETSC_FALSE;
169: out_to->local.n_nonmatching = 0;
170: out_to->local.slots_nonmatching = 0;
171: if (in_to->local.n) {
172: PetscMalloc(in_to->local.n*sizeof(int),&out_to->local.slots);
173: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(int));
174: PetscLogObjectMemory(out,in_to->local.n*sizeof(int));
175: } else {
176: out_to->local.slots = 0;
177: }
179: /* allocate entire receive context */
180: PetscNew(VecScatter_MPI_General,&out_from);
181: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
182: out_from->type = in_from->type;
183: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
184: ny = in_from->starts[in_from->n];
185: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (in_from->n+1)*sizeof(int) +
186: (in_from->n)*(sizeof(int) + sizeof(MPI_Request));
187: out_from->n = in_from->n;
188: out_from->sendfirst = in_from->sendfirst;
189: PetscMalloc(len,&out_from->values);
190: PetscLogObjectMemory(out,len);
191: out_from->requests = (MPI_Request*)(out_from->values + ny);
192: out_from->indices = (int*)(out_from->requests + out_from->n);
193: out_from->starts = (int*)(out_from->indices + ny);
194: out_from->procs = (int*)(out_from->starts + out_from->n + 1);
195: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(int));
196: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(int));
197: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(int));
198: out->fromdata = (void*)out_from;
199: out_from->local.n = in_from->local.n;
200: out_from->local.nonmatching_computed = PETSC_FALSE;
201: out_from->local.n_nonmatching = 0;
202: out_from->local.slots_nonmatching = 0;
203: if (in_from->local.n) {
204: PetscMalloc(in_from->local.n*sizeof(int),&out_from->local.slots);
205: PetscLogObjectMemory(out,in_from->local.n*sizeof(int));
206: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(int));
207: } else {
208: out_from->local.slots = 0;
209: }
210: return(0);
211: }
213: /* -------------------------------------------------------------------------------------*/
216: int VecScatterDestroy_PtoP(VecScatter ctx)
217: {
218: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
219: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
223: if (gen_to->local.slots) {PetscFree(gen_to->local.slots);}
224: if (gen_from->local.slots) {PetscFree(gen_from->local.slots);}
225: if (gen_to->local.slots_nonmatching) {PetscFree(gen_to->local.slots_nonmatching);}
226: if (gen_from->local.slots_nonmatching) {PetscFree(gen_from->local.slots_nonmatching);}
227: PetscFree(gen_to->sstatus);
228: PetscFree(gen_to->values);
229: PetscFree(gen_to);
230: PetscFree(gen_from->values);
231: PetscFree(gen_from);
232: PetscLogObjectDestroy(ctx);
233: PetscHeaderDestroy(ctx);
234: return(0);
235: }
237: /* --------------------------------------------------------------------------------------*/
238: /*
239: Even though the next routines are written for parallel
240: vectors, either xin or yin (but not both) may be sequential (Seq)
241: vectors, one per processor.
242:
243: gen_from indices indicate where arriving data is to be stored;
244: gen_to indices indicate where departing data comes from;
245: the naming can be VERY confusing.
247: */
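/*
   Illustrative reading of the two index sets (hypothetical, forward scatter):
   the send side packs

       svalues[j] = xv[gen_to->indices[j]];        gen_to says where data leaves from

   and after a message arrives the receive side unpacks

       yv[gen_from->indices[k]] = rvalues[k];      gen_from says where data lands
*/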
250: int VecScatterBegin_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
251: {
252: VecScatter_MPI_General *gen_to,*gen_from;
253: MPI_Comm comm = ctx->comm;
254: PetscScalar *xv,*yv,*val,*rvalues,*svalues;
255: MPI_Request *rwaits,*swaits;
256: int tag = ctx->tag,i,j,*indices,*rstarts,*sstarts,*rprocs,*sprocs;
257: int nrecvs,nsends,iend,ierr;
260: VecGetArray(xin,&xv);
261: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
262: if (mode & SCATTER_REVERSE){
263: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
264: gen_from = (VecScatter_MPI_General*)ctx->todata;
265: } else {
266: gen_to = (VecScatter_MPI_General*)ctx->todata;
267: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
268: }
269: rvalues = gen_from->values;
270: svalues = gen_to->values;
271: nrecvs = gen_from->n;
272: nsends = gen_to->n;
273: rwaits = gen_from->requests;
274: swaits = gen_to->requests;
275: indices = gen_to->indices;
276: rstarts = gen_from->starts;
277: sstarts = gen_to->starts;
278: rprocs = gen_from->procs;
279: sprocs = gen_to->procs;
281: if (!(mode & SCATTER_LOCAL)) {
283: if (gen_to->sendfirst) {
284: /* do sends: */
285: for (i=0; i<nsends; i++) {
286: val = svalues + sstarts[i];
287: iend = sstarts[i+1]-sstarts[i];
288: /* pack the message */
289: for (j=0; j<iend; j++) {
290: val[j] = xv[*indices++];
291: }
292: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
293: }
294: }
295:
296: /* post receives: */
297: for (i=0; i<nrecvs; i++) {
298: MPI_Irecv(rvalues+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
299: }
301: if (!gen_to->sendfirst) {
302: /* do sends: */
303: for (i=0; i<nsends; i++) {
304: val = svalues + sstarts[i];
305: iend = sstarts[i+1]-sstarts[i];
306: /* pack the message */
307: for (j=0; j<iend; j++) {
308: val[j] = xv[*indices++];
309: }
310: MPI_Isend(val,iend,MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
311: }
312: }
313: }
315: /* take care of local scatters */
316: if (gen_to->local.n && addv == INSERT_VALUES) {
317: if (yv == xv && !gen_to->local.nonmatching_computed) {
318: VecScatterLocalOptimize_Private(&gen_to->local,&gen_from->local);
319: }
320: if (gen_to->local.is_copy) {
321: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
322: } else if (yv != xv || !gen_to->local.nonmatching_computed) {
323: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
324: int n = gen_to->local.n;
325: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
326: } else {
327: /*
328: In this case, it is copying the values into their old locations, thus we can skip those
329: */
330: int *tslots = gen_to->local.slots_nonmatching,*fslots = gen_from->local.slots_nonmatching;
331: int n = gen_to->local.n_nonmatching;
332: for (i=0; i<n; i++) {yv[fslots[i]] = xv[tslots[i]];}
333: }
334: } else if (gen_to->local.n) {
335: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
336: int n = gen_to->local.n;
337: if (addv == ADD_VALUES) {
338: for (i=0; i<n; i++) {yv[fslots[i]] += xv[tslots[i]];}
339: #if !defined(PETSC_USE_COMPLEX)
340: } else if (addv == MAX_VALUES) {
341: for (i=0; i<n; i++) {yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[tslots[i]]);}
342: #endif
343: } else {SETERRQ(1,"Wrong insert option");}
344: }
346: VecRestoreArray(xin,&xv);
347: if (xin != yin) {VecRestoreArray(yin,&yv);}
348: return(0);
349: }
351: /* --------------------------------------------------------------------------------------*/
354: int VecScatterEnd_PtoP(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
355: {
356: VecScatter_MPI_General *gen_to,*gen_from;
357: PetscScalar *rvalues,*yv,*val;
358: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices;
359: MPI_Request *rwaits,*swaits;
360: MPI_Status rstatus,*sstatus;
363: if (mode & SCATTER_LOCAL) return(0);
364: VecGetArray(yin,&yv);
366: if (mode & SCATTER_REVERSE){
367: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
368: gen_from = (VecScatter_MPI_General*)ctx->todata;
369: sstatus = gen_from->sstatus;
370: } else {
371: gen_to = (VecScatter_MPI_General*)ctx->todata;
372: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
373: sstatus = gen_to->sstatus;
374: }
375: rvalues = gen_from->values;
376: nrecvs = gen_from->n;
377: nsends = gen_to->n;
378: rwaits = gen_from->requests;
379: swaits = gen_to->requests;
380: indices = gen_from->indices;
381: rstarts = gen_from->starts;
383: /* wait on receives */
384: count = nrecvs;
385: while (count) {
386: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
387: /* unpack receives into our local space */
388: val = rvalues + rstarts[imdex];
389: n = rstarts[imdex+1]-rstarts[imdex];
390: lindices = indices + rstarts[imdex];
391: if (addv == INSERT_VALUES) {
392: for (i=0; i<n; i++) {
393: yv[lindices[i]] = *val++;
394: }
395: } else if (addv == ADD_VALUES) {
396: for (i=0; i<n; i++) {
397: yv[lindices[i]] += *val++;
398: }
399: #if !defined(PETSC_USE_COMPLEX)
400: } else if (addv == MAX_VALUES) {
401: for (i=0; i<n; i++) {
402: yv[lindices[i]] = PetscMax(yv[lindices[i]],*val); val++;
403: }
404: #endif
405: } else {SETERRQ(1,"Wrong insert option");}
406: count--;
407: }
409: /* wait on sends */
410: if (nsends) {
411: MPI_Waitall(nsends,swaits,sstatus);
412: }
413: VecRestoreArray(yin,&yv);
414: return(0);
415: }
416: /* ==========================================================================================*/
417: /*
418: Special scatters for fixed block sizes. These provide better performance
419: because the local copying and the packing and unpacking are done with
420: loops unrolled to the size of the block.
422: They also use MPI persistent sends and receives, which (at least in theory)
423: allow MPI to optimize repeated sends and receives of the same type.
424: */
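/*
   A minimal sketch (not code from this file; buffer and count names are made up) of the
   MPI persistent-request pattern the block scatters below rely on: the request is
   created once and then restarted for every scatter.
*/
#if 0
static int PersistentSendSketch(PetscScalar *sbuf,int count,int dest,int tag,MPI_Comm comm,int niter)
{
  MPI_Request req;
  MPI_Status  status;
  int         i;

  MPI_Send_init(sbuf,count,MPIU_SCALAR,dest,tag,comm,&req); /* created once, like the Send_init/Recv_init calls below */
  for (i=0; i<niter; i++) {
    MPI_Start(&req);                                        /* restarted for each scatter (MPI_Startall in the real code) */
    MPI_Wait(&req,&status);
  }
  MPI_Request_free(&req);                                   /* released when the scatter context is destroyed */
  return 0;
}
#endif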
426: /*
427: This is for use with the "ready-receiver" mode. In theory, on some
428: machines it could lead to better performance; in practice we have never
429: seen it do so. Enabled with the -vecscatter_rr option.
430: */
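/*
   Rough picture of the ready-send protocol this mode relies on (illustrative only):
   the matching receive MUST already be started when the ready send begins, which is
   exactly what posting the receives ahead of time guarantees.

       receiver:  MPI_Start(&recv_request);            posted in advance by VecScatterPostRecvs_PtoP_X()
       sender:    MPI_Start(&rsend_request);           an MPI_Rsend_init() request; legal only now
       receiver:  MPI_Wait(&recv_request,&status);
*/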
433: int VecScatterPostRecvs_PtoP_X(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
434: {
435: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
438: MPI_Startall_irecv(gen_from->starts[gen_from->n]*gen_from->bs,gen_from->n,gen_from->requests);
439: return(0);
440: }
442: /* --------------------------------------------------------------------------------------*/
443: /*
444: Special optimization to check whether the local part of the scatter is
445: actually a contiguous copy; if so, the scatter routines call PetscMemcpy()
446: instead of performing an indexed copy.
447: */
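/*
   Hypothetical example with bs = 2: to_slots = {4,6,8} and from_slots = {10,12,14}
   both advance by exactly bs, so the entire local scatter collapses to the single call

       PetscMemcpy(yv+10,xv+4,3*2*sizeof(PetscScalar));
*/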
450: int VecScatterLocalOptimizeCopy_Private(VecScatter_Seq_General *gen_to,VecScatter_Seq_General *gen_from,int bs)
451: {
452: int n = gen_to->n,i,*to_slots = gen_to->slots,*from_slots = gen_from->slots;
453: int to_start,from_start;
454:
456: to_start = to_slots[0];
457: from_start = from_slots[0];
459: for (i=1; i<n; i++) {
460: to_start += bs;
461: from_start += bs;
462: if (to_slots[i] != to_start) return(0);
463: if (from_slots[i] != from_start) return(0);
464: }
465: gen_to->is_copy = PETSC_TRUE;
466: gen_to->copy_start = to_slots[0];
467: gen_to->copy_length = bs*sizeof(PetscScalar)*n;
468: gen_from->is_copy = PETSC_TRUE;
469: gen_from->copy_start = from_slots[0];
470: gen_from->copy_length = bs*sizeof(PetscScalar)*n;
472: PetscLogInfo(0,"VecScatterLocalOptimizeCopy_Private:Local scatter is a copy, optimizing for it\n");
474: return(0);
475: }
477: /* --------------------------------------------------------------------------------------*/
481: int VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
482: {
483: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
484: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
485: int len,ny,bs = in_from->bs,ierr;
488: out->postrecvs = in->postrecvs;
489: out->begin = in->begin;
490: out->end = in->end;
491: out->copy = in->copy;
492: out->destroy = in->destroy;
493: out->view = in->view;
495: /* allocate entire send scatter context */
496: PetscNew(VecScatter_MPI_General,&out_to);
497: PetscMemzero(out_to,sizeof(VecScatter_MPI_General));
498: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
499: ny = in_to->starts[in_to->n];
500: len = ny*(sizeof(int) + bs*sizeof(PetscScalar)) + (in_to->n+1)*sizeof(int) +
501: (in_to->n)*(sizeof(int) + sizeof(MPI_Request));
502: out_to->n = in_to->n;
503: out_to->type = in_to->type;
504: out_to->sendfirst = in_to->sendfirst;
506: PetscMalloc(len,&out_to->values);
507: PetscLogObjectMemory(out,len);
508: out_to->requests = (MPI_Request*)(out_to->values + bs*ny);
509: out_to->indices = (int*)(out_to->requests + out_to->n);
510: out_to->starts = (int*)(out_to->indices + ny);
511: out_to->procs = (int*)(out_to->starts + out_to->n + 1);
512: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(int));
513: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(int));
514: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(int));
515: PetscMalloc(2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status),&out_to->sstatus);
516: out_to->rstatus = out_to->sstatus + PetscMax(in_to->n,in_from->n) + 1;
517:
518: PetscLogObjectMemory(out,2*(PetscMax(in_to->n,in_from->n)+1)*sizeof(MPI_Status));
519: out->todata = (void*)out_to;
520: out_to->local.n = in_to->local.n;
521: out_to->local.nonmatching_computed = PETSC_FALSE;
522: out_to->local.n_nonmatching = 0;
523: out_to->local.slots_nonmatching = 0;
524: if (in_to->local.n) {
525: PetscMalloc(in_to->local.n*sizeof(int),&out_to->local.slots);
526: PetscMemcpy(out_to->local.slots,in_to->local.slots,in_to->local.n*sizeof(int));
527: PetscLogObjectMemory(out,in_to->local.n*sizeof(int));
528: } else {
529: out_to->local.slots = 0;
530: }
532: /* allocate entire receive context */
533: PetscNew(VecScatter_MPI_General,&out_from);
534: PetscMemzero(out_from,sizeof(VecScatter_MPI_General));
535: out_from->type = in_from->type;
536: PetscLogObjectMemory(out,sizeof(VecScatter_MPI_General));
537: ny = in_from->starts[in_from->n];
538: len = ny*(sizeof(int) + bs*sizeof(PetscScalar)) + (in_from->n+1)*sizeof(int) +
539: (in_from->n)*(sizeof(int) + sizeof(MPI_Request));
540: out_from->n = in_from->n;
541: out_from->sendfirst = in_from->sendfirst;
542: PetscMalloc(len,&out_from->values);
543: PetscLogObjectMemory(out,len);
544: out_from->requests = (MPI_Request*)(out_from->values + bs*ny);
545: out_from->indices = (int*)(out_from->requests + out_from->n);
546: out_from->starts = (int*)(out_from->indices + ny);
547: out_from->procs = (int*)(out_from->starts + out_from->n + 1);
548: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(int));
549: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(int));
550: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(int));
551: out->fromdata = (void*)out_from;
552: out_from->local.n = in_from->local.n;
553: out_from->local.nonmatching_computed = PETSC_FALSE;
554: out_from->local.n_nonmatching = 0;
555: out_from->local.slots_nonmatching = 0;
556: if (in_from->local.n) {
557: PetscMalloc(in_from->local.n*sizeof(int),&out_from->local.slots);
558: PetscLogObjectMemory(out,in_from->local.n*sizeof(int));
559: PetscMemcpy(out_from->local.slots,in_from->local.slots,in_from->local.n*sizeof(int));
560: } else {
561: out_from->local.slots = 0;
562: }
564: /*
565: set up the request arrays for use with isend_init() and irecv_init()
566: */
567: {
568: MPI_Comm comm;
569: int *sstarts = out_to->starts, *rstarts = out_from->starts;
570: int *sprocs = out_to->procs, *rprocs = out_from->procs;
571: int tag,i;
572: PetscTruth flg;
573: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
574: MPI_Request *rev_swaits,*rev_rwaits;
575: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
577: PetscMalloc((in_to->n+1)*sizeof(MPI_Request),&out_to->rev_requests);
578: PetscMalloc((in_from->n+1)*sizeof(MPI_Request),&out_from->rev_requests);
579: PetscLogObjectMemory(out,(in_to->n+in_from->n+2)*sizeof(MPI_Request));
581: rev_rwaits = out_to->rev_requests;
582: rev_swaits = out_from->rev_requests;
584: out_from->bs = out_to->bs = bs;
585: tag = out->tag;
586: comm = out->comm;
588: /* Register the receives that you will use later (sends for scatter reverse) */
589: for (i=0; i<out_from->n; i++) {
590: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
591: comm,rwaits+i);
592: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
593: comm,rev_swaits+i);
594: }
596: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
597: if (flg) {
598: out->postrecvs = VecScatterPostRecvs_PtoP_X;
599: out_to->use_readyreceiver = PETSC_TRUE;
600: out_from->use_readyreceiver = PETSC_TRUE;
601: for (i=0; i<out_to->n; i++) {
602: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
603: comm,swaits+i);
604: }
605: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter ready receiver mode\n");
606: } else {
607: out->postrecvs = 0;
608: out_to->use_readyreceiver = PETSC_FALSE;
609: out_from->use_readyreceiver = PETSC_FALSE;
610: flg = PETSC_FALSE;
611: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flg);
612: if (flg) {
613: PetscLogInfo(0,"VecScatterCopy_PtoP_X:Using VecScatter Ssend mode\n");
614: }
615: for (i=0; i<out_to->n; i++) {
616: if (!flg) {
617: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
618: comm,swaits+i);
619: } else {
620: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
621: comm,swaits+i);
622: }
623: }
624: }
625: /* Register receives for scatter reverse */
626: for (i=0; i<out_to->n; i++) {
627: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
628: comm,rev_rwaits+i);
629: }
630: }
632: return(0);
633: }
635: /* --------------------------------------------------------------------------------------*/
639: int VecScatterBegin_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
640: {
641: VecScatter_MPI_General *gen_to,*gen_from;
642: PetscScalar *xv,*yv,*val,*svalues;
643: MPI_Request *rwaits,*swaits;
644: int *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,ierr,len;
647: VecGetArray(xin,&xv);
648: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
650: if (mode & SCATTER_REVERSE) {
651: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
652: gen_from = (VecScatter_MPI_General*)ctx->todata;
653: rwaits = gen_from->rev_requests;
654: swaits = gen_to->rev_requests;
655: } else {
656: gen_to = (VecScatter_MPI_General*)ctx->todata;
657: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
658: rwaits = gen_from->requests;
659: swaits = gen_to->requests;
660: }
661: svalues = gen_to->values;
662: nrecvs = gen_from->n;
663: nsends = gen_to->n;
664: indices = gen_to->indices;
665: sstarts = gen_to->starts;
667: if (!(mode & SCATTER_LOCAL)) {
669: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
670: /* post receives since they were not posted in VecScatterPostRecvs() */
671: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
672: }
673: if (ctx->packtogether) {
674: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
675: len = 12*sstarts[nsends];
676: val = svalues;
677: for (i=0; i<len; i += 12) {
678: idx = *indices++;
679: val[0] = xv[idx];
680: val[1] = xv[idx+1];
681: val[2] = xv[idx+2];
682: val[3] = xv[idx+3];
683: val[4] = xv[idx+4];
684: val[5] = xv[idx+5];
685: val[6] = xv[idx+6];
686: val[7] = xv[idx+7];
687: val[8] = xv[idx+8];
688: val[9] = xv[idx+9];
689: val[10] = xv[idx+10];
690: val[11] = xv[idx+11];
691: val += 12;
692: }
693: MPI_Startall_isend(len,nsends,swaits);
694: } else {
695: /* this version packs and sends one at a time */
696: val = svalues;
697: for (i=0; i<nsends; i++) {
698: iend = sstarts[i+1]-sstarts[i];
700: for (j=0; j<iend; j++) {
701: idx = *indices++;
702: val[0] = xv[idx];
703: val[1] = xv[idx+1];
704: val[2] = xv[idx+2];
705: val[3] = xv[idx+3];
706: val[4] = xv[idx+4];
707: val[5] = xv[idx+5];
708: val[6] = xv[idx+6];
709: val[7] = xv[idx+7];
710: val[8] = xv[idx+8];
711: val[9] = xv[idx+9];
712: val[10] = xv[idx+10];
713: val[11] = xv[idx+11];
714: val += 12;
715: }
716: MPI_Start_isend(12*iend,swaits+i);
717: }
718: }
720: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
721: /* post receives since they were not posted in VecScatterPostRecvs() */
722: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
723: }
724: }
726: /* take care of local scatters */
727: if (gen_to->local.n) {
728: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
729: int n = gen_to->local.n,il,ir;
730: if (addv == INSERT_VALUES) {
731: if (gen_to->local.is_copy) {
732: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
733: } else {
734: for (i=0; i<n; i++) {
735: il = fslots[i]; ir = tslots[i];
736: yv[il] = xv[ir];
737: yv[il+1] = xv[ir+1];
738: yv[il+2] = xv[ir+2];
739: yv[il+3] = xv[ir+3];
740: yv[il+4] = xv[ir+4];
741: yv[il+5] = xv[ir+5];
742: yv[il+6] = xv[ir+6];
743: yv[il+7] = xv[ir+7];
744: yv[il+8] = xv[ir+8];
745: yv[il+9] = xv[ir+9];
746: yv[il+10] = xv[ir+10];
747: yv[il+11] = xv[ir+11];
748: }
749: }
750: } else if (addv == ADD_VALUES) {
751: for (i=0; i<n; i++) {
752: il = fslots[i]; ir = tslots[i];
753: yv[il] += xv[ir];
754: yv[il+1] += xv[ir+1];
755: yv[il+2] += xv[ir+2];
756: yv[il+3] += xv[ir+3];
757: yv[il+4] += xv[ir+4];
758: yv[il+5] += xv[ir+5];
759: yv[il+6] += xv[ir+6];
760: yv[il+7] += xv[ir+7];
761: yv[il+8] += xv[ir+8];
762: yv[il+9] += xv[ir+9];
763: yv[il+10] += xv[ir+10];
764: yv[il+11] += xv[ir+11];
765: }
766: #if !defined(PETSC_USE_COMPLEX)
767: } else if (addv == MAX_VALUES) {
768: for (i=0; i<n; i++) {
769: il = fslots[i]; ir = tslots[i];
770: yv[il] = PetscMax(yv[il],xv[ir]);
771: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
772: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
773: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
774: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
775: yv[il+5] = PetscMax(yv[il+5],xv[ir+5]);
776: yv[il+6] = PetscMax(yv[il+6],xv[ir+6]);
777: yv[il+7] = PetscMax(yv[il+7],xv[ir+7]);
778: yv[il+8] = PetscMax(yv[il+8],xv[ir+8]);
779: yv[il+9] = PetscMax(yv[il+9],xv[ir+9]);
780: yv[il+10] = PetscMax(yv[il+10],xv[ir+10]);
781: yv[il+11] = PetscMax(yv[il+11],xv[ir+11]);
782: }
783: #endif
784: } else {SETERRQ(1,"Wrong insert option");}
785: }
786: VecRestoreArray(xin,&xv);
787: if (xin != yin) {VecRestoreArray(yin,&yv);}
788: return(0);
789: }
791: /* --------------------------------------------------------------------------------------*/
795: int VecScatterEnd_PtoP_12(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
796: {
797: VecScatter_MPI_General *gen_to,*gen_from;
798: PetscScalar *rvalues,*yv,*val;
799: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
800: MPI_Request *rwaits,*swaits;
801: MPI_Status *rstatus,*sstatus;
804: if (mode & SCATTER_LOCAL) return(0);
805: VecGetArray(yin,&yv);
807: if (mode & SCATTER_REVERSE) {
808: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
809: gen_from = (VecScatter_MPI_General*)ctx->todata;
810: rwaits = gen_from->rev_requests;
811: swaits = gen_to->rev_requests;
812: sstatus = gen_from->sstatus;
813: rstatus = gen_from->rstatus;
814: } else {
815: gen_to = (VecScatter_MPI_General*)ctx->todata;
816: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
817: rwaits = gen_from->requests;
818: swaits = gen_to->requests;
819: sstatus = gen_to->sstatus;
820: rstatus = gen_to->rstatus;
821: }
822: rvalues = gen_from->values;
823: nrecvs = gen_from->n;
824: nsends = gen_to->n;
825: indices = gen_from->indices;
826: rstarts = gen_from->starts;
828: /* wait on receives */
829: count = nrecvs;
830: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
831: MPI_Waitall(nrecvs,rwaits,rstatus);
832: n = rstarts[count];
833: val = rvalues;
834: lindices = indices;
835: if (addv == INSERT_VALUES) {
836: for (i=0; i<n; i++) {
837: idx = lindices[i];
838: yv[idx] = val[0];
839: yv[idx+1] = val[1];
840: yv[idx+2] = val[2];
841: yv[idx+3] = val[3];
842: yv[idx+4] = val[4];
843: yv[idx+5] = val[5];
844: yv[idx+6] = val[6];
845: yv[idx+7] = val[7];
846: yv[idx+8] = val[8];
847: yv[idx+9] = val[9];
848: yv[idx+10] = val[10];
849: yv[idx+11] = val[11];
850: val += 12;
851: }
852: } else if (addv == ADD_VALUES) {
853: for (i=0; i<n; i++) {
854: idx = lindices[i];
855: yv[idx] += val[0];
856: yv[idx+1] += val[1];
857: yv[idx+2] += val[2];
858: yv[idx+3] += val[3];
859: yv[idx+4] += val[4];
860: yv[idx+5] += val[5];
861: yv[idx+6] += val[6];
862: yv[idx+7] += val[7];
863: yv[idx+8] += val[8];
864: yv[idx+9] += val[9];
865: yv[idx+10] += val[10];
866: yv[idx+11] += val[11];
867: val += 12;
868: }
869: #if !defined(PETSC_USE_COMPLEX)
870: } else if (addv == MAX_VALUES) {
871: for (i=0; i<n; i++) {
872: idx = lindices[i];
873: yv[idx] = PetscMax(yv[idx],val[0]);
874: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
875: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
876: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
877: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
878: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
879: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
880: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
881: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
882: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
883: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
884: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
885: val += 12;
886: }
887: #endif
888: } else {SETERRQ(1,"Wrong insert option");}
889: } else { /* unpack each message as it arrives, default version */
890: while (count) {
891: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
892: /* unpack receives into our local space */
893: val = rvalues + 12*rstarts[imdex];
894: lindices = indices + rstarts[imdex];
895: n = rstarts[imdex+1] - rstarts[imdex];
896: if (addv == INSERT_VALUES) {
897: for (i=0; i<n; i++) {
898: idx = lindices[i];
899: yv[idx] = val[0];
900: yv[idx+1] = val[1];
901: yv[idx+2] = val[2];
902: yv[idx+3] = val[3];
903: yv[idx+4] = val[4];
904: yv[idx+5] = val[5];
905: yv[idx+6] = val[6];
906: yv[idx+7] = val[7];
907: yv[idx+8] = val[8];
908: yv[idx+9] = val[9];
909: yv[idx+10] = val[10];
910: yv[idx+11] = val[11];
911: val += 12;
912: }
913: } else if (addv == ADD_VALUES) {
914: for (i=0; i<n; i++) {
915: idx = lindices[i];
916: yv[idx] += val[0];
917: yv[idx+1] += val[1];
918: yv[idx+2] += val[2];
919: yv[idx+3] += val[3];
920: yv[idx+4] += val[4];
921: yv[idx+5] += val[5];
922: yv[idx+6] += val[6];
923: yv[idx+7] += val[7];
924: yv[idx+8] += val[8];
925: yv[idx+9] += val[9];
926: yv[idx+10] += val[10];
927: yv[idx+11] += val[11];
928: val += 12;
929: }
930: #if !defined(PETSC_USE_COMPLEX)
931: } else if (addv == MAX_VALUES) {
932: for (i=0; i<n; i++) {
933: idx = lindices[i];
934: yv[idx] = PetscMax(yv[idx],val[0]);
935: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
936: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
937: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
938: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
939: yv[idx+5] = PetscMax(yv[idx+5],val[5]);
940: yv[idx+6] = PetscMax(yv[idx+6],val[6]);
941: yv[idx+7] = PetscMax(yv[idx+7],val[7]);
942: yv[idx+8] = PetscMax(yv[idx+8],val[8]);
943: yv[idx+9] = PetscMax(yv[idx+9],val[9]);
944: yv[idx+10] = PetscMax(yv[idx+10],val[10]);
945: yv[idx+11] = PetscMax(yv[idx+11],val[11]);
946: val += 12;
947: }
948: #endif
949: } else {SETERRQ(1,"Wrong insert option");}
950: count--;
951: }
952: }
953: /* wait on sends */
954: if (nsends) {
955: MPI_Waitall(nsends,swaits,sstatus);
956: }
957: VecRestoreArray(yin,&yv);
958: return(0);
959: }
961: /* --------------------------------------------------------------------------------------*/
965: int VecScatterBegin_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
966: {
967: VecScatter_MPI_General *gen_to,*gen_from;
968: PetscScalar *xv,*yv,*val,*svalues;
969: MPI_Request *rwaits,*swaits;
970: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
973: VecGetArray(xin,&xv);
974: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
975: if (mode & SCATTER_REVERSE) {
976: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
977: gen_from = (VecScatter_MPI_General*)ctx->todata;
978: rwaits = gen_from->rev_requests;
979: swaits = gen_to->rev_requests;
980: } else {
981: gen_to = (VecScatter_MPI_General*)ctx->todata;
982: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
983: rwaits = gen_from->requests;
984: swaits = gen_to->requests;
985: }
986: svalues = gen_to->values;
987: nrecvs = gen_from->n;
988: nsends = gen_to->n;
989: indices = gen_to->indices;
990: sstarts = gen_to->starts;
992: if (!(mode & SCATTER_LOCAL)) {
994: if (gen_to->sendfirst) {
995: /* this version packs and sends one at a time */
996: val = svalues;
997: for (i=0; i<nsends; i++) {
998: iend = sstarts[i+1]-sstarts[i];
1000: for (j=0; j<iend; j++) {
1001: idx = *indices++;
1002: val[0] = xv[idx];
1003: val[1] = xv[idx+1];
1004: val[2] = xv[idx+2];
1005: val[3] = xv[idx+3];
1006: val[4] = xv[idx+4];
1007: val += 5;
1008: }
1009: MPI_Start_isend(5*iend,swaits+i);
1010: }
1011: }
1013: if (!gen_from->use_readyreceiver) {
1014: /* post receives since they were not posted in VecScatterPostRecvs() */
1015: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1016: }
1018: if (!gen_to->sendfirst) {
1019: /* this version packs all the messages together and sends */
1020: /*
1021: len = 5*sstarts[nsends];
1022: val = svalues;
1023: for (i=0; i<len; i += 5) {
1024: idx = *indices++;
1025: val[0] = xv[idx];
1026: val[1] = xv[idx+1];
1027: val[2] = xv[idx+2];
1028: val[3] = xv[idx+3];
1029: val[4] = xv[idx+4];
1030: val += 5;
1031: }
1032: MPI_Startall_isend(len,nsends,swaits);
1033: */
1035: /* this version packs and sends one at a time */
1036: val = svalues;
1037: for (i=0; i<nsends; i++) {
1038: iend = sstarts[i+1]-sstarts[i];
1040: for (j=0; j<iend; j++) {
1041: idx = *indices++;
1042: val[0] = xv[idx];
1043: val[1] = xv[idx+1];
1044: val[2] = xv[idx+2];
1045: val[3] = xv[idx+3];
1046: val[4] = xv[idx+4];
1047: val += 5;
1048: }
1049: MPI_Start_isend(5*iend,swaits+i);
1050: }
1051: }
1052: }
1054: /* take care of local scatters */
1055: if (gen_to->local.n) {
1056: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1057: int n = gen_to->local.n,il,ir;
1058: if (addv == INSERT_VALUES) {
1059: if (gen_to->local.is_copy) {
1060: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1061: } else {
1062: for (i=0; i<n; i++) {
1063: il = fslots[i]; ir = tslots[i];
1064: yv[il] = xv[ir];
1065: yv[il+1] = xv[ir+1];
1066: yv[il+2] = xv[ir+2];
1067: yv[il+3] = xv[ir+3];
1068: yv[il+4] = xv[ir+4];
1069: }
1070: }
1071: } else if (addv == ADD_VALUES) {
1072: for (i=0; i<n; i++) {
1073: il = fslots[i]; ir = tslots[i];
1074: yv[il] += xv[ir];
1075: yv[il+1] += xv[ir+1];
1076: yv[il+2] += xv[ir+2];
1077: yv[il+3] += xv[ir+3];
1078: yv[il+4] += xv[ir+4];
1079: }
1080: #if !defined(PETSC_USE_COMPLEX)
1081: } else if (addv == MAX_VALUES) {
1082: for (i=0; i<n; i++) {
1083: il = fslots[i]; ir = tslots[i];
1084: yv[il] = PetscMax(yv[il],xv[ir]);
1085: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1086: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1087: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1088: yv[il+4] = PetscMax(yv[il+4],xv[ir+4]);
1089: }
1090: #endif
1091: } else {SETERRQ(1,"Wrong insert option");}
1092: }
1093: VecRestoreArray(xin,&xv);
1094: if (xin != yin) {VecRestoreArray(yin,&yv);}
1095: return(0);
1096: }
1098: /* --------------------------------------------------------------------------------------*/
1102: int VecScatterEnd_PtoP_5(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1103: {
1104: VecScatter_MPI_General *gen_to,*gen_from;
1105: PetscScalar *rvalues,*yv,*val;
1106: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1107: MPI_Request *rwaits,*swaits;
1108: MPI_Status rstatus,*sstatus;
1111: if (mode & SCATTER_LOCAL) return(0);
1112: VecGetArray(yin,&yv);
1114: if (mode & SCATTER_REVERSE) {
1115: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1116: gen_from = (VecScatter_MPI_General*)ctx->todata;
1117: rwaits = gen_from->rev_requests;
1118: swaits = gen_to->rev_requests;
1119: sstatus = gen_from->sstatus;
1120: } else {
1121: gen_to = (VecScatter_MPI_General*)ctx->todata;
1122: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1123: rwaits = gen_from->requests;
1124: swaits = gen_to->requests;
1125: sstatus = gen_to->sstatus;
1126: }
1127: rvalues = gen_from->values;
1128: nrecvs = gen_from->n;
1129: nsends = gen_to->n;
1130: indices = gen_from->indices;
1131: rstarts = gen_from->starts;
1133: /* wait on receives */
1134: count = nrecvs;
1135: while (count) {
1136: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1137: /* unpack receives into our local space */
1138: val = rvalues + 5*rstarts[imdex];
1139: lindices = indices + rstarts[imdex];
1140: n = rstarts[imdex+1] - rstarts[imdex];
1141: if (addv == INSERT_VALUES) {
1142: for (i=0; i<n; i++) {
1143: idx = lindices[i];
1144: yv[idx] = val[0];
1145: yv[idx+1] = val[1];
1146: yv[idx+2] = val[2];
1147: yv[idx+3] = val[3];
1148: yv[idx+4] = val[4];
1149: val += 5;
1150: }
1151: } else if (addv == ADD_VALUES) {
1152: for (i=0; i<n; i++) {
1153: idx = lindices[i];
1154: yv[idx] += val[0];
1155: yv[idx+1] += val[1];
1156: yv[idx+2] += val[2];
1157: yv[idx+3] += val[3];
1158: yv[idx+4] += val[4];
1159: val += 5;
1160: }
1161: #if !defined(PETSC_USE_COMPLEX)
1162: } else if (addv == MAX_VALUES) {
1163: for (i=0; i<n; i++) {
1164: idx = lindices[i];
1165: yv[idx] = PetscMax(yv[idx],val[0]);
1166: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1167: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1168: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1169: yv[idx+4] = PetscMax(yv[idx+4],val[4]);
1170: val += 5;
1171: }
1172: #endif
1173: } else {SETERRQ(1,"Wrong insert option");}
1174: count--;
1175: }
1176: /* wait on sends */
1177: if (nsends) {
1178: MPI_Waitall(nsends,swaits,sstatus);
1179: }
1180: VecRestoreArray(yin,&yv);
1181: return(0);
1182: }
1184: /* --------------------------------------------------------------------------------------*/
1188: int VecScatterBegin_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1189: {
1190: VecScatter_MPI_General *gen_to,*gen_from;
1191: PetscScalar *xv,*yv,*val,*svalues;
1192: MPI_Request *rwaits,*swaits;
1193: int *indices,*sstarts,iend,i,j,nrecvs,nsends,idx,ierr,len;
1196: VecGetArray(xin,&xv);
1197: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1199: if (mode & SCATTER_REVERSE) {
1200: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1201: gen_from = (VecScatter_MPI_General*)ctx->todata;
1202: rwaits = gen_from->rev_requests;
1203: swaits = gen_to->rev_requests;
1204: } else {
1205: gen_to = (VecScatter_MPI_General*)ctx->todata;
1206: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1207: rwaits = gen_from->requests;
1208: swaits = gen_to->requests;
1209: }
1210: svalues = gen_to->values;
1211: nrecvs = gen_from->n;
1212: nsends = gen_to->n;
1213: indices = gen_to->indices;
1214: sstarts = gen_to->starts;
1216: if (!(mode & SCATTER_LOCAL)) {
1218: if (!gen_from->use_readyreceiver && !gen_to->sendfirst) {
1219: /* post receives since they were not posted in VecScatterPostRecvs() */
1220: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1221: }
1223: if (ctx->packtogether) {
1224: /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
1225: len = 4*sstarts[nsends];
1226: val = svalues;
1227: for (i=0; i<len; i += 4) {
1228: idx = *indices++;
1229: val[0] = xv[idx];
1230: val[1] = xv[idx+1];
1231: val[2] = xv[idx+2];
1232: val[3] = xv[idx+3];
1233: val += 4;
1234: }
1235: MPI_Startall_isend(len,nsends,swaits);
1236: } else {
1237: /* this version packs and sends one at a time, default */
1238: val = svalues;
1239: for (i=0; i<nsends; i++) {
1240: iend = sstarts[i+1]-sstarts[i];
1242: for (j=0; j<iend; j++) {
1243: idx = *indices++;
1244: val[0] = xv[idx];
1245: val[1] = xv[idx+1];
1246: val[2] = xv[idx+2];
1247: val[3] = xv[idx+3];
1248: val += 4;
1249: }
1250: MPI_Start_isend(4*iend,swaits+i);
1251: }
1252: }
1254: if (!gen_from->use_readyreceiver && gen_to->sendfirst) {
1255: /* post receives since they were not posted in VecScatterPostRecvs() */
1256: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1257: }
1258: }
1260: /* take care of local scatters */
1261: if (gen_to->local.n) {
1262: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1263: int n = gen_to->local.n,il,ir;
1264: if (addv == INSERT_VALUES) {
1265: if (gen_to->local.is_copy) {
1266: PetscMemcpy(yv+gen_from->local.copy_start,xv+gen_to->local.copy_start,gen_to->local.copy_length);
1267: } else {
1268: for (i=0; i<n; i++) {
1269: il = fslots[i]; ir = tslots[i];
1270: yv[il] = xv[ir];
1271: yv[il+1] = xv[ir+1];
1272: yv[il+2] = xv[ir+2];
1273: yv[il+3] = xv[ir+3];
1274: }
1275: }
1276: } else if (addv == ADD_VALUES) {
1277: for (i=0; i<n; i++) {
1278: il = fslots[i]; ir = tslots[i];
1279: yv[il] += xv[ir];
1280: yv[il+1] += xv[ir+1];
1281: yv[il+2] += xv[ir+2];
1282: yv[il+3] += xv[ir+3];
1283: }
1284: #if !defined(PETSC_USE_COMPLEX)
1285: } else if (addv == MAX_VALUES) {
1286: for (i=0; i<n; i++) {
1287: il = fslots[i]; ir = tslots[i];
1288: yv[il] = PetscMax(yv[il],xv[ir]);
1289: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1290: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1291: yv[il+3] = PetscMax(yv[il+3],xv[ir+3]);
1292: }
1293: #endif
1294: } else {SETERRQ(1,"Wrong insert option");}
1295: }
1296: VecRestoreArray(xin,&xv);
1297: if (xin != yin) {VecRestoreArray(yin,&yv);}
1298: return(0);
1299: }
1301: /* --------------------------------------------------------------------------------------*/
1305: int VecScatterEnd_PtoP_4(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1306: {
1307: VecScatter_MPI_General *gen_to,*gen_from;
1308: PetscScalar *rvalues,*yv,*val;
1309: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1310: MPI_Request *rwaits,*swaits;
1311: MPI_Status *rstatus,*sstatus;
1314: if (mode & SCATTER_LOCAL) return(0);
1315: VecGetArray(yin,&yv);
1317: if (mode & SCATTER_REVERSE) {
1318: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1319: gen_from = (VecScatter_MPI_General*)ctx->todata;
1320: rwaits = gen_from->rev_requests;
1321: swaits = gen_to->rev_requests;
1322: sstatus = gen_from->sstatus;
1323: rstatus = gen_from->rstatus;
1324: } else {
1325: gen_to = (VecScatter_MPI_General*)ctx->todata;
1326: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1327: rwaits = gen_from->requests;
1328: swaits = gen_to->requests;
1329: sstatus = gen_to->sstatus;
1330: rstatus = gen_to->rstatus;
1331: }
1332: rvalues = gen_from->values;
1333: nrecvs = gen_from->n;
1334: nsends = gen_to->n;
1335: indices = gen_from->indices;
1336: rstarts = gen_from->starts;
1338: /* wait on receives */
1339: count = nrecvs;
1340: if (ctx->packtogether) { /* receive all messages, then unpack all, when -vecscatter_packtogether used */
1341: MPI_Waitall(nrecvs,rwaits,rstatus);
1342: n = rstarts[count];
1343: val = rvalues;
1344: lindices = indices;
1345: if (addv == INSERT_VALUES) {
1346: for (i=0; i<n; i++) {
1347: idx = lindices[i];
1348: yv[idx] = val[0];
1349: yv[idx+1] = val[1];
1350: yv[idx+2] = val[2];
1351: yv[idx+3] = val[3];
1352: val += 4;
1353: }
1354: } else if (addv == ADD_VALUES) {
1355: for (i=0; i<n; i++) {
1356: idx = lindices[i];
1357: yv[idx] += val[0];
1358: yv[idx+1] += val[1];
1359: yv[idx+2] += val[2];
1360: yv[idx+3] += val[3];
1361: val += 4;
1362: }
1363: #if !defined(PETSC_USE_COMPLEX)
1364: } else if (addv == MAX_VALUES) {
1365: for (i=0; i<n; i++) {
1366: idx = lindices[i];
1367: yv[idx] = PetscMax(yv[idx],val[0]);
1368: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1369: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1370: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1371: val += 4;
1372: }
1373: #endif
1374: } else {SETERRQ(1,"Wrong insert option");}
1375: } else { /* unpack each message as it arrives, default version */
1376: while (count) {
1377: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus[0]);
1378: /* unpack receives into our local space */
1379: val = rvalues + 4*rstarts[imdex];
1380: lindices = indices + rstarts[imdex];
1381: n = rstarts[imdex+1] - rstarts[imdex];
1382: if (addv == INSERT_VALUES) {
1383: for (i=0; i<n; i++) {
1384: idx = lindices[i];
1385: yv[idx] = val[0];
1386: yv[idx+1] = val[1];
1387: yv[idx+2] = val[2];
1388: yv[idx+3] = val[3];
1389: val += 4;
1390: }
1391: } else if (addv == ADD_VALUES) {
1392: for (i=0; i<n; i++) {
1393: idx = lindices[i];
1394: yv[idx] += val[0];
1395: yv[idx+1] += val[1];
1396: yv[idx+2] += val[2];
1397: yv[idx+3] += val[3];
1398: val += 4;
1399: }
1400: #if !defined(PETSC_USE_COMPLEX)
1401: } else if (addv == MAX_VALUES) {
1402: for (i=0; i<n; i++) {
1403: idx = lindices[i];
1404: yv[idx] = PetscMax(yv[idx],val[0]);
1405: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1406: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1407: yv[idx+3] = PetscMax(yv[idx+3],val[3]);
1408: val += 4;
1409: }
1410: #endif
1411: } else {SETERRQ(1,"Wrong insert option");}
1412: count--;
1413: }
1414: }
1416: /* wait on sends */
1417: if (nsends) {
1418: MPI_Waitall(nsends,swaits,sstatus);
1419: }
1420: VecRestoreArray(yin,&yv);
1421: return(0);
1422: }
1424: /* --------------------------------------------------------------------------------------*/
1428: int VecScatterBegin_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1429: {
1430: VecScatter_MPI_General *gen_to,*gen_from;
1431: PetscScalar *xv,*yv,*val,*svalues;
1432: MPI_Request *rwaits,*swaits;
1433: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1436: VecGetArray(xin,&xv);
1437: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1439: if (mode & SCATTER_REVERSE) {
1440: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1441: gen_from = (VecScatter_MPI_General*)ctx->todata;
1442: rwaits = gen_from->rev_requests;
1443: swaits = gen_to->rev_requests;
1444: } else {
1445: gen_to = (VecScatter_MPI_General*)ctx->todata;
1446: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1447: rwaits = gen_from->requests;
1448: swaits = gen_to->requests;
1449: }
1450: svalues = gen_to->values;
1451: nrecvs = gen_from->n;
1452: nsends = gen_to->n;
1453: indices = gen_to->indices;
1454: sstarts = gen_to->starts;
1456: if (!(mode & SCATTER_LOCAL)) {
1458: if (gen_to->sendfirst) {
1459: /* this version packs and sends one at a time */
1460: val = svalues;
1461: for (i=0; i<nsends; i++) {
1462: iend = sstarts[i+1]-sstarts[i];
1464: for (j=0; j<iend; j++) {
1465: idx = *indices++;
1466: val[0] = xv[idx];
1467: val[1] = xv[idx+1];
1468: val[2] = xv[idx+2];
1469: val += 3;
1470: }
1471: MPI_Start_isend(3*iend,swaits+i);
1472: }
1473: }
1475: if (!gen_from->use_readyreceiver) {
1476: /* post receives since they were not posted in VecScatterPostRecvs() */
1477: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1478: }
1480: if (!gen_to->sendfirst) {
1481: /* this version packs all the messages together and sends */
1482: /*
1483: len = 3*sstarts[nsends];
1484: val = svalues;
1485: for (i=0; i<len; i += 3) {
1486: idx = *indices++;
1487: val[0] = xv[idx];
1488: val[1] = xv[idx+1];
1489: val[2] = xv[idx+2];
1490: val += 3;
1491: }
1492: MPI_Startall_isend(len,nsends,swaits);
1493: */
1495: /* this version packs and sends one at a time */
1496: val = svalues;
1497: for (i=0; i<nsends; i++) {
1498: iend = sstarts[i+1]-sstarts[i];
1500: for (j=0; j<iend; j++) {
1501: idx = *indices++;
1502: val[0] = xv[idx];
1503: val[1] = xv[idx+1];
1504: val[2] = xv[idx+2];
1505: val += 3;
1506: }
1507: MPI_Start_isend(3*iend,swaits+i);
1508: }
1509: }
1510: }
1512: /* take care of local scatters */
1513: if (gen_to->local.n) {
1514: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1515: int n = gen_to->local.n,il,ir;
1516: if (addv == INSERT_VALUES) {
1517: if (gen_to->local.is_copy) {
1518: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1519: } else {
1520: for (i=0; i<n; i++) {
1521: il = fslots[i]; ir = tslots[i];
1522: yv[il] = xv[ir];
1523: yv[il+1] = xv[ir+1];
1524: yv[il+2] = xv[ir+2];
1526: }
1527: }
1528: } else if (addv == ADD_VALUES) {
1529: for (i=0; i<n; i++) {
1530: il = fslots[i]; ir = tslots[i];
1531: yv[il] += xv[ir];
1532: yv[il+1] += xv[ir+1];
1533: yv[il+2] += xv[ir+2];
1535: }
1536: #if !defined(PETSC_USE_COMPLEX)
1537: } else if (addv == MAX_VALUES) {
1538: for (i=0; i<n; i++) {
1539: il = fslots[i]; ir = tslots[i];
1540: yv[il] = PetscMax(yv[il],xv[ir]);
1541: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1542: yv[il+2] = PetscMax(yv[il+2],xv[ir+2]);
1544: }
1545: #endif
1546: } else {SETERRQ(1,"Wrong insert option");}
1547: }
1548: VecRestoreArray(xin,&xv);
1549: if (xin != yin) {VecRestoreArray(yin,&yv);}
1550: return(0);
1551: }
1553: /* --------------------------------------------------------------------------------------*/
1557: int VecScatterEnd_PtoP_3(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1558: {
1559: VecScatter_MPI_General *gen_to,*gen_from;
1560: PetscScalar *rvalues,*yv,*val;
1561: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1562: MPI_Request *rwaits,*swaits;
1563: MPI_Status rstatus,*sstatus;
1566: if (mode & SCATTER_LOCAL) return(0);
1567: VecGetArray(yin,&yv);
1569: if (mode & SCATTER_REVERSE) {
1570: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1571: gen_from = (VecScatter_MPI_General*)ctx->todata;
1572: rwaits = gen_from->rev_requests;
1573: swaits = gen_to->rev_requests;
1574: sstatus = gen_from->sstatus;
1575: } else {
1576: gen_to = (VecScatter_MPI_General*)ctx->todata;
1577: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1578: rwaits = gen_from->requests;
1579: swaits = gen_to->requests;
1580: sstatus = gen_to->sstatus;
1581: }
1582: rvalues = gen_from->values;
1583: nrecvs = gen_from->n;
1584: nsends = gen_to->n;
1585: indices = gen_from->indices;
1586: rstarts = gen_from->starts;
1588: /* wait on receives */
1589: count = nrecvs;
1590: while (count) {
1591: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1592: /* unpack receives into our local space */
1593: val = rvalues + 3*rstarts[imdex];
1594: lindices = indices + rstarts[imdex];
1595: n = rstarts[imdex+1] - rstarts[imdex];
1596: if (addv == INSERT_VALUES) {
1597: for (i=0; i<n; i++) {
1598: idx = lindices[i];
1599: yv[idx] = val[0];
1600: yv[idx+1] = val[1];
1601: yv[idx+2] = val[2];
1602: val += 3;
1603: }
1604: } else if (addv == ADD_VALUES) {
1605: for (i=0; i<n; i++) {
1606: idx = lindices[i];
1607: yv[idx] += val[0];
1608: yv[idx+1] += val[1];
1609: yv[idx+2] += val[2];
1610: val += 3;
1611: }
1612: #if !defined(PETSC_USE_COMPLEX)
1613: } else if (addv == MAX_VALUES) {
1614: for (i=0; i<n; i++) {
1615: idx = lindices[i];
1616: yv[idx] = PetscMax(yv[idx],val[0]);
1617: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1618: yv[idx+2] = PetscMax(yv[idx+2],val[2]);
1619: val += 3;
1620: }
1621: #endif
1622: } else {SETERRQ(1,"Wrong insert option");}
1623: count--;
1624: }
1625: /* wait on sends */
1626: if (nsends) {
1627: MPI_Waitall(nsends,swaits,sstatus);
1628: }
1629: VecRestoreArray(yin,&yv);
1630: return(0);
1631: }
1633: /* --------------------------------------------------------------------------------------*/
1637: int VecScatterBegin_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1638: {
1639: VecScatter_MPI_General *gen_to,*gen_from;
1640: PetscScalar *xv,*yv,*val,*svalues;
1641: MPI_Request *rwaits,*swaits;
1642: int ierr,i,*indices,*sstarts,iend,j,nrecvs,nsends,idx;
1645: VecGetArray(xin,&xv);
1646: if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}
1647: if (mode & SCATTER_REVERSE) {
1648: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1649: gen_from = (VecScatter_MPI_General*)ctx->todata;
1650: rwaits = gen_from->rev_requests;
1651: swaits = gen_to->rev_requests;
1652: } else {
1653: gen_to = (VecScatter_MPI_General*)ctx->todata;
1654: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1655: rwaits = gen_from->requests;
1656: swaits = gen_to->requests;
1657: }
1658: svalues = gen_to->values;
1659: nrecvs = gen_from->n;
1660: nsends = gen_to->n;
1661: indices = gen_to->indices;
1662: sstarts = gen_to->starts;
1664: if (!(mode & SCATTER_LOCAL)) {
1666: if (gen_to->sendfirst) {
1667: /* this version packs and sends one at a time */
1668: val = svalues;
1669: for (i=0; i<nsends; i++) {
1670: iend = sstarts[i+1]-sstarts[i];
1672: for (j=0; j<iend; j++) {
1673: idx = *indices++;
1674: val[0] = xv[idx];
1675: val[1] = xv[idx+1];
1676: val += 2;
1677: }
1678: MPI_Start_isend(2*iend,swaits+i);
1679: }
1680: }
1682: if (!gen_from->use_readyreceiver) {
1683: /* post receives since they were not posted in VecScatterPostRecvs() */
1684: MPI_Startall_irecv(gen_from->starts[nrecvs]*gen_from->bs,nrecvs,rwaits);
1685: }
1687: if (!gen_to->sendfirst) {
1688: /* this version packs all the messages together and sends */
1689: /*
1690: len = 2*sstarts[nsends];
1691: val = svalues;
1692: for (i=0; i<len; i += 2) {
1693: idx = *indices++;
1694: val[0] = xv[idx];
1695: val[1] = xv[idx+1];
1696: val += 2;
1697: }
1698: MPI_Startall_isend(len,nsends,swaits);
1699: */
1701: /* this version packs and sends one at a time */
1702: val = svalues;
1703: for (i=0; i<nsends; i++) {
1704: iend = sstarts[i+1]-sstarts[i];
1706: for (j=0; j<iend; j++) {
1707: idx = *indices++;
1708: val[0] = xv[idx];
1709: val[1] = xv[idx+1];
1710: val += 2;
1711: }
1712: MPI_Start_isend(2*iend,swaits+i);
1713: }
1714: }
1715: }
1717: /* take care of local scatters */
1718: if (gen_to->local.n) {
1719: int *tslots = gen_to->local.slots,*fslots = gen_from->local.slots;
1720: int n = gen_to->local.n,il,ir;
1721: if (addv == INSERT_VALUES) {
1722: if (gen_to->local.is_copy) {
1723: PetscMemcpy(yv + gen_from->local.copy_start,xv + gen_to->local.copy_start,gen_to->local.copy_length);
1724: } else {
1725: for (i=0; i<n; i++) {
1726: il = fslots[i]; ir = tslots[i];
1727: yv[il] = xv[ir];
1728: yv[il+1] = xv[ir+1];
1729: }
1730: }
1731: } else if (addv == ADD_VALUES) {
1732: for (i=0; i<n; i++) {
1733: il = fslots[i]; ir = tslots[i];
1734: yv[il] += xv[ir];
1735: yv[il+1] += xv[ir+1];
1736: }
1737: #if !defined(PETSC_USE_COMPLEX)
1738: } else if (addv == MAX_VALUES) {
1739: for (i=0; i<n; i++) {
1740: il = fslots[i]; ir = tslots[i];
1741: yv[il] = PetscMax(yv[il],xv[ir]);
1742: yv[il+1] = PetscMax(yv[il+1],xv[ir+1]);
1743: }
1744: #endif
1745: } else {SETERRQ(1,"Wrong insert option");}
1746: }
1747: VecRestoreArray(xin,&xv);
1748: if (xin != yin) {VecRestoreArray(yin,&yv);}
1749: return(0);
1750: }
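/*
   Editor's note (illustrative sketch only, not part of vpscat.c): stripped of
   the MPI calls, the per-message packing in VecScatterBegin_PtoP_2 is an
   indexed gather of bs-sized blocks into a contiguous send buffer. The names
   below are hypothetical; sstarts/indices follow the same CSR-style layout
   used by VecScatter_MPI_General.
*/
static void pack_message_sketch(int msg,const int *sstarts,const int *indices,
                                const double *xv,double *sendbuf,int bs)
{
  int j,k,count = 0;
  for (j=sstarts[msg]; j<sstarts[msg+1]; j++) {
    int idx = indices[j];                       /* start of the source block in xv */
    for (k=0; k<bs; k++) sendbuf[count++] = xv[idx+k];
  }
  /* count == bs*(sstarts[msg+1]-sstarts[msg]) values are now ready to send */
}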
1752: /* --------------------------------------------------------------------------------------*/
1756: int VecScatterEnd_PtoP_2(Vec xin,Vec yin,InsertMode addv,ScatterMode mode,VecScatter ctx)
1757: {
1758: VecScatter_MPI_General *gen_to,*gen_from;
1759: PetscScalar *rvalues,*yv,*val;
1760: int ierr,nrecvs,nsends,i,*indices,count,imdex,n,*rstarts,*lindices,idx;
1761: MPI_Request *rwaits,*swaits;
1762: MPI_Status rstatus,*sstatus;
1765: if (mode & SCATTER_LOCAL) return(0);
1766: VecGetArray(yin,&yv);
1768: if (mode & SCATTER_REVERSE) {
1769: gen_to = (VecScatter_MPI_General*)ctx->fromdata;
1770: gen_from = (VecScatter_MPI_General*)ctx->todata;
1771: rwaits = gen_from->rev_requests;
1772: swaits = gen_to->rev_requests;
1773: sstatus = gen_from->sstatus;
1774: } else {
1775: gen_to = (VecScatter_MPI_General*)ctx->todata;
1776: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1777: rwaits = gen_from->requests;
1778: swaits = gen_to->requests;
1779: sstatus = gen_to->sstatus;
1780: }
1781: rvalues = gen_from->values;
1782: nrecvs = gen_from->n;
1783: nsends = gen_to->n;
1784: indices = gen_from->indices;
1785: rstarts = gen_from->starts;
1787: /* wait on receives */
1788: count = nrecvs;
1789: while (count) {
1790: MPI_Waitany(nrecvs,rwaits,&imdex,&rstatus);
1791: /* unpack receives into our local space */
1792: val = rvalues + 2*rstarts[imdex];
1793: lindices = indices + rstarts[imdex];
1794: n = rstarts[imdex+1] - rstarts[imdex];
1795: if (addv == INSERT_VALUES) {
1796: for (i=0; i<n; i++) {
1797: idx = lindices[i];
1798: yv[idx] = val[0];
1799: yv[idx+1] = val[1];
1800: val += 2;
1801: }
1802: } else if (addv == ADD_VALUES) {
1803: for (i=0; i<n; i++) {
1804: idx = lindices[i];
1805: yv[idx] += val[0];
1806: yv[idx+1] += val[1];
1807: val += 2;
1808: }
1809: #if !defined(PETSC_USE_COMPLEX)
1810: } else if (addv == MAX_VALUES) {
1811: for (i=0; i<n; i++) {
1812: idx = lindices[i];
1813: yv[idx] = PetscMax(yv[idx],val[0]);
1814: yv[idx+1] = PetscMax(yv[idx+1],val[1]);
1815: val += 2;
1816: }
1817: #endif
1818: } else {SETERRQ(1,"Wrong insert option");}
1819: count--;
1820: }
1821: /* wait on sends */
1822: if (nsends) {
1823: MPI_Waitall(nsends,swaits,sstatus);
1824: }
1825: VecRestoreArray(yin,&yv);
1826: return(0);
1827: }
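/*
   Editor's note (illustrative sketch only, not part of vpscat.c): every
   VecScatterEnd_PtoP_* routine drains its receives the same way, processing
   each message as it completes via MPI_Waitany, so the unpack order follows
   network arrival order rather than rank order. A minimal standalone skeleton
   of that pattern; here the persistent receives, created elsewhere with
   MPI_Recv_init, are also started, which the real code does in the Begin
   routine or in VecScatterPostRecvs().
*/
#include <mpi.h>

static void drain_receives_sketch(int nrecvs,MPI_Request *rwaits,
                                  void (*unpack)(int msg,void *ctx),void *ctx)
{
  MPI_Status status;
  int        remaining = nrecvs,imdex;

  MPI_Startall(nrecvs,rwaits);                  /* activate the persistent receives */
  while (remaining--) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&status);  /* whichever message arrived first */
    unpack(imdex,ctx);                          /* caller-supplied unpack routine */
  }
}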
1829: /* ---------------------------------------------------------------------------------*/
1833: int VecScatterDestroy_PtoP_X(VecScatter ctx)
1834: {
1835: VecScatter_MPI_General *gen_to = (VecScatter_MPI_General*)ctx->todata;
1836: VecScatter_MPI_General *gen_from = (VecScatter_MPI_General*)ctx->fromdata;
1837: int i,ierr;
1840: if (gen_to->use_readyreceiver) {
1841: /*
1842: Since ready-receiver mode has already pre-posted the receives, we must
1843: cancel those outstanding receive requests before freeing them
1844: */
1845: for (i=0; i<gen_from->n; i++) {
1846: MPI_Cancel(gen_from->requests+i);
1847: }
1848: }
1850: if (gen_to->local.slots) {PetscFree(gen_to->local.slots);}
1851: if (gen_from->local.slots) {PetscFree(gen_from->local.slots);}
1852: if (gen_to->local.slots_nonmatching) {PetscFree(gen_to->local.slots_nonmatching);}
1853: if (gen_from->local.slots_nonmatching) {PetscFree(gen_from->local.slots_nonmatching);}
1855: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
1856: /*
1857: IBM's PE version of MPI has a bug where freeing these persistent requests
1858: corrupts later message passing.
1859: */
1860: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
1861: for (i=0; i<gen_to->n; i++) {
1862: MPI_Request_free(gen_to->requests + i);
1863: MPI_Request_free(gen_to->rev_requests + i);
1864: }
1866: /*
1867: MPICH could not properly cancel requests, so in ready-receiver mode we
1868: cannot free the requests. This may be fixed now; if not, wrap the following
1869: loop in an if (!gen_to->use_readyreceiver) { ... } block.
1870: */
1871: for (i=0; i<gen_from->n; i++) {
1872: MPI_Request_free(gen_from->requests + i);
1873: MPI_Request_free(gen_from->rev_requests + i);
1874: }
1875: #endif
1876:
1877: PetscFree(gen_to->sstatus);
1878: PetscFree(gen_to->values);
1879: PetscFree(gen_to->rev_requests);
1880: PetscFree(gen_to);
1881: PetscFree(gen_from->values);
1882: PetscFree(gen_from->rev_requests);
1883: PetscFree(gen_from);
1884: PetscLogObjectDestroy(ctx);
1885: PetscHeaderDestroy(ctx);
1886: return(0);
1887: }
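/*
   Editor's note (illustrative sketch only, not part of vpscat.c): the destroy
   routine above is easier to follow with the persistent-request lifecycle in
   mind. Requests created with MPI_Send_init/MPI_Recv_init are inactive until
   MPI_Start, may be restarted many times, and must eventually be released with
   MPI_Request_free; a started request that will never be matched (the
   pre-posted ready-receiver receives) must be cancelled first, which is why
   MPI_Cancel appears above. A minimal sketch of that lifecycle:
*/
#include <mpi.h>

static void persistent_send_sketch(double *buf,int count,int dest,int tag,
                                   MPI_Comm comm,int nrounds)
{
  MPI_Request req;
  MPI_Status  status;
  int         i;

  MPI_Send_init(buf,count,MPI_DOUBLE,dest,tag,comm,&req); /* create; request is inactive */
  for (i=0; i<nrounds; i++) {
    MPI_Start(&req);                                      /* activate for one communication */
    MPI_Wait(&req,&status);                               /* complete; request stays allocated */
  }
  MPI_Request_free(&req);                                 /* release the MPI resources */
}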
1889: /* ==========================================================================================*/
1891: /* create parallel to sequential scatter context */
1892: /*
1893: bs indicates how many elements there are in each block. Normally
1894: this would be 1.
1895: */
1898: int VecScatterCreate_PtoS(int nx,int *inidx,int ny,int *inidy,Vec xin,Vec yin,int bs,VecScatter ctx)
1899: {
1900: VecScatter_MPI_General *from,*to;
1901: int *source,*lens,rank,*owners;
1902: int size,*lowner,*start,lengthy;
1903: int *nprocs,i,j,n,idx,nsends,nrecvs;
1904: int *owner,*starts,count,tag,slen,ierr;
1905: int *rvalues,*svalues,base,imdex,nmax,*values,len,*indx,nprocslocal;
1906: MPI_Comm comm;
1907: MPI_Request *send_waits,*recv_waits;
1908: MPI_Status recv_status,*send_status;
1909: PetscMap map;
1910: PetscTruth found;
1911:
1913: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1914: PetscObjectGetComm((PetscObject)xin,&comm);
1915: MPI_Comm_rank(comm,&rank);
1916: MPI_Comm_size(comm,&size);
1917: VecGetPetscMap(xin,&map);
1918: PetscMapGetGlobalRange(map,&owners);
1920: VecGetSize(yin,&lengthy);
1922: /* first count number of contributors to each processor */
1923: PetscMalloc(2*size*sizeof(int),&nprocs);
1924: PetscMemzero(nprocs,2*size*sizeof(int));
1925: PetscMalloc((nx+1)*sizeof(int),&owner);
1926: for (i=0; i<nx; i++) {
1927: idx = inidx[i];
1928: found = PETSC_FALSE;
1929: for (j=0; j<size; j++) {
1930: if (idx >= owners[j] && idx < owners[j+1]) {
1931: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1932: }
1933: }
1934: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
1935: }
1936: nprocslocal = nprocs[2*rank];
1937: nprocs[2*rank] = nprocs[2*rank+1] = 0;
1938: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1940: /* inform other processors of number of messages and max length*/
1941: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1943: /* post receives: */
1944: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1945: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1946: for (i=0; i<nrecvs; i++) {
1947: MPI_Irecv((rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1948: }
1950: /* do sends:
1951: 1) starts[i] gives the starting index in svalues for the indices going to
1952: the ith processor
1953: */
1954: PetscMalloc((nx+1)*sizeof(int),&svalues);
1955: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1956: PetscMalloc((size+1)*sizeof(int),&starts);
1957: starts[0] = 0;
1958: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1959: for (i=0; i<nx; i++) {
1960: if (owner[i] != rank) {
1961: svalues[starts[owner[i]]++] = inidx[i];
1962: }
1963: }
1965: starts[0] = 0;
1966: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1967: count = 0;
1968: for (i=0; i<size; i++) {
1969: if (nprocs[2*i+1]) {
1970: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1971: }
1972: }
1973: PetscFree(starts);
1975: base = owners[rank];
1977: /* wait on receives */
1978: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1979: source = lens + nrecvs;
1980: count = nrecvs;
1981: slen = 0;
1982: while (count) {
1983: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1984: /* unpack receives into our local space */
1985: MPI_Get_count(&recv_status,MPI_INT,&n);
1986: source[imdex] = recv_status.MPI_SOURCE;
1987: lens[imdex] = n;
1988: slen += n;
1989: count--;
1990: }
1991: PetscFree(recv_waits);
1992:
1993: /* allocate entire send scatter context */
1994: PetscNew(VecScatter_MPI_General,&to);
1995: PetscMemzero(to,sizeof(VecScatter_MPI_General)); /* zero before reading options so sendfirst is not wiped out */
1996: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
1997: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
1998: len = slen*sizeof(int) + bs*slen*sizeof(PetscScalar) + (nrecvs+1)*sizeof(int) +
1999: nrecvs*(sizeof(int) + sizeof(MPI_Request));
2000: to->n = nrecvs;
2001: PetscMalloc(len,&to->values);
2002: PetscLogObjectMemory(ctx,len);
2003: to->requests = (MPI_Request*)(to->values + bs*slen);
2004: to->indices = (int*)(to->requests + nrecvs);
2005: to->starts = (int*)(to->indices + slen);
2006: to->procs = (int*)(to->starts + nrecvs + 1);
2007: PetscMalloc(2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status),&to->sstatus);
2008: to->rstatus = to->sstatus + PetscMax(nrecvs,nsends) + 1;
2009: PetscLogObjectMemory(ctx,2*(PetscMax(nrecvs,nsends)+1)*sizeof(MPI_Status));
2010: ctx->todata = (void*)to;
2011: to->starts[0] = 0;
2013: if (nrecvs) {
2014: PetscMalloc(nrecvs*sizeof(int),&indx);
2015: for (i=0; i<nrecvs; i++) indx[i] = i;
2016: PetscSortIntWithPermutation(nrecvs,source,indx);
2018: /* move the data into the send scatter */
2019: for (i=0; i<nrecvs; i++) {
2020: to->starts[i+1] = to->starts[i] + lens[indx[i]];
2021: to->procs[i] = source[indx[i]];
2022: values = rvalues + indx[i]*nmax;
2023: for (j=0; j<lens[indx[i]]; j++) {
2024: to->indices[to->starts[i] + j] = values[j] - base;
2025: }
2026: }
2027: PetscFree(indx);
2028: }
2029: PetscFree(rvalues);
2030: PetscFree(lens);
2031:
2032: /* allocate entire receive scatter context */
2033: PetscNew(VecScatter_MPI_General,&from);
2034: PetscMemzero(from,sizeof(VecScatter_MPI_General)); /* zero before reading options so sendfirst is not wiped out */
2035: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2036: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2037: len = ny*sizeof(int) + ny*bs*sizeof(PetscScalar) + (nsends+1)*sizeof(int) +
2038: nsends*(sizeof(int) + sizeof(MPI_Request));
2039: from->n = nsends;
2040: PetscMalloc(len,&from->values);
2041: PetscLogObjectMemory(ctx,len);
2042: from->requests = (MPI_Request*)(from->values + bs*ny);
2043: from->indices = (int*)(from->requests + nsends);
2044: from->starts = (int*)(from->indices + ny);
2045: from->procs = (int*)(from->starts + nsends + 1);
2046: ctx->fromdata = (void*)from;
2048: /* move data into receive scatter */
2049: PetscMalloc((size+nsends+1)*sizeof(int),&lowner);
2050: start = lowner + size;
2051: count = 0; from->starts[0] = start[0] = 0;
2052: for (i=0; i<size; i++) {
2053: if (nprocs[2*i+1]) {
2054: lowner[i] = count;
2055: from->procs[count++] = i;
2056: from->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2057: }
2058: }
2059: for (i=0; i<nx; i++) {
2060: if (owner[i] != rank) {
2061: from->indices[start[lowner[owner[i]]]++] = inidy[i];
2062: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2063: }
2064: }
2065: PetscFree(lowner);
2066: PetscFree(owner);
2067: PetscFree(nprocs);
2068:
2069: /* wait on sends */
2070: if (nsends) {
2071: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2072: MPI_Waitall(nsends,send_waits,send_status);
2073: PetscFree(send_status);
2074: }
2075: PetscFree(send_waits);
2076: PetscFree(svalues);
2078: if (nprocslocal) {
2079: int nt;
2080: /* we have a scatter to ourselves */
2081: from->local.n = to->local.n = nt = nprocslocal;
2082: PetscMalloc(nt*sizeof(int),&from->local.slots);
2083: PetscMalloc(nt*sizeof(int),&to->local.slots);
2084: PetscLogObjectMemory(ctx,2*nt*sizeof(int));
2085: nt = 0;
2086: for (i=0; i<nx; i++) {
2087: idx = inidx[i];
2088: if (idx >= owners[rank] && idx < owners[rank+1]) {
2089: to->local.slots[nt] = idx - owners[rank];
2090: from->local.slots[nt++] = inidy[i];
2091: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2092: }
2093: }
2094: } else {
2095: from->local.n = 0;
2096: from->local.slots = 0;
2097: to->local.n = 0;
2098: to->local.slots = 0;
2099: }
2100: from->local.nonmatching_computed = PETSC_FALSE;
2101: from->local.n_nonmatching = 0;
2102: from->local.slots_nonmatching = 0;
2103: to->local.nonmatching_computed = PETSC_FALSE;
2104: to->local.n_nonmatching = 0;
2105: to->local.slots_nonmatching = 0;
2107: to->type = VEC_SCATTER_MPI_GENERAL;
2108: from->type = VEC_SCATTER_MPI_GENERAL;
2110: from->bs = bs;
2111: to->bs = bs;
2112: if (bs > 1) {
2113: PetscTruth flg,flgs = PETSC_FALSE;
2114: int *sstarts = to->starts, *rstarts = from->starts;
2115: int *sprocs = to->procs, *rprocs = from->procs;
2116: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2117: MPI_Request *rev_swaits,*rev_rwaits;
2118: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2120: ctx->destroy = VecScatterDestroy_PtoP_X;
2121: ctx->copy = VecScatterCopy_PtoP_X;
2122: ctx->view = VecScatterView_MPI;
2123:
2124: tag = ctx->tag;
2125: comm = ctx->comm;
2127: /* allocate additional wait variables for the "reverse" scatter */
2128: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&rev_rwaits);
2129: to->rev_requests = rev_rwaits;
2130: PetscMalloc((nsends+1)*sizeof(MPI_Request),&rev_swaits);
2131: from->rev_requests = rev_swaits;
2132: PetscLogObjectMemory(ctx,(nsends+nrecvs+2)*sizeof(MPI_Request));
2134: /* Register the receives that you will use later (sends for scatter reverse) */
2135: PetscOptionsHasName(PETSC_NULL,"-vecscatter_ssend",&flgs);
2136: if (flgs) {
2137: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter Ssend mode\n");
2138: }
2139: for (i=0; i<from->n; i++) {
2140: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2141: comm,rwaits+i);
2142: if (!flgs) {
2143: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2144: comm,rev_swaits+i);
2145: } else {
2146: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,
2147: comm,rev_swaits+i);
2148: }
2149: }
2151: PetscOptionsHasName(PETSC_NULL,"-vecscatter_rr",&flg);
2152: if (flg) {
2153: ctx->postrecvs = VecScatterPostRecvs_PtoP_X;
2154: to->use_readyreceiver = PETSC_TRUE;
2155: from->use_readyreceiver = PETSC_TRUE;
2156: for (i=0; i<to->n; i++) {
2157: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2158: comm,swaits+i);
2159: }
2160: PetscLogInfo(0,"VecScatterCreate_PtoS:Using VecScatter ready receiver mode\n");
2161: } else {
2162: ctx->postrecvs = 0;
2163: to->use_readyreceiver = PETSC_FALSE;
2164: from->use_readyreceiver = PETSC_FALSE;
2165: for (i=0; i<to->n; i++) {
2166: if (!flgs) {
2167: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2168: comm,swaits+i);
2169: } else {
2170: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2171: comm,swaits+i);
2172: }
2173: }
2174: }
2175: /* Register receives for scatter reverse */
2176: for (i=0; i<to->n; i++) {
2177: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,
2178: comm,rev_rwaits+i);
2179: }
2181: PetscLogInfo(0,"VecScatterCreate_PtoS:Using blocksize %d scatter\n",bs);
2182: switch (bs) {
2183: case 12:
2184: ctx->begin = VecScatterBegin_PtoP_12;
2185: ctx->end = VecScatterEnd_PtoP_12;
2186: break;
2187: case 5:
2188: ctx->begin = VecScatterBegin_PtoP_5;
2189: ctx->end = VecScatterEnd_PtoP_5;
2190: break;
2191: case 4:
2192: ctx->begin = VecScatterBegin_PtoP_4;
2193: ctx->end = VecScatterEnd_PtoP_4;
2194: break;
2195: case 3:
2196: ctx->begin = VecScatterBegin_PtoP_3;
2197: ctx->end = VecScatterEnd_PtoP_3;
2198: break;
2199: case 2:
2200: ctx->begin = VecScatterBegin_PtoP_2;
2201: ctx->end = VecScatterEnd_PtoP_2;
2202: break;
2203: default:
2204: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2205: }
2206: } else {
2207: ctx->postrecvs = 0;
2208: ctx->destroy = VecScatterDestroy_PtoP;
2209: ctx->begin = VecScatterBegin_PtoP;
2210: ctx->end = VecScatterEnd_PtoP;
2211: ctx->copy = VecScatterCopy_PtoP;
2212: ctx->view = VecScatterView_MPI;
2213: }
2215: /* Check if the local scatter is actually a copy; important special case */
2216: if (nprocslocal) {
2217: VecScatterLocalOptimizeCopy_Private(&to->local,&from->local,bs);
2218: }
2220: return(0);
2221: }
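/*
   Editor's note (illustrative sketch only, not part of vpscat.c): the index
   distribution at the top of VecScatterCreate_PtoS is a counting-sort style
   pass: find the owner of each index, count how many go to each owner, turn
   the counts into message offsets by a prefix sum, then fill the send buffer.
   A simplified model with hypothetical names, ignoring the locally owned
   entries that the real routine keeps aside; owners[] holds the size+1
   ownership-range boundaries as returned by PetscMapGetGlobalRange.
*/
static void distribute_indices_sketch(int nx,const int *inidx,int size,
                                      const int *owners,int *owner,
                                      int *counts,int *starts,int *svalues)
{
  int i,j;

  for (i=0; i<size; i++) counts[i] = 0;
  for (i=0; i<nx; i++) {                              /* who owns each index? */
    for (j=0; j<size; j++) {
      if (inidx[i] >= owners[j] && inidx[i] < owners[j+1]) {
        owner[i] = j; counts[j]++; break;
      }
    }
  }
  starts[0] = 0;                                      /* prefix sums give message offsets */
  for (i=0; i<size; i++) starts[i+1] = starts[i] + counts[i];
  for (i=0; i<nx; i++) svalues[starts[owner[i]]++] = inidx[i];
  /* note: filling advances starts[], so the offsets must be recomputed if
     needed again; the routine above recomputes them before posting the sends */
}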
2223: /* ------------------------------------------------------------------------------------*/
2224: /*
2225: Scatter from local Seq vectors to a parallel vector.
2226: */
2229: int VecScatterCreate_StoP(int nx,int *inidx,int ny,int *inidy,Vec yin,VecScatter ctx)
2230: {
2231: VecScatter_MPI_General *from,*to;
2232: int *source,nprocslocal,*lens,rank = yin->stash.rank,*owners = yin->map->range;
2233: int ierr,size = yin->stash.size,*lowner,*start;
2234: int *nprocs,i,j,n,idx,nsends,nrecvs;
2235: int *owner,*starts,count,tag,slen;
2236: int *rvalues,*svalues,base,imdex,nmax,*values,len;
2237: PetscTruth found;
2238: MPI_Comm comm = yin->comm;
2239: MPI_Request *send_waits,*recv_waits;
2240: MPI_Status recv_status,*send_status;
2243: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2244: /* first count number of contributors to each processor */
2245: PetscMalloc(2*size*sizeof(int),&nprocs);
2246: PetscMemzero(nprocs,2*size*sizeof(int));
2247: PetscMalloc((nx+1)*sizeof(int),&owner);
2248: for (i=0; i<nx; i++) {
2249: idx = inidy[i];
2250: found = PETSC_FALSE;
2251: for (j=0; j<size; j++) {
2252: if (idx >= owners[j] && idx < owners[j+1]) {
2253: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2254: }
2255: }
2256: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
2257: }
2258: nprocslocal = nprocs[2*rank];
2259: nprocs[2*rank] = nprocs[2*rank+1] = 0;
2260: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2262: /* inform other processors of number of messages and max length*/
2263: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2265: /* post receives: */
2266: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
2267: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
2268: for (i=0; i<nrecvs; i++) {
2269: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2270: }
2272: /* do sends:
2273: 1) starts[i] gives the starting index in svalues for the indices going to
2274: the ith processor
2275: */
2276: PetscMalloc((nx+1)*sizeof(int),&svalues);
2277: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
2278: PetscMalloc((size+1)*sizeof(int),&starts);
2279: starts[0] = 0;
2280: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2281: for (i=0; i<nx; i++) {
2282: if (owner[i] != rank) {
2283: svalues[starts[owner[i]]++] = inidy[i];
2284: }
2285: }
2287: starts[0] = 0;
2288: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2289: count = 0;
2290: for (i=0; i<size; i++) {
2291: if (nprocs[2*i+1]) {
2292: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count);
2293: count++;
2294: }
2295: }
2296: PetscFree(starts);
2298: /* allocate entire send scatter context */
2299: PetscNew(VecScatter_MPI_General,&to);
2300: PetscMemzero(to,sizeof(VecScatter_MPI_General));
2301: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2302: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst);
2303: len = ny*(sizeof(int) + sizeof(PetscScalar)) + (nsends+1)*sizeof(int) +
2304: nsends*(sizeof(int) + sizeof(MPI_Request));
2305: to->n = nsends;
2306: PetscMalloc(len,&to->values);
2307: PetscLogObjectMemory(ctx,len);
2308: to->requests = (MPI_Request*)(to->values + ny);
2309: to->indices = (int*)(to->requests + nsends);
2310: to->starts = (int*)(to->indices + ny);
2311: to->procs = (int*)(to->starts + nsends + 1);
2312: PetscMalloc(2*(PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status),&to->sstatus); /* room for both sstatus and rstatus */
2313: to->rstatus = to->sstatus + PetscMax(nsends,nrecvs) + 1;
2314: PetscLogObjectMemory(ctx,2*(PetscMax(nsends,nrecvs) + 1)*sizeof(MPI_Status));
2315: ctx->todata = (void*)to;
2317: /* move data into send scatter context */
2318: PetscMalloc((size+nsends+1)*sizeof(int),&lowner);
2319: start = lowner + size;
2320: count = 0;
2321: to->starts[0] = start[0] = 0;
2322: for (i=0; i<size; i++) {
2323: if (nprocs[2*i+1]) {
2324: lowner[i] = count;
2325: to->procs[count++] = i;
2326: to->starts[count] = start[count] = start[count-1] + nprocs[2*i];
2327: }
2328: }
2329: for (i=0; i<nx; i++) {
2330: if (owner[i] != rank) {
2331: to->indices[start[lowner[owner[i]]]++] = inidx[i];
2332: }
2333: }
2334: PetscFree(lowner);
2335: PetscFree(owner);
2336: PetscFree(nprocs);
2338: base = owners[rank];
2340: /* wait on receives */
2341: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
2342: source = lens + nrecvs;
2343: count = nrecvs; slen = 0;
2344: while (count) {
2345: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2346: /* unpack receives into our local space */
2347: MPI_Get_count(&recv_status,MPI_INT,&n);
2348: source[imdex] = recv_status.MPI_SOURCE;
2349: lens[imdex] = n;
2350: slen += n;
2351: count--;
2352: }
2353: PetscFree(recv_waits);
2354:
2355: /* allocate entire receive scatter context */
2356: PetscNew(VecScatter_MPI_General,&from);
2357: PetscMemzero(from,sizeof(VecScatter_MPI_General));
2358: PetscLogObjectMemory(ctx,sizeof(VecScatter_MPI_General));
2359: PetscOptionsHasName(PETSC_NULL,"-vecscatter_sendfirst",&from->sendfirst);
2360: len = slen*(sizeof(int) + sizeof(PetscScalar)) + (nrecvs+1)*sizeof(int) +
2361: nrecvs*(sizeof(int) + sizeof(MPI_Request));
2362: from->n = nrecvs;
2363: PetscMalloc(len,&from->values);
2364: PetscLogObjectMemory(ctx,len);
2365: from->requests = (MPI_Request*)(from->values + slen);
2366: from->indices = (int*)(from->requests + nrecvs);
2367: from->starts = (int*)(from->indices + slen);
2368: from->procs = (int*)(from->starts + nrecvs + 1);
2369: ctx->fromdata = (void*)from;
2371: /* move the data into the receive scatter context*/
2372: from->starts[0] = 0;
2373: for (i=0; i<nrecvs; i++) {
2374: from->starts[i+1] = from->starts[i] + lens[i];
2375: from->procs[i] = source[i];
2376: values = rvalues + i*nmax;
2377: for (j=0; j<lens[i]; j++) {
2378: from->indices[from->starts[i] + j] = values[j] - base;
2379: }
2380: }
2381: PetscFree(rvalues);
2382: PetscFree(lens);
2383:
2384: /* wait on sends */
2385: if (nsends) {
2386: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2387: MPI_Waitall(nsends,send_waits,send_status);
2388: PetscFree(send_status);
2389: }
2390: PetscFree(send_waits);
2391: PetscFree(svalues);
2393: if (nprocslocal) {
2394: int nt;
2395: /* we have a scatter to ourselves */
2396: from->local.n = to->local.n = nt = nprocslocal;
2397: PetscMalloc(nt*sizeof(int),&from->local.slots);
2398: PetscMalloc(nt*sizeof(int),&to->local.slots);
2399: PetscLogObjectMemory(ctx,2*nt*sizeof(int));
2400: nt = 0;
2401: for (i=0; i<ny; i++) {
2402: idx = inidy[i];
2403: if (idx >= owners[rank] && idx < owners[rank+1]) {
2404: from->local.slots[nt] = idx - owners[rank];
2405: to->local.slots[nt++] = inidx[i];
2406: }
2407: }
2408: } else {
2409: from->local.n = 0;
2410: from->local.slots = 0;
2411: to->local.n = 0;
2412: to->local.slots = 0;
2414: }
2415: from->local.nonmatching_computed = PETSC_FALSE;
2416: from->local.n_nonmatching = 0;
2417: from->local.slots_nonmatching = 0;
2418: to->local.nonmatching_computed = PETSC_FALSE;
2419: to->local.n_nonmatching = 0;
2420: to->local.slots_nonmatching = 0;
2422: to->type = VEC_SCATTER_MPI_GENERAL;
2423: from->type = VEC_SCATTER_MPI_GENERAL;
2425: ctx->destroy = VecScatterDestroy_PtoP;
2426: ctx->postrecvs = 0;
2427: ctx->begin = VecScatterBegin_PtoP;
2428: ctx->end = VecScatterEnd_PtoP;
2429: ctx->copy = 0;
2430: ctx->view = VecScatterView_MPI;
2432: to->bs = 1;
2433: from->bs = 1;
2434: return(0);
2435: }
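/*
   Editor's note (illustrative sketch only, not part of vpscat.c): both create
   routines carve a single PetscMalloc'd region into the values, requests,
   indices, starts and procs arrays of a VecScatter_MPI_General. A plain-C
   model of that layout with hypothetical names (nmsg messages, ntotal indexed
   entries, block size bs); like the original, it relies on the scalar block
   coming first so the later pointers stay suitably aligned.
*/
#include <stdlib.h>
#include <mpi.h>

typedef struct {
  double      *values;   /* bs*ntotal scalars of message buffer          */
  MPI_Request *requests; /* nmsg persistent requests                     */
  int         *indices;  /* ntotal local indices, grouped by message     */
  int         *starts;   /* nmsg+1 offsets into indices[] (and values[]) */
  int         *procs;    /* nmsg peer ranks                              */
} ScatterSideSketch;

static int layout_sketch(ScatterSideSketch *s,int nmsg,int ntotal,int bs)
{
  size_t len = (size_t)bs*ntotal*sizeof(double) + nmsg*sizeof(MPI_Request)
             + (size_t)(ntotal + 2*nmsg + 1)*sizeof(int);

  s->values = (double*)malloc(len);
  if (!s->values) return 1;                      /* out of memory */
  s->requests = (MPI_Request*)(s->values + bs*ntotal);
  s->indices  = (int*)(s->requests + nmsg);
  s->starts   = s->indices + ntotal;
  s->procs    = s->starts + nmsg + 1;
  return 0;
}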
2437: /* ---------------------------------------------------------------------------------*/
2440: int VecScatterCreate_PtoP(int nx,int *inidx,int ny,int *inidy,Vec xin,Vec yin,VecScatter ctx)
2441: {
2442: int *lens,rank,*owners = xin->map->range,size;
2443: int *nprocs,i,j,n,idx,nsends,nrecvs,*local_inidx,*local_inidy;
2444: int *owner,*starts,count,tag,slen,ierr;
2445: int *rvalues,*svalues,base,imdex,nmax,*values;
2446: MPI_Comm comm;
2447: MPI_Request *send_waits,*recv_waits;
2448: MPI_Status recv_status;
2449: PetscTruth duplicate = PETSC_FALSE,found;
2452: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2453: PetscObjectGetComm((PetscObject)xin,&comm);
2454: MPI_Comm_size(comm,&size);
2455: MPI_Comm_rank(comm,&rank);
2456: if (size == 1) {
2457: VecScatterCreate_StoP(nx,inidx,ny,inidy,yin,ctx);
2458: return(0);
2459: }
2461: /*
2462: Each processor ships its inidx[j] and inidy[j] to the processor that owns inidx[j];
2463: every processor then calls VecScatterCreate_StoP() with the localized indices.
2464: */
2465: /* first count number of contributors to each processor */
2466: PetscMalloc(2*size*sizeof(int),&nprocs);
2467: PetscMemzero(nprocs,2*size*sizeof(int));
2468: PetscMalloc((nx+1)*sizeof(int),&owner);
2469: for (i=0; i<nx; i++) {
2470: idx = inidx[i];
2471: found = PETSC_FALSE;
2472: for (j=0; j<size; j++) {
2473: if (idx >= owners[j] && idx < owners[j+1]) {
2474: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
2475: }
2476: }
2477: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %d out of range",idx);
2478: }
2479: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
2481: /* inform other processors of number of messages and max length*/
2482: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
2484: /* post receives: */
2485: PetscMalloc(2*(nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
2486: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
2487: for (i=0; i<nrecvs; i++) {
2488: MPI_Irecv(rvalues+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
2489: }
2491: /* do sends:
2492: 1) starts[i] gives the starting index in svalues for the indices going to
2493: the ith processor
2494: */
2495: PetscMalloc(2*(nx+1)*sizeof(int),&svalues);
2496: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
2497: PetscMalloc((size+1)*sizeof(int),&starts);
2498: starts[0] = 0;
2499: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2500: for (i=0; i<nx; i++) {
2501: svalues[2*starts[owner[i]]] = inidx[i];
2502: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2503: }
2504: PetscFree(owner);
2506: starts[0] = 0;
2507: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
2508: count = 0;
2509: for (i=0; i<size; i++) {
2510: if (nprocs[2*i+1]) {
2511: MPI_Isend(svalues+2*starts[i],2*nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count);
2512: count++;
2513: }
2514: }
2515: PetscFree(starts);
2516: PetscFree(nprocs);
2518: base = owners[rank];
2520: /* wait on receives */
2521: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
2522: count = nrecvs;
2523: slen = 0;
2524: while (count) {
2525: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2526: /* unpack receives into our local space */
2527: MPI_Get_count(&recv_status,MPI_INT,&n);
2528: lens[imdex] = n/2;
2529: slen += n/2;
2530: count--;
2531: }
2532: PetscFree(recv_waits);
2533:
2534: PetscMalloc(2*(slen+1)*sizeof(int),&local_inidx);
2535: local_inidy = local_inidx + slen;
2537: count = 0;
2538: for (i=0; i<nrecvs; i++) {
2539: values = rvalues + 2*i*nmax;
2540: for (j=0; j<lens[i]; j++) {
2541: local_inidx[count] = values[2*j] - base;
2542: local_inidy[count++] = values[2*j+1];
2543: }
2544: }
2545: PetscFree(rvalues);
2546: PetscFree(lens);
2548: /* wait on sends */
2549: if (nsends) {
2550: MPI_Status *send_status;
2551: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
2552: MPI_Waitall(nsends,send_waits,send_status);
2553: PetscFree(send_status);
2554: }
2555: PetscFree(send_waits);
2556: PetscFree(svalues);
2558: /*
2559: should sort and remove duplicates from local_inidx,local_inidy
2560: */
2562: #if defined(do_it_slow)
2563: /* sort on the from index */
2564: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2565: start = 0;
2566: while (start < slen) {
2567: count = start+1;
2568: last = local_inidx[start];
2569: while (count < slen && last == local_inidx[count]) count++;
2570: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2571: /* sort on to index */
2572: PetscSortInt(count-start,local_inidy+start);
2573: }
2574: /* remove duplicates; not most efficient way, but probably good enough */
2575: i = start;
2576: while (i < count-1) {
2577: if (local_inidy[i] != local_inidy[i+1]) {
2578: i++;
2579: } else { /* found a duplicate */
2580: duplicate = PETSC_TRUE;
2581: for (j=i; j<slen-1; j++) {
2582: local_inidx[j] = local_inidx[j+1];
2583: local_inidy[j] = local_inidy[j+1];
2584: }
2585: slen--;
2586: count--;
2587: /* printf("found dup %d %d\n",local_inidx[i],local_inidy[i]);*/
2588: }
2589: }
2590: start = count;
2591: }
2592: #endif
2593: if (duplicate) {
2594: PetscLogInfo(0,"VecScatterCreate_PtoP:Duplicate to from indices passed in VecScatterCreate(), they are ignored\n");
2595: }
2596: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,yin,ctx);
2597: PetscFree(local_inidx);
2598: return(0);
2599: }
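/*
   Editor's note (illustrative sketch only, not part of vpscat.c): the
   disabled do_it_slow block above sorts the (from,to) pairs and strips
   duplicates with repeated array shifts. The same cleanup can be expressed
   more simply with qsort over packed pairs; this hypothetical helper assumes
   the pairs are interleaved as pairs[2*i],pairs[2*i+1], unlike the two
   separate arrays local_inidx/local_inidy used above.
*/
#include <stdlib.h>

static int cmp_pair_sketch(const void *a,const void *b)
{
  const int *x = (const int*)a,*y = (const int*)b;
  if (x[0] != y[0]) return (x[0] < y[0]) ? -1 : 1;   /* primary key: from index */
  if (x[1] != y[1]) return (x[1] < y[1]) ? -1 : 1;   /* secondary key: to index */
  return 0;
}

/* sorts the n pairs in place and returns the number of unique pairs kept */
static int sort_unique_pairs_sketch(int *pairs,int n)
{
  int i,m = 0;
  if (!n) return 0;
  qsort(pairs,(size_t)n,2*sizeof(int),cmp_pair_sketch);
  for (i=1; i<n; i++) {
    if (pairs[2*i] != pairs[2*m] || pairs[2*i+1] != pairs[2*m+1]) {
      m++;
      pairs[2*m]   = pairs[2*i];
      pairs[2*m+1] = pairs[2*i+1];
    }
  }
  return m+1;
}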