Actual source code: vecstash.c
1: /*$Id: vecstash.c,v 1.29 2001/09/07 20:08:55 bsmith Exp $*/
3: #include vecimpl.h
5: #define DEFAULT_STASH_SIZE 100
7: /*
8: VecStashCreate_Private - Creates a stash,currently used for all the parallel
9: matrix implementations. The stash is where elements of a matrix destined
10: to be stored on other processors are kept until matrix assembly is done.
12: This is a simple minded stash. Simply adds entries to end of stash.
14: Input Parameters:
15: comm - communicator, required for scatters.
16: bs - stash block size. used when stashing blocks of values
18: Output Parameters:
19: stash - the newly created stash
20: */
23: int VecStashCreate_Private(MPI_Comm comm,int bs,VecStash *stash)
24: {
25: int ierr,max,*opt,nopt;
26: PetscTruth flg;
29: /* Require 2 tags, get the second using PetscCommGetNewTag() */
30: stash->comm = comm;
31: PetscCommGetNewTag(stash->comm,&stash->tag1);
32: PetscCommGetNewTag(stash->comm,&stash->tag2);
33: MPI_Comm_size(stash->comm,&stash->size);
34: MPI_Comm_rank(stash->comm,&stash->rank);
36: nopt = stash->size;
37: PetscMalloc(nopt*sizeof(int),&opt);
38: PetscOptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);
39: if (flg) {
40: if (nopt == 1) max = opt[0];
41: else if (nopt == stash->size) max = opt[stash->rank];
42: else if (stash->rank < nopt) max = opt[stash->rank];
43: else max = 0; /* use default */
44: stash->umax = max;
45: } else {
46: stash->umax = 0;
47: }
48: PetscFree(opt);
50: if (bs <= 0) bs = 1;
52: stash->bs = bs;
53: stash->nmax = 0;
54: stash->oldnmax = 0;
55: stash->n = 0;
56: stash->reallocs = -1;
57: stash->idx = 0;
58: stash->array = 0;
60: stash->send_waits = 0;
61: stash->recv_waits = 0;
62: stash->send_status = 0;
63: stash->nsends = 0;
64: stash->nrecvs = 0;
65: stash->svalues = 0;
66: stash->rvalues = 0;
67: stash->rmax = 0;
68: stash->nprocs = 0;
69: stash->nprocessed = 0;
70: stash->donotstash = PETSC_FALSE;
71: return(0);
72: }
74: /*
75: VecStashDestroy_Private - Destroy the stash
76: */
79: int VecStashDestroy_Private(VecStash *stash)
80: {
84: if (stash->array) {
85: PetscFree(stash->array);
86: stash->array = 0;
87: }
88: if (stash->bowners) {
89: PetscFree(stash->bowners);
90: }
91: return(0);
92: }
94: /*
95: VecStashScatterEnd_Private - This is called as the fial stage of
96: scatter. The final stages of message passing is done here, and
97: all the memory used for message passing is cleanedu up. This
98: routine also resets the stash, and deallocates the memory used
99: for the stash. It also keeps track of the current memory usage
100: so that the same value can be used the next time through.
101: */
104: int VecStashScatterEnd_Private(VecStash *stash)
105: {
106: int nsends=stash->nsends,ierr,oldnmax;
107: MPI_Status *send_status;
110: /* wait on sends */
111: if (nsends) {
112: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
113: MPI_Waitall(2*nsends,stash->send_waits,send_status);
114: PetscFree(send_status);
115: }
117: /* Now update nmaxold to be app 10% more than max n, this way the
118: wastage of space is reduced the next time this stash is used.
119: Also update the oldmax, only if it increases */
120: if (stash->n) {
121: oldnmax = ((int)(stash->n * 1.1) + 5)*stash->bs;
122: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
123: }
125: stash->nmax = 0;
126: stash->n = 0;
127: stash->reallocs = -1;
128: stash->rmax = 0;
129: stash->nprocessed = 0;
131: if (stash->array) {
132: PetscFree(stash->array);
133: stash->array = 0;
134: stash->idx = 0;
135: }
136: if (stash->send_waits) {
137: PetscFree(stash->send_waits);
138: stash->send_waits = 0;
139: }
140: if (stash->recv_waits) {
141: PetscFree(stash->recv_waits);
142: stash->recv_waits = 0;
143: }
144: if (stash->svalues) {
145: PetscFree(stash->svalues);
146: stash->svalues = 0;
147: }
148: if (stash->rvalues) {
149: PetscFree(stash->rvalues);
150: stash->rvalues = 0;
151: }
152: if (stash->nprocs) {
153: PetscFree(stash->nprocs);
154: stash->nprocs = 0;
155: }
157: return(0);
158: }
160: /*
161: VecStashGetInfo_Private - Gets the relavant statistics of the stash
163: Input Parameters:
164: stash - the stash
165: nstash - the size of the stash
166: reallocs - the number of additional mallocs incurred.
167:
168: */
171: int VecStashGetInfo_Private(VecStash *stash,int *nstash,int *reallocs)
172: {
175: *nstash = stash->n*stash->bs;
176: if (stash->reallocs < 0) *reallocs = 0;
177: else *reallocs = stash->reallocs;
179: return(0);
180: }
183: /*
184: VecStashSetInitialSize_Private - Sets the initial size of the stash
186: Input Parameters:
187: stash - the stash
188: max - the value that is used as the max size of the stash.
189: this value is used while allocating memory. It specifies
190: the number of vals stored, even with the block-stash
191: */
194: int VecStashSetInitialSize_Private(VecStash *stash,int max)
195: {
197: stash->umax = max;
198: return(0);
199: }
201: /* VecStashExpand_Private - Expand the stash. This function is called
202: when the space in the stash is not sufficient to add the new values
203: being inserted into the stash.
204:
205: Input Parameters:
206: stash - the stash
207: incr - the minimum increase requested
208:
209: Notes:
210: This routine doubles the currently used memory.
211: */
214: int VecStashExpand_Private(VecStash *stash,int incr)
215: {
216: int *n_idx,newnmax,bs=stash->bs,ierr;
217: PetscScalar *n_array;
220: /* allocate a larger stash. */
221: if (!stash->oldnmax && !stash->nmax) { /* new stash */
222: if (stash->umax) newnmax = stash->umax/bs;
223: else newnmax = DEFAULT_STASH_SIZE/bs;
224: } else if (!stash->nmax) { /* resuing stash */
225: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
226: else newnmax = stash->oldnmax/bs;
227: } else newnmax = stash->nmax*2;
229: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
231: PetscMalloc((newnmax)*(sizeof(int)+bs*sizeof(PetscScalar)),&n_array);
232: n_idx = (int*)(n_array + bs*newnmax);
233: PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
234: PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
235: if (stash->array) {PetscFree(stash->array);}
236: stash->array = n_array;
237: stash->idx = n_idx;
238: stash->nmax = newnmax;
239: stash->reallocs++;
240: return(0);
241: }
242: /*
243: VecStashScatterBegin_Private - Initiates the transfer of values to the
244: correct owners. This function goes through the stash, and check the
245: owners of each stashed value, and sends the values off to the owner
246: processors.
248: Input Parameters:
249: stash - the stash
250: owners - an array of size 'no-of-procs' which gives the ownership range
251: for each node.
253: Notes: The 'owners' array in the cased of the blocked-stash has the
254: ranges specified blocked global indices, and for the regular stash in
255: the proper global indices.
256: */
259: int VecStashScatterBegin_Private(VecStash *stash,int *owners)
260: {
261: int *owner,*start,tag1=stash->tag1,tag2=stash->tag2;
262: int size=stash->size,*nprocs,nsends,nreceives;
263: int nmax,count,ierr,*sindices,*rindices,i,j,idx,bs=stash->bs;
264: PetscScalar *rvalues,*svalues;
265: MPI_Comm comm = stash->comm;
266: MPI_Request *send_waits,*recv_waits;
270: /* first count number of contributors to each processor */
271: PetscMalloc(2*size*sizeof(int),&nprocs);
272: PetscMemzero(nprocs,2*size*sizeof(int));
273: PetscMalloc((stash->n+1)*sizeof(int),&owner);
275: for (i=0; i<stash->n; i++) {
276: idx = stash->idx[i];
277: for (j=0; j<size; j++) {
278: if (idx >= owners[j] && idx < owners[j+1]) {
279: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
280: }
281: }
282: }
283: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
284:
285: /* inform other processors of number of messages and max length*/
286: PetscMaxSum(comm,nprocs,&nmax,&nreceives);
288: /* post receives:
289: since we don't know how long each individual message is we
290: allocate the largest needed buffer for each receive. Potentially
291: this is a lot of wasted space.
292: */
293: PetscMalloc((nreceives+1)*(nmax+1)*(bs*sizeof(PetscScalar)+sizeof(int)),&rvalues);
294: rindices = (int*)(rvalues + bs*nreceives*nmax);
295: PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
296: for (i=0,count=0; i<nreceives; i++) {
297: MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
298: MPI_Irecv(rindices+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
299: }
301: /* do sends:
302: 1) starts[i] gives the starting index in svalues for stuff going to
303: the ith processor
304: */
305: PetscMalloc((stash->n+1)*(bs*sizeof(PetscScalar)+sizeof(int)),&svalues);
306: sindices = (int*)(svalues + bs*stash->n);
307: PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
308: PetscMalloc(size*sizeof(int),&start);
309: /* use 2 sends the first with all_v, the next with all_i */
310: start[0] = 0;
311: for (i=1; i<size; i++) {
312: start[i] = start[i-1] + nprocs[2*i-2];
313: }
314: for (i=0; i<stash->n; i++) {
315: j = owner[i];
316: if (bs == 1) {
317: svalues[start[j]] = stash->array[i];
318: } else {
319: PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
320: }
321: sindices[start[j]] = stash->idx[i];
322: start[j]++;
323: }
324: start[0] = 0;
325: for (i=1; i<size; i++) { start[i] = start[i-1] + nprocs[2*i-2];}
326: for (i=0,count=0; i<size; i++) {
327: if (nprocs[2*i+1]) {
328: MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
329: MPI_Isend(sindices+start[i],nprocs[2*i],MPI_INT,i,tag2,comm,send_waits+count++);
330: }
331: }
332: PetscFree(owner);
333: PetscFree(start);
334: /* This memory is reused in scatter end for a different purpose*/
335: for (i=0; i<2*size; i++) nprocs[i] = -1;
336: stash->nprocs = nprocs;
338: stash->svalues = svalues; stash->rvalues = rvalues;
339: stash->nsends = nsends; stash->nrecvs = nreceives;
340: stash->send_waits = send_waits; stash->recv_waits = recv_waits;
341: stash->rmax = nmax;
342: return(0);
343: }
345: /*
346: VecStashScatterGetMesg_Private - This function waits on the receives posted
347: in the function VecStashScatterBegin_Private() and returns one message at
348: a time to the calling function. If no messages are left, it indicates this
349: by setting flg = 0, else it sets flg = 1.
351: Input Parameters:
352: stash - the stash
354: Output Parameters:
355: nvals - the number of entries in the current message.
356: rows - an array of row indices (or blocked indices) corresponding to the values
357: cols - an array of columnindices (or blocked indices) corresponding to the values
358: vals - the values
359: flg - 0 indicates no more message left, and the current call has no values associated.
360: 1 indicates that the current call successfully received a message, and the
361: other output parameters nvals,rows,cols,vals are set appropriately.
362: */
365: int VecStashScatterGetMesg_Private(VecStash *stash,int *nvals,int **rows,PetscScalar **vals,int *flg)
366: {
367: int i,ierr,*flg_v;
368: int i1,i2,*rindices,bs=stash->bs;
369: MPI_Status recv_status;
370: PetscTruth match_found = PETSC_FALSE;
374: *flg = 0; /* When a message is discovered this is reset to 1 */
375: /* Return if no more messages to process */
376: if (stash->nprocessed == stash->nrecvs) { return(0); }
378: flg_v = stash->nprocs;
379: /* If a matching pair of receieves are found, process them, and return the data to
380: the calling function. Until then keep receiving messages */
381: while (!match_found) {
382: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
383: /* Now pack the received message into a structure which is useable by others */
384: if (i % 2) {
385: MPI_Get_count(&recv_status,MPI_INT,nvals);
386: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
387: } else {
388: MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
389: flg_v[2*recv_status.MPI_SOURCE] = i/2;
390: *nvals = *nvals/bs;
391: }
392:
393: /* Check if we have both the messages from this proc */
394: i1 = flg_v[2*recv_status.MPI_SOURCE];
395: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
396: if (i1 != -1 && i2 != -1) {
397: rindices = (int*)(stash->rvalues + bs*stash->rmax*stash->nrecvs);
398: *rows = rindices + i2*stash->rmax;
399: *vals = stash->rvalues + i1*bs*stash->rmax;
400: *flg = 1;
401: stash->nprocessed ++;
402: match_found = PETSC_TRUE;
403: }
404: }
405: return(0);
406: }