Actual source code: comm.c
1: /*$Id: comm.c,v 1.3 2001/04/12 14:33:53 balay Exp $*/
2: /***********************************comm.c*************************************
3: SPARSE GATHER-SCATTER PACKAGE: bss_malloc ivec error comm gs queue
5: Author: Henry M. Tufo III
7: e-mail: hmt@cs.brown.edu
9: snail-mail:
10: Division of Applied Mathematics
11: Brown University
12: Providence, RI 02912
14: Last Modification:
15: 11.21.97
16: ***********************************comm.c*************************************/
18: /***********************************comm.c*************************************
19: File Description:
20: -----------------
21: Global communication routines (fan-in/out reductions, staged adds, concatenation) for the gather-scatter package, over the Intel NX or MPI message layers.
22: ***********************************comm.c*************************************/
23: #include <stdio.h>
25: #if defined NXSRC
26: #ifndef DELTA
27: #include <nx.h>
28: #endif
30: #elif defined MPISRC
31: #include <mpi.h>
33: #endif
36: #include "const.h"
37: #include "types.h"
38: #include "error.h"
39: #include "ivec.h"
40: #include "bss_malloc.h"
41: #include "comm.h"
44: /* global program control variables - explicitly exported */
45: int my_id = 0;
46: int num_nodes = 1;
47: int floor_num_nodes = 0;
48: int i_log2_num_nodes = 0;
50: /* global program control variables */
51: static int p_init = 0;
52: static int modfl_num_nodes;
53: static int edge_not_pow_2;
55: static unsigned int edge_node[INT_LEN*32];
57: static void sgl_iadd(int *vals, int level);
58: static void sgl_radd(double *vals, int level);
59: static void hmt_concat(REAL *vals, REAL *work, int size);
62: /***********************************comm.c*************************************
63: Function: comm_init()
65: Input :
66: Output:
67: Return:
68: Description:
69: ***********************************comm.c*************************************/
70: void
71: comm_init (void)
72: {
73: #ifdef DEBUG
74: error_msg_warning("comm_init() :: start\n");
75: #endif
77: if (p_init++) return;
79: #if PETSC_SIZEOF_INT != 4
80: error_msg_warning("Int != 4 Bytes!");
81: #endif
84: MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
85: MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
87: if (num_nodes > (INT_MAX >> 1))
88: {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}
90: ivec_zero((int *)edge_node,INT_LEN*32);
92: floor_num_nodes = 1;
93: i_log2_num_nodes = modfl_num_nodes = 0;
94: while (floor_num_nodes <= num_nodes)
95: {
96: edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
97: floor_num_nodes <<= 1;
98: i_log2_num_nodes++;
99: }
101: i_log2_num_nodes--;
102: floor_num_nodes >>= 1;
103: modfl_num_nodes = (num_nodes - floor_num_nodes);
105: if ((my_id > 0) && (my_id <= modfl_num_nodes))
106: {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
107: else if (my_id >= floor_num_nodes)
108: {edge_not_pow_2=((my_id^floor_num_nodes)+1);
109: }
110: else
111: {edge_not_pow_2 = 0;}
113: #ifdef DEBUG
114: error_msg_warning("comm_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
115: #endif
116: }
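/* A minimal sketch of the topology comm_init() records above: the partner
   across hypercube dimension d is simply id^(1<<d), and ranks at or above
   the largest power of two fold onto edge_not_pow_2 as computed in the
   tail of comm_init().  COMM_EXAMPLES and show_hypercube_edges() are
   illustrative names only, not part of the package; the guard keeps the
   sketch out of the build. */
#ifdef COMM_EXAMPLES
static void
show_hypercube_edges(int id, int p)
{
  int fl = 1, d;

  while ((fl<<1) <= p)
    {fl <<= 1;}                     /* fl = largest power of 2 <= p */
  for (d=0; (1<<d) < fl; d++)       /* dims 0 .. i_log2_num_nodes-1 */
    {printf("node %d, dim %d -> partner %d\n", id, d, id^(1<<d));}
}
#endif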
120: /***********************************comm.c*************************************
121: Function: giop()
123: Input :
124: Output:
125: Return:
126: Description: fan-in/out version
127: ***********************************comm.c*************************************/
128: void
129: giop(int *vals, int *work, int n, int *oprs)
130: {
131: register int mask, edge;
132: int type, dest;
133: vfp fp;
134: #if defined MPISRC
135: MPI_Status status;
136: #elif defined NXSRC
137: int len;
138: #endif
141: #ifdef SAFE
142: /* ok ... should have some data, work, and operator(s) */
143: if (!vals||!work||!oprs)
144: {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
146: /* non-uniform should have at least two entries */
147: if ((oprs[0] == NON_UNIFORM)&&(n<2))
148: {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
150: /* check to make sure comm package has been initialized */
151: if (!p_init)
152: {comm_init();}
153: #endif
155: /* if there's nothing to do return */
156: if ((num_nodes<2)||(!n))
157: {
158: #ifdef DEBUG
159: error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
160: #endif
161: return;
162: }
164: /* a negative number of items to send ==> fatal */
165: if (n<0)
166: {error_msg_fatal("giop() :: n=%d<0?",n);}
168: /* advance to list of n operations for custom */
169: if ((type=oprs[0])==NON_UNIFORM)
170: {oprs++;}
172: /* major league hack */
173: if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
174: {
175: error_msg_warning("giop() :: hope you passed in a rbfp!\n");
176: fp = (vfp) oprs;
177: }
179: #if defined NXSRC
180: /* all msgs will be of the same length */
181: len = n*INT_LEN;
184: /* if not a hypercube must collapse partial dim */
184: if (edge_not_pow_2)
185: {
186: if (my_id >= floor_num_nodes)
187: {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
188: else
189: {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
190: }
192: /* implement the mesh fan in/out exchange algorithm */
193: if (my_id<floor_num_nodes)
194: {
195: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
196: {
197: dest = my_id^mask;
198: if (my_id > dest)
199: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
200: else
201: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
202: }
204: mask=floor_num_nodes>>1;
205: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
206: {
207: if (my_id%mask)
208: {continue;}
209:
210: dest = my_id^mask;
211: if (my_id < dest)
212: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
213: else
214: {crecv(MSGTAG4+dest,(char *)vals,len);}
215: }
216: }
218: /* if not a hypercube must expand to partial dim */
219: if (edge_not_pow_2)
220: {
221: if (my_id >= floor_num_nodes)
222: {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
223: else
224: {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
225: }
227: #elif defined MPISRC
228: /* all msgs will be of the same length */
229: /* if not a hypercube must collapse partial dim */
230: if (edge_not_pow_2)
231: {
232: if (my_id >= floor_num_nodes)
233: {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
234: else
235: {
236: MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
237: MPI_COMM_WORLD,&status);
238: (*fp)(vals,work,n,oprs);
239: }
240: }
242: /* implement the mesh fan in/out exchange algorithm */
243: if (my_id<floor_num_nodes)
244: {
245: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
246: {
247: dest = my_id^mask;
248: if (my_id > dest)
249: {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
250: else
251: {
252: MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
253: MPI_COMM_WORLD, &status);
254: (*fp)(vals, work, n, oprs);
255: }
256: }
258: mask=floor_num_nodes>>1;
259: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
260: {
261: if (my_id%mask)
262: {continue;}
263:
264: dest = my_id^mask;
265: if (my_id < dest)
266: {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
267: else
268: {
269: MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
270: MPI_COMM_WORLD, &status);
271: }
272: }
273: }
275: /* if not a hypercube must expand to partial dim */
276: if (edge_not_pow_2)
277: {
278: if (my_id >= floor_num_nodes)
279: {
280: MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
281: MPI_COMM_WORLD,&status);
282: }
283: else
284: {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
285: }
286: #else
287: return;
288: #endif
289: }
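/* A minimal usage sketch for giop(): a global integer sum over all
   nodes.  GL_ADD is assumed here to be one of the reduction opcodes
   that ivec_fct_addr() resolves (see const.h/ivec.h); substitute
   whatever opcode your build defines.  Guarded out of the build. */
#ifdef COMM_EXAMPLES
static void
example_giop_sum(void)
{
  int val[1], work[1], op[1];

  op[0]  = GL_ADD;                  /* assumed opcode name */
  val[0] = my_id;                   /* each node contributes its rank */
  giop(val, work, 1, op);           /* val[0] == 0+1+...+(num_nodes-1) */
}
#endif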
291: /***********************************comm.c*************************************
292: Function: grop()
294: Input :
295: Output:
296: Return:
297: Description: fan-in/out version
298: ***********************************comm.c*************************************/
299: void
300: grop(REAL *vals, REAL *work, int n, int *oprs)
301: {
302: register int mask, edge;
303: int type, dest;
304: vfp fp;
305: #if defined MPISRC
306: MPI_Status status;
307: #elif defined NXSRC
308: int len;
309: #endif
312: #ifdef SAFE
313: /* ok ... should have some data, work, and operator(s) */
314: if (!vals||!work||!oprs)
315: {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
317: /* non-uniform should have at least two entries */
318: if ((oprs[0] == NON_UNIFORM)&&(n<2))
319: {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
321: /* check to make sure comm package has been initialized */
322: if (!p_init)
323: {comm_init();}
324: #endif
326: /* if there's nothing to do return */
327: if ((num_nodes<2)||(!n))
328: {return;}
330: /* a negative number of items to send ==> fatal */
331: if (n<0)
332: {error_msg_fatal("grop() :: n=%d<0?",n);}
334: /* advance to list of n operations for custom */
335: if ((type=oprs[0])==NON_UNIFORM)
336: {oprs++;}
338: if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
339: {
340: error_msg_warning("grop() :: hope you passed in a rbfp!\n");
341: fp = (vfp) oprs;
342: }
344: #if defined NXSRC
345: /* all msgs will be of the same length */
346: len = n*REAL_LEN;
348: /* if not a hypercube must collapse partial dim */
349: if (edge_not_pow_2)
350: {
351: if (my_id >= floor_num_nodes)
352: {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
353: else
354: {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
355: }
357: /* implement the mesh fan in/out exchange algorithm */
358: if (my_id<floor_num_nodes)
359: {
360: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
361: {
362: dest = my_id^mask;
363: if (my_id > dest)
364: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
365: else
366: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
367: }
369: mask=floor_num_nodes>>1;
370: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
371: {
372: if (my_id%mask)
373: {continue;}
374:
375: dest = my_id^mask;
376: if (my_id < dest)
377: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
378: else
379: {crecv(MSGTAG4+dest,(char *)vals,len);}
380: }
381: }
383: /* if not a hypercube must expand to partial dim */
384: if (edge_not_pow_2)
385: {
386: if (my_id >= floor_num_nodes)
387: {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
388: else
389: {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
390: }
392: #elif defined MPISRC
393: /* all msgs will be of the same length */
394: /* if not a hypercube must collapse partial dim */
395: if (edge_not_pow_2)
396: {
397: if (my_id >= floor_num_nodes)
398: {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
399: MPI_COMM_WORLD);}
400: else
401: {
402: MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
403: MPI_COMM_WORLD,&status);
404: (*fp)(vals,work,n,oprs);
405: }
406: }
408: /* implement the mesh fan in/out exchange algorithm */
409: if (my_id<floor_num_nodes)
410: {
411: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
412: {
413: dest = my_id^mask;
414: if (my_id > dest)
415: {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
416: else
417: {
418: MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
419: MPI_COMM_WORLD, &status);
420: (*fp)(vals, work, n, oprs);
421: }
422: }
424: mask=floor_num_nodes>>1;
425: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
426: {
427: if (my_id%mask)
428: {continue;}
429:
430: dest = my_id^mask;
431: if (my_id < dest)
432: {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
433: else
434: {
435: MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
436: MPI_COMM_WORLD, &status);
437: }
438: }
439: }
441: /* if not a hypercube must expand to partial dim */
442: if (edge_not_pow_2)
443: {
444: if (my_id >= floor_num_nodes)
445: {
446: MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
447: MPI_COMM_WORLD,&status);
448: }
449: else
450: {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
451: MPI_COMM_WORLD);}
452: }
453: #else
454: return;
455: #endif
456: }
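/* A minimal sketch of the NON_UNIFORM path in grop(): when oprs[0] is
   NON_UNIFORM the routine advances oprs and hands the per-entry opcode
   list to the combining function, so each entry of vals[] can get its
   own operator.  GL_ADD and GL_MAX are assumed opcode names.  Guarded
   out of the build. */
#ifdef COMM_EXAMPLES
static void
example_grop_non_uniform(void)
{
  REAL val[2], work[2];
  int  op[3];

  op[0]  = NON_UNIFORM;             /* flag: per-entry operators follow */
  op[1]  = GL_ADD;                  /* assumed: sum entry 0 */
  op[2]  = GL_MAX;                  /* assumed: max entry 1 */
  val[0] = val[1] = (REAL) my_id;
  grop(val, work, 2, op);           /* val[0]=sum of ranks, val[1]=num_nodes-1 */
}
#endif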
459: /***********************************comm.c*************************************
460: Function: grop_hc()
462: Input :
463: Output:
464: Return:
465: Description: fan-in/out version
467: note good only for num_nodes=2^k!!!
469: ***********************************comm.c*************************************/
470: void
471: grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
472: {
473: register int mask, edge;
474: int type, dest;
475: vfp fp;
476: #if defined MPISRC
477: MPI_Status status;
478: #elif defined NXSRC
479: int len;
480: #endif
483: #ifdef SAFE
484: /* ok ... should have some data, work, and operator(s) */
485: if (!vals||!work||!oprs)
486: {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
488: /* non-uniform should have at least two entries */
489: if ((oprs[0] == NON_UNIFORM)&&(n<2))
490: {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
492: /* check to make sure comm package has been initialized */
493: if (!p_init)
494: {comm_init();}
495: #endif
497: /* if there's nothing to do return */
498: if ((num_nodes<2)||(!n)||(dim<=0))
499: {return;}
501: /* the error msg says it all!!! */
502: if (modfl_num_nodes)
503: {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
505: /* a negative number of items to send ==> fatal */
506: if (n<0)
507: {error_msg_fatal("grop_hc() :: n=%d<0?",n);}
509: /* can't do more dimensions than exist */
510: dim = MIN(dim,i_log2_num_nodes);
512: /* advance to list of n operations for custom */
513: if ((type=oprs[0])==NON_UNIFORM)
514: {oprs++;}
516: if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
517: {
518: error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
519: fp = (vfp) oprs;
520: }
522: #if defined NXSRC
523: /* all msgs will be of the same length */
524: len = n*REAL_LEN;
526: /* implement the mesh fan in/out exchange algorithm */
527: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
528: {
529: dest = my_id^mask;
530: if (my_id > dest)
531: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
532: else
533: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
534: }
535:
536: /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
537: if (edge==dim)
538: {mask>>=1;}
539: else
540: {while (++edge<dim) {mask<<=1;}}
541:
542: for (edge=0; edge<dim; edge++,mask>>=1)
543: {
544: if (my_id%mask)
545: {continue;}
546:
547: dest = my_id^mask;
548: if (my_id < dest)
549: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
550: else
551: {crecv(MSGTAG4+dest,(char *)vals,len);}
552: }
554: #elif defined MPISRC
555: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
556: {
557: dest = my_id^mask;
558: if (my_id > dest)
559: {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
560: else
561: {
562: MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
563: &status);
564: (*fp)(vals, work, n, oprs);
565: }
566: }
568: if (edge==dim)
569: {mask>>=1;}
570: else
571: {while (++edge<dim) {mask<<=1;}}
573: for (edge=0; edge<dim; edge++,mask>>=1)
574: {
575: if (my_id%mask)
576: {continue;}
577:
578: dest = my_id^mask;
579: if (my_id < dest)
580: {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
581: else
582: {
583: MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
584: &status);
585: }
586: }
587: #else
588: return;
589: #endif
590: }
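/* A minimal usage sketch for grop_hc(): it reduces over only the first
   dim hypercube dimensions, so with dim==1 each even/odd pair of ranks
   ends up sharing a pairwise result.  Requires a power-of-two num_nodes,
   as the guard above enforces; GL_ADD is an assumed opcode name.
   Guarded out of the build. */
#ifdef COMM_EXAMPLES
static void
example_grop_hc_pair(void)
{
  REAL val[1], work[1];
  int  op[1];

  op[0]  = GL_ADD;                  /* assumed opcode name */
  val[0] = (REAL) my_id;
  grop_hc(val, work, 1, op, 1);     /* ranks 2k and 2k+1 both end with 4k+1 */
}
#endif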
593: /***********************************comm.c*************************************
594: Function: gfop()
596: Input :
597: Output:
598: Return:
599: Description: fan-in/out version
600: ***********************************comm.c*************************************/
601: void
602: gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
603: {
604: register int mask, edge;
605: int dest;
606: #if defined MPISRC
607: MPI_Status status;
608: MPI_Op op;
609: #elif defined NXSRC
610: int len;
611: #endif
614: #ifdef SAFE
615: /* check to make sure comm package has been initialized */
616: if (!p_init)
617: {comm_init();}
619: /* ok ... should have some data, work, and operator(s) */
620: if (!vals||!work||!fp)
621: {error_msg_fatal("gfop() :: v=%d, w=%d, f=%d",vals,work,fp);}
622: #endif
624: /* if there's nothing to do return */
625: if ((num_nodes<2)||(!n))
626: {return;}
628: /* a negative number of items to send ==> fatal */
629: if (n<0)
630: {error_msg_fatal("gfop() :: n=%d<0?",n);}
632: #if defined NXSRC
633: switch (dt) {
634: case REAL_TYPE:
635: len = n*REAL_LEN;
636: break;
637: case INT_TYPE:
638: len = n*INT_LEN;
639: break;
640: default:
641: error_msg_fatal("gfop() :: unrecognized datatype!!!\n");
642: }
644: /* if not a hypercube must collapse partial dim */
645: if (edge_not_pow_2)
646: {
647: if (my_id >= floor_num_nodes)
648: {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
649: else
650: {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);}
651: }
653: /* implement the mesh fan in/out exchange algorithm */
654: if (my_id<floor_num_nodes)
655: {
656: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
657: {
658: dest = my_id^mask;
659: if (my_id > dest)
660: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
661: else
662: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);}
663: }
665: mask=floor_num_nodes>>1;
666: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
667: {
668: if (my_id%mask)
669: {continue;}
670:
671: dest = my_id^mask;
672: if (my_id < dest)
673: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
674: else
675: {crecv(MSGTAG4+dest,(char *)vals,len);}
676: }
677: }
679: /* if not a hypercube must expand to partial dim */
680: if (edge_not_pow_2)
681: {
682: if (my_id >= floor_num_nodes)
683: {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
684: else
685: {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
686: }
688: #elif defined MPISRC
690: if (comm_type==MPI)
691: {
692: MPI_Op_create(fp,TRUE,&op);
693: MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
694: MPI_Op_free(&op);
695: return;
696: }
698: /* if not a hypercube must collapse partial dim */
699: if (edge_not_pow_2)
700: {
701: if (my_id >= floor_num_nodes)
702: {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
703: MPI_COMM_WORLD);}
704: else
705: {
706: MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
707: MPI_COMM_WORLD,&status);
708: (*fp)(vals,work,&n,&dt);
709: }
710: }
712: /* implement the mesh fan in/out exchange algorithm */
713: if (my_id<floor_num_nodes)
714: {
715: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
716: {
717: dest = my_id^mask;
718: if (my_id > dest)
719: {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
720: else
721: {
722: MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
723: MPI_COMM_WORLD, &status);
724: (*fp)(vals, work, &n, &dt);
725: }
726: }
728: mask=floor_num_nodes>>1;
729: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
730: {
731: if (my_id%mask)
732: {continue;}
733:
734: dest = my_id^mask;
735: if (my_id < dest)
736: {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
737: else
738: {
739: MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
740: MPI_COMM_WORLD, &status);
741: }
742: }
743: }
745: /* if not a hypercube must expand to partial dim */
746: if (edge_not_pow_2)
747: {
748: if (my_id >= floor_num_nodes)
749: {
750: MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
751: MPI_COMM_WORLD,&status);
752: }
753: else
754: {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
755: MPI_COMM_WORLD);}
756: }
757: #else
758: return;
759: #endif
760: }
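/* A sketch of a combining function with the shape gfop() expects for its
   vbfp argument under MPISRC (the MPI_User_function signature, so it can
   also back MPI_Op_create()).  Note that the two paths assume different
   conventions: the MPI path (comm_type==MPI) follows MPI and leaves the
   reduced result in work, the MPI_Allreduce recvbuf, while the
   hand-rolled branch forwards vals after calling (*fp)(vals,work,&n,&dt)
   and therefore effectively wants the accumulation in its first
   argument.  This sketch follows the MPI convention (accumulate into the
   second argument); example_sum_fn is an illustrative name.  Guarded out
   of the build. */
#if defined(COMM_EXAMPLES) && defined(MPISRC)
static void
example_sum_fn(void *in, void *inout, int *n, MPI_Datatype *dt)
{
  int i;

  if (*dt == MPI_INT)
    {int *a=(int *)in, *b=(int *)inout; for (i=0;i<*n;i++) {b[i]+=a[i];}}
  else if (*dt == MPI_DOUBLE)
    {double *a=(double *)in, *b=(double *)inout; for (i=0;i<*n;i++) {b[i]+=a[i];}}
}
#endif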
767: /******************************************************************************
768: Function: ssgl_radd()
770: Input :
771: Output:
772: Return:
773: Description:
774:
775: ii+1 entries in seg :: 0 .. ii
777: ******************************************************************************/
778: void
779: ssgl_radd(register REAL *vals, register REAL *work, register int level,
780: register int *segs)
781: {
782: register int edge, type, dest, mask;
783: register int stage_n;
784: #if defined MPISRC
785: MPI_Status status;
786: #endif
789: #ifdef DEBUG
790: if (level > i_log2_num_nodes)
791: {error_msg_fatal("ssgl_radd() :: level > log_2(P)!!!");}
792: #endif
794: #ifdef SAFE
795: /* check to make sure comm package has been initialized */
796: if (!p_init)
797: {comm_init();}
798: #endif
801: #if defined NXSRC
802: /* all msgs are *NOT* the same length */
803: /* implement the mesh fan in/out exchange algorithm */
804: for (mask=0, edge=0; edge<level; edge++, mask++)
805: {
806: stage_n = (segs[level] - segs[edge]);
807: if (stage_n && !(my_id & mask))
808: {
809: dest = edge_node[edge];
810: type = MSGTAG3 + my_id + (num_nodes*edge);
811: if (my_id>dest)
812: {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
813: else
814: {
815: type = type - my_id + dest;
816: crecv(type,work,stage_n*REAL_LEN);
817: rvec_add(vals+segs[edge], work, stage_n);
818: /* daxpy(vals+segs[edge], work, stage_n); */
819: }
820: }
821: mask <<= 1;
822: }
823: mask>>=1;
824: for (edge=0; edge<level; edge++)
825: {
826: stage_n = (segs[level] - segs[level-1-edge]);
827: if (stage_n && !(my_id & mask))
828: {
829: dest = edge_node[level-edge-1];
830: type = MSGTAG6 + my_id + (num_nodes*edge);
831: if (my_id<dest)
832: {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
833: else
834: {
835: type = type - my_id + dest;
836: crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
837: }
838: }
839: mask >>= 1;
840: }
842: #elif defined MPISRC
843: /* all msgs are *NOT* the same length */
844: /* implement the mesh fan in/out exchange algorithm */
845: for (mask=0, edge=0; edge<level; edge++, mask++)
846: {
847: stage_n = (segs[level] - segs[edge]);
848: if (stage_n && !(my_id & mask))
849: {
850: dest = edge_node[edge];
851: type = MSGTAG3 + my_id + (num_nodes*edge);
852: if (my_id>dest)
853: /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
854: {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
855: MPI_COMM_WORLD);}
856: else
857: {
858: type = type - my_id + dest;
859: /* crecv(type,work,stage_n*REAL_LEN); */
860: MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
861: MPI_COMM_WORLD,&status);
862: rvec_add(vals+segs[edge], work, stage_n);
863: /* daxpy(vals+segs[edge], work, stage_n); */
864: }
865: }
866: mask <<= 1;
867: }
868: mask>>=1;
869: for (edge=0; edge<level; edge++)
870: {
871: stage_n = (segs[level] - segs[level-1-edge]);
872: if (stage_n && !(my_id & mask))
873: {
874: dest = edge_node[level-edge-1];
875: type = MSGTAG6 + my_id + (num_nodes*edge);
876: if (my_id<dest)
877: /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
878: {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
879: MPI_COMM_WORLD);}
880: else
881: {
882: type = type - my_id + dest;
883: /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
884: MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
885: MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
886: }
887: }
888: mask >>= 1;
889: }
890: #else
891: return;
892: #endif
893: }
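/* A minimal sketch of the segs[] convention ssgl_radd() expects: level+1
   monotone offsets into vals[] (the "ii+1 entries in seg" noted above).
   At fan-in stage e the slice vals[segs[e] .. segs[level]-1] is exchanged
   and summed, so later stages move progressively shorter tails.  work
   must hold at least segs[level]-segs[0] entries, and level may not
   exceed i_log2_num_nodes.  Guarded out of the build. */
#ifdef COMM_EXAMPLES
static void
example_ssgl_radd(REAL *vals, REAL *work)
{
  int segs[3];

  segs[0]=0; segs[1]=4; segs[2]=6;  /* stage 0 sums vals[0..5], stage 1 vals[4..5] */
  ssgl_radd(vals, work, 2, segs);   /* level 2 needs num_nodes >= 4 */
}
#endif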
897: /***********************************comm.c*************************************
898: Function: grop_hc_vvl()
900: Input :
901: Output:
902: Return:
903: Description: fan-in/out version
905: note good only for num_nodes=2^k!!!
907: ***********************************comm.c*************************************/
908: void
909: grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
910: {
911: register int mask, edge, n;
912: int type, dest;
913: vfp fp;
914: #if defined MPISRC
915: MPI_Status status;
916: #elif defined NXSRC
917: int len;
918: #endif
921: error_msg_fatal("grop_hc_vvl() :: is not working!\n");
923: #ifdef SAFE
924: /* ok ... should have some data, work, and operator(s) */
925: if (!vals||!work||!oprs||!segs)
926: {error_msg_fatal("grop_hc_vvl() :: vals=%d, work=%d, oprs=%d, segs=%d",vals,work,oprs,segs);}
928: /* non-uniform should have at least two entries */
929: #if defined(not_used)
930: if ((oprs[0] == NON_UNIFORM)&&(n<2))
931: {error_msg_fatal("grop_hc_vvl() :: non_uniform and n=0,1?");}
932: #endif
934: /* check to make sure comm package has been initialized */
935: if (!p_init)
936: {comm_init();}
937: #endif
939: /* if there's nothing to do return */
940: if ((num_nodes<2)||(dim<=0))
941: {return;}
943: /* the error msg says it all!!! */
944: if (modfl_num_nodes)
945: {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}
947: /* can't do more dimensions than exist */
948: dim = MIN(dim,i_log2_num_nodes);
950: /* advance to list of n operations for custom */
951: if ((type=oprs[0])==NON_UNIFORM)
952: {oprs++;}
954: if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
955: {
956: error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
957: fp = (vfp) oprs;
958: }
960: #if defined NXSRC
961: /* all msgs are *NOT* the same length */
962: /* implement the mesh fan in/out exchange algorithm */
963: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
964: {
965: n = segs[dim]-segs[edge];
966: /*
967: error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
968: edge,segs[edge]);
969: */
970: len = n*REAL_LEN;
971: dest = my_id^mask;
972: if (my_id > dest)
973: {csend(MSGTAG2+my_id,(char *)(vals+segs[edge]),len,dest,0); break;}
974: else
975: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);}
976: }
978: if (edge==dim)
979: {mask>>=1;}
980: else
981: {while (++edge<dim) {mask<<=1;}}
983: for (edge=0; edge<dim; edge++,mask>>=1)
984: {
985: if (my_id%mask)
986: {continue;}
987: len = (segs[dim]-segs[dim-1-edge])*REAL_LEN;
988: /*
989: error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
990: dim-1-edge,segs[dim-1-edge]);
991: */
992: dest = my_id^mask;
993: if (my_id < dest)
994: {csend(MSGTAG4+my_id,(char *)(vals+segs[dim-1-edge]),len,dest,0);}
995: else
996: {crecv(MSGTAG4+dest,(char *)(vals+segs[dim-1-edge]),len);}
997: }
999: #elif defined MPISRC
1000: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
1001: {
1002: n = segs[dim]-segs[edge];
1003: dest = my_id^mask;
1004: if (my_id > dest)
1005: {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1006: else
1007: {
1008: MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
1009: MPI_COMM_WORLD, &status);
1010: (*fp)(vals+segs[edge], work, n, oprs);
1011: }
1012: }
1014: if (edge==dim)
1015: {mask>>=1;}
1016: else
1017: {while (++edge<dim) {mask<<=1;}}
1019: for (edge=0; edge<dim; edge++,mask>>=1)
1020: {
1021: if (my_id%mask)
1022: {continue;}
1023:
1024: n = (segs[dim]-segs[dim-1-edge]);
1025:
1026: dest = my_id^mask;
1027: if (my_id < dest)
1028: {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
1029: MPI_COMM_WORLD);}
1030: else
1031: {
1032: MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
1033: MSGTAG4+dest,MPI_COMM_WORLD, &status);
1034: }
1035: }
1036: #else
1037: return;
1038: #endif
1039: }
1042: #ifdef INPROG
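/* Note: this draft of proc_sync() still refers to giop()'s locals
   (vals, work, len, n, oprs, fp) and will not compile as-is; it is
   excluded from the build unless INPROG is defined. */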
1044: /***********************************comm.c*************************************
1045: Function: proc_sync()
1047: Input :
1048: Output:
1049: Return:
1050: Description: hc based version
1051: ***********************************comm.c*************************************/
1052: void
1053: proc_sync()
1054: {
1055: register int mask, edge;
1056: int type, dest;
1057: #if defined MPISRC
1058: MPI_Status status;
1059: #endif
1062: #ifdef DEBUG
1063: error_msg_warning("begin proc_sync()\n");
1064: #endif
1066: #ifdef SAFE
1067: /* check to make sure comm package has been initialized */
1068: if (!p_init)
1069: {comm_init();}
1070: #endif
1072: #if defined NXSRC
1073: /* all msgs will be of zero length */
1075: /* if not a hypercube must collapse partial dim */
1076: if (edge_not_pow_2)
1077: {
1078: if (my_id >= floor_num_nodes)
1079: {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
1080: else
1081: {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
1082: }
1084: /* implement the mesh fan in/out exchange algorithm */
1085: if (my_id<floor_num_nodes)
1086: {
1087: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1088: {
1089: dest = my_id^mask;
1090: if (my_id > dest)
1091: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
1092: else
1093: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
1094: }
1096: mask=floor_num_nodes>>1;
1097: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1098: {
1099: if (my_id%mask)
1100: {continue;}
1101:
1102: dest = my_id^mask;
1103: if (my_id < dest)
1104: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
1105: else
1106: {crecv(MSGTAG4+dest,(char *)vals,len);}
1107: }
1108: }
1110: /* if not a hypercube must expand to partial dim */
1111: if (edge_not_pow_2)
1112: {
1113: if (my_id >= floor_num_nodes)
1114: {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
1115: else
1116: {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
1117: }
1119: #elif defined MPISRC
1120: /* all msgs will be of the same length */
1121: /* if not a hypercube must collapse partial dim */
1122: if (edge_not_pow_2)
1123: {
1124: if (my_id >= floor_num_nodes)
1125: {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
1126: else
1127: {
1128: MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
1129: MPI_COMM_WORLD,&status);
1130: (*fp)(vals,work,n,oprs);
1131: }
1132: }
1134: /* implement the mesh fan in/out exchange algorithm */
1135: if (my_id<floor_num_nodes)
1136: {
1137: for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
1138: {
1139: dest = my_id^mask;
1140: if (my_id > dest)
1141: {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1142: else
1143: {
1144: MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
1145: MPI_COMM_WORLD, &status);
1146: (*fp)(vals, work, n, oprs);
1147: }
1148: }
1150: mask=floor_num_nodes>>1;
1151: for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
1152: {
1153: if (my_id%mask)
1154: {continue;}
1155:
1156: dest = my_id^mask;
1157: if (my_id < dest)
1158: {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
1159: else
1160: {
1161: MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
1162: MPI_COMM_WORLD, &status);
1163: }
1164: }
1165: }
1167: /* if not a hypercube must expand to partial dim */
1168: if (edge_not_pow_2)
1169: {
1170: if (my_id >= floor_num_nodes)
1171: {
1172: MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
1173: MPI_COMM_WORLD,&status);
1174: }
1175: else
1176: {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
1177: }
1178: #else
1179: return;
1180: #endif
1181: }
1182: #endif
1185: /* hmt hack */
1186: int
1187: hmt_xor_ (register int *i1, register int *i2)
1188: {
1189: return(*i1^*i2);
1190: }
1193: /******************************************************************************
1194: Function: staged_gs_()
1196: Input :
1197: Output:
1198: Return:
1199: Description:
1200:
1201: ii+1 entries in seg :: 0 .. ii
1203: ******************************************************************************/
1204: void
1205: staged_gs_ (register REAL *vals, register REAL *work, register int *level,
1206: register int *segs)
1207: {
1208: ssgl_radd(vals, work, *level, segs);
1209: }
1211: /******************************************************************************
1212: Function: staged_iadd_()
1214: Input :
1215: Output:
1216: Return:
1217: Description:
1218: ******************************************************************************/
1219: void
1220: staged_iadd_ (int *gl_num, int *level)
1221: {
1222: sgl_iadd(gl_num,*level);
1223: }
1227: /******************************************************************************
1228: Function: sgl_iadd()
1230: Input :
1231: Output:
1232: Return:
1233: Description:
1234: ******************************************************************************/
1235: static void
1236: sgl_iadd(int *vals, int level)
1237: {
1238: int edge, type, dest, source, len, mask = -1;
1239: int tmp, *work;
1242: #ifdef SAFE
1243: /* check to make sure comm package has been initialized */
1244: if (!p_init)
1245: {comm_init();}
1246: #endif
1249: /* all msgs will be of the same length */
1250: work = &tmp;
1251: len = INT_LEN;
1253: if (level > i_log2_num_nodes)
1254: {error_msg_fatal("sgl_iadd() :: level too big?");}
1256: if (level<=0)
1257: {return;}
1259: #if defined NXSRC
1260: {
1261: long msg_id;
1262: /* implement the mesh fan in/out exchange algorithm */
1263: if (my_id<floor_num_nodes)
1264: {
1265: mask = 0;
1266: for (edge = 0; edge < level; edge++)
1267: {
1268: if (!(my_id & mask))
1269: {
1270: source = dest = edge_node[edge];
1271: type = MSGTAG1 + my_id + (num_nodes*edge);
1272: if (my_id > dest)
1273: {
1274: msg_id = isend(type,vals,len,dest,0);
1275: msgwait(msg_id);
1276: }
1277: else
1278: {
1279: type = type - my_id + source;
1280: msg_id = irecv(type,work,len);
1281: msgwait(msg_id);
1282: vals[0] += work[0];
1283: }
1284: }
1285: mask <<= 1;
1286: mask += 1;
1287: }
1288: }
1289:
1290: if (my_id<floor_num_nodes)
1291: {
1292: mask >>= 1;
1293: for (edge = 0; edge < level; edge++)
1294: {
1295: if (!(my_id & mask))
1296: {
1297: source = dest = edge_node[level-edge-1];
1298: type = MSGTAG1 + my_id + (num_nodes*edge);
1299: if (my_id < dest)
1300: {
1301: msg_id = isend(type,vals,len,dest,0);
1302: msgwait(msg_id);
1303: }
1304: else
1305: {
1306: type = type - my_id + source;
1307: msg_id = irecv(type,work,len);
1308: msgwait(msg_id);
1309: vals[0] = work[0];
1310: }
1311: }
1312: mask >>= 1;
1313: }
1314: }
1315: }
1316: #elif defined MPISRC
1317: {
1318: MPI_Request msg_id;
1319: MPI_Status status;
1320:
1321: /* implement the mesh fan in/out exchange algorithm */
1322: if (my_id<floor_num_nodes)
1323: {
1324: mask = 0;
1325: for (edge = 0; edge < level; edge++)
1326: {
1327: if (!(my_id & mask))
1328: {
1329: source = dest = edge_node[edge];
1330: type = MSGTAG1 + my_id + (num_nodes*edge);
1331: if (my_id > dest)
1332: {
1333: MPI_Isend(vals,1,INT_TYPE,dest,type,MPI_COMM_WORLD,
1334: &msg_id);
1335: MPI_Wait(&msg_id,&status);
1336: /* msg_id = isend(type,vals,len,dest,0);
1337: msgwait(msg_id); */
1338: }
1339: else
1340: {
1341: type = type - my_id + source;
1342: MPI_Irecv(work,1,INT_TYPE,MPI_ANY_SOURCE,
1343: type,MPI_COMM_WORLD,&msg_id);
1344: MPI_Wait(&msg_id,&status);
1345: /* msg_id = irecv(type,work,len);
1346: msgwait(msg_id); */
1347: vals[0] += work[0];
1348: }
1349: }
1350: mask <<= 1;
1351: mask += 1;
1352: }
1353: }
1354:
1355: if (my_id<floor_num_nodes)
1356: {
1357: mask >>= 1;
1358: for (edge = 0; edge < level; edge++)
1359: {
1360: if (!(my_id & mask))
1361: {
1362: source = dest = edge_node[level-edge-1];
1363: type = MSGTAG1 + my_id + (num_nodes*edge);
1364: if (my_id < dest)
1365: {
1366: MPI_Isend(vals,1,INT_TYPE,dest,type,MPI_COMM_WORLD,
1367: &msg_id);
1368: MPI_Wait(&msg_id,&status);
1369: /* msg_id = isend(type,vals,len,dest,0);
1370: msgwait(msg_id);*/
1371: }
1372: else
1373: {
1374: type = type - my_id + source;
1375: MPI_Irecv(work,1,INT_TYPE,MPI_ANY_SOURCE,
1376: type,MPI_COMM_WORLD,&msg_id);
1377: MPI_Wait(&msg_id,&status);
1378: /* msg_id = irecv(type,work,len);
1379: msgwait(msg_id); */
1380: vals[0] = work[0];
1381: }
1382: }
1383: mask >>= 1;
1384: }
1385: }
1386: }
1387: #else
1388: return;
1389: #endif
1391: }
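/* A minimal sketch of the staging pattern sgl_iadd() walks: at fan-in
   stage e the active ranks are those whose low e bits are zero (the mask
   runs 0,1,3,7,... exactly as built in the loop above), so the survivor
   set halves at each stage.  Guarded out of the build. */
#ifdef COMM_EXAMPLES
static void
show_staged_participants(int p, int level)
{
  int e, id, mask = 0;

  for (e=0; e<level; e++)
    {
      printf("stage %d:", e);
      for (id=0; id<p; id++)
        {if (!(id & mask)) {printf(" %d", id);}}
      printf("\n");
      mask <<= 1; mask += 1;
    }
}
#endif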
1395: /******************************************************************************
1396: Function: staged_radd_()
1398: Input :
1399: Output:
1400: Return:
1401: Description:
1402: ******************************************************************************/
1403: void
1404: staged_radd_ (double *gl_num, int *level)
1405: {
1406: sgl_radd(gl_num,*level);
1407: }
1409: /******************************************************************************
1410: Function: sgl_radd()
1412: Input :
1413: Output:
1414: Return:
1415: Description:
1416: ******************************************************************************/
1417: static void
1418: sgl_radd(double *vals, int level)
1419: {
1420: #if defined NXSRC || defined MPISRC
1421: int edge, type, dest, source, len, mask;
1422: double tmp, *work;
1423: #endif
1425: #ifdef SAFE
1426: /* check to make sure comm package has been initialized */
1427: if (!p_init)
1428: {comm_init();}
1429: #endif
1432: #if defined NXSRC
1433: {
1434: long msg_id;
1435:
1436: /* all msgs will be of the same length */
1437: work = &tmp;
1438: len = REAL_LEN;
1439:
1440: if (level > i_log2_num_nodes)
1441: {error_msg_fatal("sgl_radd() :: level too big?");}
1442:
1443: if (level<=0)
1444: {return;}
1445:
1446: /* implement the mesh fan in/out exchange algorithm */
1447: if (my_id<floor_num_nodes)
1448: {
1449: mask = 0;
1450: for (edge = 0; edge < level; edge++)
1451: {
1452: if (!(my_id & mask))
1453: {
1454: source = dest = edge_node[edge];
1455: type = MSGTAG3 + my_id + (num_nodes*edge);
1456: if (my_id > dest)
1457: {
1458: msg_id = isend(type,vals,len,dest,0);
1459: msgwait(msg_id);
1460: }
1461: else
1462: {
1463: type = type - my_id + source;
1464: msg_id = irecv(type,work,len);
1465: msgwait(msg_id);
1466: vals[0] += work[0];
1467: }
1468: }
1469: mask <<= 1;
1470: mask += 1;
1471: }
1472: }
1473:
1474: if (my_id<floor_num_nodes)
1475: {
1476: mask >>= 1;
1477: for (edge = 0; edge < level; edge++)
1478: {
1479: if (!(my_id & mask))
1480: {
1481: source = dest = edge_node[level-edge-1];
1482: type = MSGTAG3 + my_id + (num_nodes*edge);
1483: if (my_id < dest)
1484: {
1485: msg_id = isend(type,vals,len,dest,0);
1486: msgwait(msg_id);
1487: }
1488: else
1489: {
1490: type = type - my_id + source;
1491: msg_id = irecv(type,work,len);
1492: msgwait(msg_id);
1493: vals[0] = work[0];
1494: }
1495: }
1496: mask >>= 1;
1497: }
1498: }
1499: }
1500: #elif defined MPISRC
1501: {
1502: MPI_Request msg_id;
1503: MPI_Status status;
1504:
1505: /* all msgs will be of the same length */
1506: work = &tmp;
1507: len = REAL_LEN;
1508:
1509: if (level > i_log2_num_nodes)
1510: {error_msg_fatal("sgl_radd() :: level too big?");}
1511:
1512: if (level<=0)
1513: {return;}
1514:
1515: /* implement the mesh fan in/out exchange algorithm */
1516: if (my_id<floor_num_nodes)
1517: {
1518: mask = 0;
1519: for (edge = 0; edge < level; edge++)
1520: {
1521: if (!(my_id & mask))
1522: {
1523: source = dest = edge_node[edge];
1524: type = MSGTAG3 + my_id + (num_nodes*edge);
1525: if (my_id > dest)
1526: {
1527: MPI_Isend(vals,1,REAL_TYPE,dest,type,MPI_COMM_WORLD,
1528: &msg_id);
1529: MPI_Wait(&msg_id,&status);
1530: /*msg_id = isend(type,vals,len,dest,0);
1531: msgwait(msg_id);*/
1532: }
1533: else
1534: {
1535: type = type - my_id + source;
1536: MPI_Irecv(work,1,REAL_TYPE,MPI_ANY_SOURCE,
1537: type,MPI_COMM_WORLD,&msg_id);
1538: MPI_Wait(&msg_id,&status);
1539: /*msg_id = irecv(type,work,len);
1540: msgwait(msg_id); */
1541: vals[0] += work[0];
1542: }
1543: }
1544: mask <<= 1;
1545: mask += 1;
1546: }
1547: }
1548:
1549: if (my_id<floor_num_nodes)
1550: {
1551: mask >>= 1;
1552: for (edge = 0; edge < level; edge++)
1553: {
1554: if (!(my_id & mask))
1555: {
1556: source = dest = edge_node[level-edge-1];
1557: type = MSGTAG3 + my_id + (num_nodes*edge);
1558: if (my_id < dest)
1559: {
1560: MPI_Isend(vals,1,REAL_TYPE,dest,type,MPI_COMM_WORLD,
1561: &msg_id);
1562: MPI_Wait(&msg_id,&status);
1563: /* msg_id = isend(type,vals,len,dest,0);
1564: msgwait(msg_id); */
1565: }
1566: else
1567: {
1568: type = type - my_id + source;
1569: MPI_Irecv(work,1,REAL_TYPE,MPI_ANY_SOURCE,
1570: type,MPI_COMM_WORLD,&msg_id);
1571: MPI_Wait(&msg_id,&status);
1572: /*msg_id = irecv(type,work,len);
1573: msgwait(msg_id); */
1574: vals[0] = work[0];
1575: }
1576: }
1577: mask >>= 1;
1578: }
1579: }
1580: }
1581: #else
1582: return;
1583: #endif
1584: }
1588: /******************************************************************************
1589: Function: hmt_concat_()
1591: Input :
1592: Output:
1593: Return:
1594: Description:
1595:
1596: ii+1 entries in seg :: 0 .. ii
1597: ******************************************************************************/
1598: void
1599: hmt_concat_ (REAL *vals, REAL *work, int *size)
1600: {
1601: hmt_concat(vals, work, *size);
1602: }
1606: /******************************************************************************
1607: Function: hmt_concat()
1609: Input :
1610: Output:
1611: Return:
1612: Description:
1613:
1614: ii+1 entries in seg :: 0 .. ii
1616: ******************************************************************************/
1617: static void
1618: hmt_concat(REAL *vals, REAL *work, int size)
1619: {
1620: #if defined NXSRC || defined MPISRC
1621: int mask,stage_n;
1622: int edge, type, dest, source, len;
1623: int offset;
1624: double *dptr;
1625: #endif
1627: #ifdef SAFE
1628: /* check to make sure comm package has been initialized */
1629: if (!p_init)
1630: {comm_init();}
1631: #endif
1633: /* all msgs are *NOT* the same length */
1634: /* implement the mesh fan in/out exchange algorithm */
1635: rvec_copy(work,vals,size);
1637: #if defined NXSRC
1638: mask = 0;
1639: stage_n = size;
1640: {
1641: long msg_id;
1642:
1643: dptr = work+size;
1644: for (edge = 0; edge < i_log2_num_nodes; edge++)
1645: {
1646: len = stage_n * REAL_LEN;
1647:
1648: if (!(my_id & mask))
1649: {
1650: source = dest = edge_node[edge];
1651: type = MSGTAG3 + my_id + (num_nodes*edge);
1652: if (my_id > dest)
1653: {
1654: msg_id = isend(type, work, len,dest,0);
1655: msgwait(msg_id);
1656: }
1657: else
1658: {
1659: type = type - my_id + source;
1660: msg_id = irecv(type, dptr,len);
1661: msgwait(msg_id);
1662: }
1663: }
1664:
1665: #ifdef DEBUG_1
1666: ierror_msg_warning_n0("stage_n = ",stage_n);
1667: #endif
1669: dptr += stage_n;
1670: stage_n <<=1;
1671: mask <<= 1;
1672: mask += 1;
1673: }
1674:
1675: size = stage_n;
1676: stage_n >>=1;
1677: dptr -= stage_n;
1678:
1679: mask >>= 1;
1680:
1681: for (edge = 0; edge < i_log2_num_nodes; edge++)
1682: {
1683: len = (size-stage_n) * REAL_LEN;
1684:
1685: if (!(my_id & mask) && stage_n)
1686: {
1687: source = dest = edge_node[i_log2_num_nodes-edge-1];
1688: type = MSGTAG6 + my_id + (num_nodes*edge);
1689: if (my_id < dest)
1690: {
1691: msg_id = isend(type,dptr,len,dest,0);
1692: msgwait(msg_id);
1693: }
1694: else
1695: {
1696: type = type - my_id + source;
1697: msg_id = irecv(type,dptr,len);
1698: msgwait(msg_id);
1699: }
1700: }
1701:
1702: #ifdef DEBUG_1
1703: ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1704: #endif
1705:
1706: stage_n >>= 1;
1707: dptr -= stage_n;
1708: mask >>= 1;
1709: }
1710: }
1711: #elif defined MPISRC
1712: {
1713: MPI_Request msg_id;
1714: MPI_Status status;
1715: mask = 0; stage_n = size;
1716: dptr = work+size;
1717: for (edge = 0; edge < i_log2_num_nodes; edge++)
1718: {
1719: len = stage_n * REAL_LEN;
1720:
1721: if (!(my_id & mask))
1722: {
1723: source = dest = edge_node[edge];
1724: type = MSGTAG3 + my_id + (num_nodes*edge);
1725: if (my_id > dest)
1726: {
1727: MPI_Isend(work,stage_n,REAL_TYPE,dest,type,MPI_COMM_WORLD,
1728: &msg_id);
1729: MPI_Wait(&msg_id,&status);
1730: /*msg_id = isend(type, work, len,dest,0);
1731: msgwait(msg_id);*/
1732: }
1733: else
1734: {
1735: type = type - my_id + source;
1736: MPI_Irecv(dptr,stage_n,REAL_TYPE,MPI_ANY_SOURCE,
1737: type,MPI_COMM_WORLD,&msg_id);
1738: MPI_Wait(&msg_id,&status);
1739: /* msg_id = irecv(type, dptr,len);
1740: msgwait(msg_id);*/
1741: }
1742: }
1743:
1744: #ifdef DEBUG_1
1745: ierror_msg_warning_n0("stage_n = ",stage_n);
1746: #endif
1748: dptr += stage_n;
1749: stage_n <<=1;
1750: mask <<= 1;
1751: mask += 1;
1752: }
1753:
1754: size = stage_n;
1755: stage_n >>=1;
1756: dptr -= stage_n;
1757:
1758: mask >>= 1;
1759:
1760: for (edge = 0; edge < i_log2_num_nodes; edge++)
1761: {
1762: len = (size-stage_n) * REAL_LEN;
1763:
1764: if (!(my_id & mask) && stage_n)
1765: {
1766: source = dest = edge_node[i_log2_num_nodes-edge-1];
1767: type = MSGTAG6 + my_id + (num_nodes*edge);
1768: if (my_id < dest)
1769: {
1770: MPI_Isend(dptr,size-stage_n,REAL_TYPE,dest,type,MPI_COMM_WORLD,
1771: &msg_id);
1772: MPI_Wait(&msg_id,&status);
1773: /*msg_id = isend(type,dptr,len,dest,0);
1774: msgwait(msg_id);*/
1776: }
1777: else
1778: {
1779: type = type - my_id + source;
1780: MPI_Irecv(dptr,size-stage_n,REAL_TYPE,MPI_ANY_SOURCE,
1781: type,MPI_COMM_WORLD,&msg_id);
1782: MPI_Wait(&msg_id,&status);
1783: /*msg_id = irecv(type,dptr,len);
1784: msgwait(msg_id);*/
1785: }
1786: }
1787:
1788: #ifdef DEBUG_1
1789: ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1790: #endif
1791:
1792: stage_n >>= 1;
1793: dptr -= stage_n;
1794: mask >>= 1;
1795: }
1796: }
1797: #else
1798: return;
1799: #endif
1800: }
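/* A minimal usage sketch for hmt_concat(): gathering size REALs from
   every node into work[].  Per the fan-in/out exchange above, each
   fan-in stage doubles the block a surviving node holds, so work must
   have room for num_nodes*size entries; the fan-out then replays the
   assembled blocks so every node finishes with the full concatenation
   (blocks ordered by hypercube dimension).  The bound 64 below is an
   arbitrary assumption for the sketch.  Guarded out of the build. */
#ifdef COMM_EXAMPLES
static void
example_hmt_concat(void)
{
  REAL mine[1], work[64];           /* assumes num_nodes*1 <= 64 */

  mine[0] = (REAL) my_id;
  hmt_concat(mine, work, 1);        /* work[0..num_nodes-1] holds all ranks */
}
#endif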
1804: /******************************************************************************
1805: Function: new_ssgl_radd()
1807: Input :
1808: Output:
1809: Return:
1810: Description:
1811:
1812: ii+1 entries in seg :: 0 .. ii
1814: ******************************************************************************/
1815: void
1816: new_ssgl_radd(register REAL *vals, register REAL *work, register int level,
1817: #if defined MPISRC
1818: register int *segs, MPI_Comm ssgl_comm)
1819: #else
1820: register int *segs)
1821: #endif
1822: {
1823: register int edge, type, dest, mask;
1824: register int stage_n;
1825: #if defined MPISRC
1826: MPI_Status status;
1827: #endif
1830: #ifdef DEBUG
1831: if (level > i_log2_num_nodes)
1832: {error_msg_fatal("new_ssgl_radd() :: level > log_2(P)!!!");}
1833: #endif
1835: #ifdef SAFE
1836: /* check to make sure comm package has been initialized */
1837: if (!p_init)
1838: {comm_init();}
1839: #endif
1842: #if defined NXSRC
1843: /* all msgs are *NOT* the same length */
1844: /* implement the mesh fan in/out exchange algorithm */
1845: for (mask=0, edge=0; edge<level; edge++, mask++)
1846: {
1847: stage_n = (segs[level] - segs[edge]);
1848: if (stage_n && !(my_id & mask))
1849: {
1850: dest = edge_node[edge];
1851: type = MSGTAG3 + my_id + (num_nodes*edge);
1852: if (my_id>dest)
1853: {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
1854: else
1855: {
1856: type = type - my_id + dest;
1857: crecv(type,work,stage_n*REAL_LEN);
1858: rvec_add(vals+segs[edge], work, stage_n);
1859: /* daxpy(vals+segs[edge], work, stage_n); */
1860: }
1861: }
1862: mask <<= 1;
1863: }
1864: mask>>=1;
1865: for (edge=0; edge<level; edge++)
1866: {
1867: stage_n = (segs[level] - segs[level-1-edge]);
1868: if (stage_n && !(my_id & mask))
1869: {
1870: dest = edge_node[level-edge-1];
1871: type = MSGTAG6 + my_id + (num_nodes*edge);
1872: if (my_id<dest)
1873: {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
1874: else
1875: {
1876: type = type - my_id + dest;
1877: crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
1878: }
1879: }
1880: mask >>= 1;
1881: }
1883: #elif defined MPISRC
1884: /* all msgs are *NOT* the same length */
1885: /* implement the mesh fan in/out exchange algorithm */
1886: for (mask=0, edge=0; edge<level; edge++, mask++)
1887: {
1888: stage_n = (segs[level] - segs[edge]);
1889: if (stage_n && !(my_id & mask))
1890: {
1891: dest = edge_node[edge];
1892: type = MSGTAG3 + my_id + (num_nodes*edge);
1893: if (my_id>dest)
1894: /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
1895: {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
1896: ssgl_comm);}
1897: else
1898: {
1899: type = type - my_id + dest;
1900: /* crecv(type,work,stage_n*REAL_LEN); */
1901: MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
1902: ssgl_comm,&status);
1903: rvec_add(vals+segs[edge], work, stage_n);
1904: /* daxpy(vals+segs[edge], work, stage_n); */
1905: }
1906: }
1907: mask <<= 1;
1908: }
1909: mask>>=1;
1910: for (edge=0; edge<level; edge++)
1911: {
1912: stage_n = (segs[level] - segs[level-1-edge]);
1913: if (stage_n && !(my_id & mask))
1914: {
1915: dest = edge_node[level-edge-1];
1916: type = MSGTAG6 + my_id + (num_nodes*edge);
1917: if (my_id<dest)
1918: /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
1919: {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
1920: ssgl_comm);}
1921: else
1922: {
1923: type = type - my_id + dest;
1924: /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
1925: MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
1926: MPI_ANY_SOURCE,type,ssgl_comm,&status);
1927: }
1928: }
1929: mask >>= 1;
1930: }
1931: #else
1932: return;
1933: #endif
1934: }
1938: /***********************************comm.c*************************************
1939: Function: giop_hc()
1941: Input :
1942: Output:
1943: Return:
1944: Description: fan-in/out version
1946: note good only for num_nodes=2^k!!!
1948: ***********************************comm.c*************************************/
1949: void
1950: giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1951: {
1952: register int mask, edge;
1953: int type, dest;
1954: vfp fp;
1955: #if defined MPISRC
1956: MPI_Status status;
1957: #elif defined NXSRC
1958: int len;
1959: #endif
1962: #ifdef SAFE
1963: /* ok ... should have some data, work, and operator(s) */
1964: if (!vals||!work||!oprs)
1965: {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}
1967: /* non-uniform should have at least two entries */
1968: if ((oprs[0] == NON_UNIFORM)&&(n<2))
1969: {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
1971: /* check to make sure comm package has been initialized */
1972: if (!p_init)
1973: {comm_init();}
1974: #endif
1976: /* if there's nothing to do return */
1977: if ((num_nodes<2)||(!n)||(dim<=0))
1978: {return;}
1980: /* the error msg says it all!!! */
1981: if (modfl_num_nodes)
1982: {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
1984: /* a negative number of items to send ==> fatal */
1985: if (n<0)
1986: {error_msg_fatal("giop_hc() :: n=%d<0?",n);}
1988: /* can't do more dimensions than exist */
1989: dim = MIN(dim,i_log2_num_nodes);
1991: /* advance to list of n operations for custom */
1992: if ((type=oprs[0])==NON_UNIFORM)
1993: {oprs++;}
1995: if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
1996: {
1997: error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1998: fp = (vfp) oprs;
1999: }
2001: #if defined NXSRC
2002: /* all msgs will be of the same length */
2003: len = n*INT_LEN;
2005: /* implement the mesh fan in/out exchange algorithm */
2006: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2007: {
2008: dest = my_id^mask;
2009: if (my_id > dest)
2010: {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
2011: else
2012: {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
2013: }
2014:
2015: /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
2016: if (edge==dim)
2017: {mask>>=1;}
2018: else
2019: {while (++edge<dim) {mask<<=1;}}
2020:
2021: for (edge=0; edge<dim; edge++,mask>>=1)
2022: {
2023: if (my_id%mask)
2024: {continue;}
2025:
2026: dest = my_id^mask;
2027: if (my_id < dest)
2028: {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
2029: else
2030: {crecv(MSGTAG4+dest,(char *)vals,len);}
2031: }
2033: #elif defined MPISRC
2034: for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2035: {
2036: dest = my_id^mask;
2037: if (my_id > dest)
2038: {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
2039: else
2040: {
2041: MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
2042: &status);
2043: (*fp)(vals, work, n, oprs);
2044: }
2045: }
2047: if (edge==dim)
2048: {mask>>=1;}
2049: else
2050: {while (++edge<dim) {mask<<=1;}}
2052: for (edge=0; edge<dim; edge++,mask>>=1)
2053: {
2054: if (my_id%mask)
2055: {continue;}
2056:
2057: dest = my_id^mask;
2058: if (my_id < dest)
2059: {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
2060: else
2061: {
2062: MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
2063: &status);
2064: }
2065: }
2066: #else
2067: return;
2068: #endif
2069: }
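/* A minimal usage sketch for giop_hc(), the integer twin of grop_hc():
   with dim==i_log2_num_nodes and a power-of-two node count it reduces
   over the whole machine.  GL_MAX is an assumed opcode name.  Guarded
   out of the build. */
#ifdef COMM_EXAMPLES
static void
example_giop_hc(void)
{
  int val[1], work[1], op[1];

  op[0]  = GL_MAX;                             /* assumed opcode name */
  val[0] = my_id;
  giop_hc(val, work, 1, op, i_log2_num_nodes); /* val[0]==num_nodes-1 everywhere */
}
#endif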