Actual source code: comm.c

  1: /*$Id: comm.c,v 1.3 2001/04/12 14:33:53 balay Exp $*/
  2: /***********************************comm.c*************************************
  3: SPARSE GATHER-SCATTER PACKAGE: bss_malloc ivec error comm gs queue

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 11.21.97
 16: ***********************************comm.c*************************************/

 18: /***********************************comm.c*************************************
 19: File Description:
 20: -----------------

 22: ***********************************comm.c*************************************/
 23: #include <stdio.h>

 25: #if   defined NXSRC
 26: #ifndef DELTA
 27: #include <nx.h>
 28: #endif

 30: #elif defined MPISRC
 31: #include <mpi.h>

 33: #endif


#include "const.h"
#include "types.h"
#include "error.h"
#include "ivec.h"
#include "bss_malloc.h"
#include "comm.h"


 44: /* global program control variables - explicitly exported */
 45: int my_id            = 0;
 46: int num_nodes        = 1;
 47: int floor_num_nodes  = 0;
 48: int i_log2_num_nodes = 0;

 50: /* global program control variables */
 51: static int p_init = 0;
 52: static int modfl_num_nodes;
 53: static int edge_not_pow_2;

 55: static unsigned int edge_node[INT_LEN*32];

 57: static void sgl_iadd(int    *vals, int level);
 58: static void sgl_radd(double *vals, int level);
 59: static void hmt_concat(REAL *vals, REAL *work, int size);


 62: /***********************************comm.c*************************************
 63: Function: giop()

 65: Input : 
 66: Output: 
 67: Return: 
 68: Description: 
 69: ***********************************comm.c*************************************/
 70: void
 71: comm_init (void)
 72: {
 73: #ifdef DEBUG  
 74:   error_msg_warning("c_init() :: start\n");
 75: #endif

 77:   if (p_init++) return;

 79: #if PETSC_SIZEOF_INT != 4
 80:   error_msg_warning("Int != 4 Bytes!");
 81: #endif


 84:   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
 85:   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

 87:   if (num_nodes> (INT_MAX >> 1))
 88:   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

 90:   ivec_zero((int *)edge_node,INT_LEN*32);

 92:   floor_num_nodes = 1;
 93:   i_log2_num_nodes = modfl_num_nodes = 0;
 94:   while (floor_num_nodes <= num_nodes)
 95:     {
 96:       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
 97:       floor_num_nodes <<= 1;
 98:       i_log2_num_nodes++;
 99:     }

101:   i_log2_num_nodes--;
102:   floor_num_nodes >>= 1;
103:   modfl_num_nodes = (num_nodes - floor_num_nodes);

105:   if ((my_id > 0) && (my_id <= modfl_num_nodes))
106:     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
107:   else if (my_id >= floor_num_nodes)
108:     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
109:     }
110:   else
111:     {edge_not_pow_2 = 0;}

113: #ifdef DEBUG  
114:   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
115: #endif
116: }



120: /***********************************comm.c*************************************
121: Function: giop()

123: Input : 
124: Output: 
125: Return: 
126: Description: fan-in/out version
127: ***********************************comm.c*************************************/
128: void
129: giop(int *vals, int *work, int n, int *oprs)
130: {
131:   register int mask, edge;
132:   int type, dest;
133:   vfp fp;
134: #if defined MPISRC
135:   MPI_Status  status;
136: #elif defined NXSRC
137:   int len;
138: #endif


141: #ifdef SAFE
142:   /* ok ... should have some data, work, and operator(s) */
143:   if (!vals||!work||!oprs)
144:     {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

146:   /* non-uniform should have at least two entries */
147:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
148:     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

150:   /* check to make sure comm package has been initialized */
151:   if (!p_init)
152:     {comm_init();}
153: #endif

155:   /* if there's nothing to do return */
156:   if ((num_nodes<2)||(!n))
157:     {
158: #ifdef DEBUG
159:       error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
160: #endif
161:       return;
162:     }

164:   /* a negative number if items to send ==> fatal */
165:   if (n<0)
166:     {error_msg_fatal("giop() :: n=%d<0?",n);}

168:   /* advance to list of n operations for custom */
169:   if ((type=oprs[0])==NON_UNIFORM)
170:     {oprs++;}

172:   /* major league hack */
173:   if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
174:     {
175:       error_msg_warning("giop() :: hope you passed in a rbfp!\n");
176:       fp = (vfp) oprs;
177:     }

179: #if  defined NXSRC
180:   /* all msgs will be of the same length */
181:   len = n*INT_LEN;

183:   /* if not a hypercube must colapse partial dim */
184:   if (edge_not_pow_2)
185:     {
186:       if (my_id >= floor_num_nodes)
187:         {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
188:       else
189:         {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
190:     }

192:   /* implement the mesh fan in/out exchange algorithm */
193:   if (my_id<floor_num_nodes)
194:     {
195:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
196:         {
197:           dest = my_id^mask;
198:           if (my_id > dest)
199:             {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
200:           else
201:             {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
202:         }

204:       mask=floor_num_nodes>>1;
205:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
206:         {
207:           if (my_id%mask)
208:             {continue;}
209: 
210:           dest = my_id^mask;
211:           if (my_id < dest)
212:             {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
213:           else
214:             {crecv(MSGTAG4+dest,(char *)vals,len);}
215:         }
216:     }

218:   /* if not a hypercube must expand to partial dim */
219:   if (edge_not_pow_2)
220:     {
221:       if (my_id >= floor_num_nodes)
222:         {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
223:       else
224:         {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
225:     }

227: #elif defined MPISRC
228:   /* all msgs will be of the same length */
229:   /* if not a hypercube must colapse partial dim */
230:   if (edge_not_pow_2)
231:     {
232:       if (my_id >= floor_num_nodes)
233:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
234:       else
235:         {
236:           MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
237:                    MPI_COMM_WORLD,&status);
238:           (*fp)(vals,work,n,oprs);
239:         }
240:     }

242:   /* implement the mesh fan in/out exchange algorithm */
243:   if (my_id<floor_num_nodes)
244:     {
245:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
246:         {
247:           dest = my_id^mask;
248:           if (my_id > dest)
249:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
250:           else
251:             {
252:               MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
253:                        MPI_COMM_WORLD, &status);
254:               (*fp)(vals, work, n, oprs);
255:             }
256:         }

258:       mask=floor_num_nodes>>1;
259:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
260:         {
261:           if (my_id%mask)
262:             {continue;}
263: 
264:           dest = my_id^mask;
265:           if (my_id < dest)
266:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
267:           else
268:             {
269:               MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
270:                        MPI_COMM_WORLD, &status);
271:             }
272:         }
273:     }

275:   /* if not a hypercube must expand to partial dim */
276:   if (edge_not_pow_2)
277:     {
278:       if (my_id >= floor_num_nodes)
279:         {
280:           MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
281:                    MPI_COMM_WORLD,&status);
282:         }
283:       else
284:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
285:     }
286: #else
287:   return;
288: #endif
289: }

291: /***********************************comm.c*************************************
292: Function: grop()

294: Input : 
295: Output: 
296: Return: 
297: Description: fan-in/out version
298: ***********************************comm.c*************************************/
299: void
300: grop(REAL *vals, REAL *work, int n, int *oprs)
301: {
302:   register int mask, edge;
303:   int type, dest;
304:   vfp fp;
305: #if defined MPISRC
306:   MPI_Status  status;
307: #elif defined NXSRC
308:   int len;
309: #endif


312: #ifdef SAFE
313:   /* ok ... should have some data, work, and operator(s) */
314:   if (!vals||!work||!oprs)
315:     {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

317:   /* non-uniform should have at least two entries */
318:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
319:     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

321:   /* check to make sure comm package has been initialized */
322:   if (!p_init)
323:     {comm_init();}
324: #endif

326:   /* if there's nothing to do return */
327:   if ((num_nodes<2)||(!n))
328:     {return;}

330:   /* a negative number of items to send ==> fatal */
331:   if (n<0)
332:     {error_msg_fatal("gdop() :: n=%d<0?",n);}

334:   /* advance to list of n operations for custom */
335:   if ((type=oprs[0])==NON_UNIFORM)
336:     {oprs++;}

338:   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
339:     {
340:       error_msg_warning("grop() :: hope you passed in a rbfp!\n");
341:       fp = (vfp) oprs;
342:     }

344: #if  defined NXSRC
345:   /* all msgs will be of the same length */
346:   len = n*REAL_LEN;

348:   /* if not a hypercube must colapse partial dim */
349:   if (edge_not_pow_2)
350:     {
351:       if (my_id >= floor_num_nodes)
352:         {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
353:       else
354:         {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
355:     }

357:   /* implement the mesh fan in/out exchange algorithm */
358:   if (my_id<floor_num_nodes)
359:     {
360:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
361:         {
362:           dest = my_id^mask;
363:           if (my_id > dest)
364:             {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
365:           else
366:             {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
367:         }

369:       mask=floor_num_nodes>>1;
370:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
371:         {
372:           if (my_id%mask)
373:             {continue;}
374: 
375:           dest = my_id^mask;
376:           if (my_id < dest)
377:             {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
378:           else
379:             {crecv(MSGTAG4+dest,(char *)vals,len);}
380:         }
381:     }

383:   /* if not a hypercube must expand to partial dim */
384:   if (edge_not_pow_2)
385:     {
386:       if (my_id >= floor_num_nodes)
387:         {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
388:       else
389:         {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
390:     }

392: #elif defined MPISRC 
393:   /* all msgs will be of the same length */
394:   /* if not a hypercube must colapse partial dim */
395:   if (edge_not_pow_2)
396:     {
397:       if (my_id >= floor_num_nodes)
398:         {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
399:                   MPI_COMM_WORLD);}
400:       else
401:         {
402:           MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
403:                    MPI_COMM_WORLD,&status);
404:           (*fp)(vals,work,n,oprs);
405:         }
406:     }

408:   /* implement the mesh fan in/out exchange algorithm */
409:   if (my_id<floor_num_nodes)
410:     {
411:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
412:         {
413:           dest = my_id^mask;
414:           if (my_id > dest)
415:             {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
416:           else
417:             {
418:               MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
419:                        MPI_COMM_WORLD, &status);
420:               (*fp)(vals, work, n, oprs);
421:             }
422:         }

424:       mask=floor_num_nodes>>1;
425:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
426:         {
427:           if (my_id%mask)
428:             {continue;}
429: 
430:           dest = my_id^mask;
431:           if (my_id < dest)
432:             {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
433:           else
434:             {
435:               MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
436:                        MPI_COMM_WORLD, &status);
437:             }
438:         }
439:     }

441:   /* if not a hypercube must expand to partial dim */
442:   if (edge_not_pow_2)
443:     {
444:       if (my_id >= floor_num_nodes)
445:         {
446:           MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
447:                    MPI_COMM_WORLD,&status);
448:         }
449:       else
450:         {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
451:                   MPI_COMM_WORLD);}
452:     }
453: #else
454:   return;
455: #endif
456: }


459: /***********************************comm.c*************************************
460: Function: grop()

462: Input : 
463: Output: 
464: Return: 
465: Description: fan-in/out version

467: note good only for num_nodes=2^k!!!

469: ***********************************comm.c*************************************/
470: void
471: grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
472: {
473:   register int mask, edge;
474:   int type, dest;
475:   vfp fp;
476: #if defined MPISRC
477:   MPI_Status  status;
478: #elif defined NXSRC
479:   int len;
480: #endif


483: #ifdef SAFE
484:   /* ok ... should have some data, work, and operator(s) */
485:   if (!vals||!work||!oprs)
486:     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

488:   /* non-uniform should have at least two entries */
489:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
490:     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

492:   /* check to make sure comm package has been initialized */
493:   if (!p_init)
494:     {comm_init();}
495: #endif

497:   /* if there's nothing to do return */
498:   if ((num_nodes<2)||(!n)||(dim<=0))
499:     {return;}

501:   /* the error msg says it all!!! */
502:   if (modfl_num_nodes)
503:     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

505:   /* a negative number of items to send ==> fatal */
506:   if (n<0)
507:     {error_msg_fatal("grop_hc() :: n=%d<0?",n);}

509:   /* can't do more dimensions then exist */
510:   dim = MIN(dim,i_log2_num_nodes);

512:   /* advance to list of n operations for custom */
513:   if ((type=oprs[0])==NON_UNIFORM)
514:     {oprs++;}

516:   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
517:     {
518:       error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
519:       fp = (vfp) oprs;
520:     }

522: #if  defined NXSRC
523:   /* all msgs will be of the same length */
524:   len = n*REAL_LEN;

526:   /* implement the mesh fan in/out exchange algorithm */
527:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
528:     {
529:       dest = my_id^mask;
530:       if (my_id > dest)
531:         {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
532:       else
533:         {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
534:     }
535: 
536:   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
537:   if (edge==dim)
538:     {mask>>=1;}
539:   else
540:     {while (++edge<dim) {mask<<=1;}}
541: 
542:   for (edge=0; edge<dim; edge++,mask>>=1)
543:     {
544:       if (my_id%mask)
545:         {continue;}
546: 
547:       dest = my_id^mask;
548:       if (my_id < dest)
549:         {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
550:       else
551:         {crecv(MSGTAG4+dest,(char *)vals,len);}
552:     }

554: #elif defined MPISRC 
555:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
556:     {
557:       dest = my_id^mask;
558:       if (my_id > dest)
559:         {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
560:       else
561:         {
562:           MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
563:                    &status);
564:           (*fp)(vals, work, n, oprs);
565:         }
566:     }

568:   if (edge==dim)
569:     {mask>>=1;}
570:   else
571:     {while (++edge<dim) {mask<<=1;}}

573:   for (edge=0; edge<dim; edge++,mask>>=1)
574:     {
575:       if (my_id%mask)
576:         {continue;}
577: 
578:       dest = my_id^mask;
579:       if (my_id < dest)
580:         {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
581:       else
582:         {
583:           MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
584:                    &status);
585:         }
586:     }
587: #else
588:   return;
589: #endif
590: }


593: /***********************************comm.c*************************************
594: Function: gop()

596: Input : 
597: Output: 
598: Return: 
599: Description: fan-in/out version
600: ***********************************comm.c*************************************/
601: void
602: gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
603: {
604:   register int mask, edge;
605:   int dest;
606: #if defined MPISRC
607:   MPI_Status  status;
608:   MPI_Op op;
609: #elif defined NXSRC
610:   int len;
611: #endif


614: #ifdef SAFE
615:   /* check to make sure comm package has been initialized */
616:   if (!p_init)
617:     {comm_init();}

619:   /* ok ... should have some data, work, and operator(s) */
620:   if (!vals||!work||!fp)
621:     {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
622: #endif

624:   /* if there's nothing to do return */
625:   if ((num_nodes<2)||(!n))
626:     {return;}

628:   /* a negative number of items to send ==> fatal */
629:   if (n<0)
630:     {error_msg_fatal("gop() :: n=%d<0?",n);}

632: #if  defined NXSRC
633:   switch (dt) {
634:   case REAL_TYPE:
635:     len = n*REAL_LEN;
636:     break;
637:   case INT_TYPE:
638:     len = n*INT_LEN;
639:     break;
640:   default:
641:     error_msg_fatal("gop() :: unrecognized datatype!!!\n");
642:   }

644:   /* if not a hypercube must colapse partial dim */
645:   if (edge_not_pow_2)
646:     {
647:       if (my_id >= floor_num_nodes)
648:         {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
649:       else
650:         {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);}
651:     }

653:   /* implement the mesh fan in/out exchange algorithm */
654:   if (my_id<floor_num_nodes)
655:     {
656:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
657:         {
658:           dest = my_id^mask;
659:           if (my_id > dest)
660:             {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
661:           else
662:             {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);}
663:         }

665:       mask=floor_num_nodes>>1;
666:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
667:         {
668:           if (my_id%mask)
669:             {continue;}
670: 
671:           dest = my_id^mask;
672:           if (my_id < dest)
673:             {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
674:           else
675:             {crecv(MSGTAG4+dest,(char *)vals,len);}
676:         }
677:     }

679:   /* if not a hypercube must expand to partial dim */
680:   if (edge_not_pow_2)
681:     {
682:       if (my_id >= floor_num_nodes)
683:         {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
684:       else
685:         {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
686:     }

688: #elif defined MPISRC 

690:   if (comm_type==MPI)
691:     {
692:       MPI_Op_create(fp,TRUE,&op);
693:       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
694:       MPI_Op_free(&op);
695:       return;
696:     }

698:   /* if not a hypercube must colapse partial dim */
699:   if (edge_not_pow_2)
700:     {
701:       if (my_id >= floor_num_nodes)
702:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
703:                   MPI_COMM_WORLD);}
704:       else
705:         {
706:           MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
707:                    MPI_COMM_WORLD,&status);
708:           (*fp)(vals,work,&n,&dt);
709:         }
710:     }

712:   /* implement the mesh fan in/out exchange algorithm */
713:   if (my_id<floor_num_nodes)
714:     {
715:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
716:         {
717:           dest = my_id^mask;
718:           if (my_id > dest)
719:             {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
720:           else
721:             {
722:               MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
723:                        MPI_COMM_WORLD, &status);
724:               (*fp)(vals, work, &n, &dt);
725:             }
726:         }

728:       mask=floor_num_nodes>>1;
729:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
730:         {
731:           if (my_id%mask)
732:             {continue;}
733: 
734:           dest = my_id^mask;
735:           if (my_id < dest)
736:             {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
737:           else
738:             {
739:               MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
740:                        MPI_COMM_WORLD, &status);
741:             }
742:         }
743:     }

745:   /* if not a hypercube must expand to partial dim */
746:   if (edge_not_pow_2)
747:     {
748:       if (my_id >= floor_num_nodes)
749:         {
750:           MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
751:                    MPI_COMM_WORLD,&status);
752:         }
753:       else
754:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
755:                   MPI_COMM_WORLD);}
756:     }
757: #else
758:   return;
759: #endif
760: }






767: /******************************************************************************
768: Function: giop()

770: Input : 
771: Output: 
772: Return: 
773: Description: 
774:  
775: ii+1 entries in seg :: 0 .. ii

777: ******************************************************************************/
778: void 
779: ssgl_radd(register REAL *vals, register REAL *work, register int level, 
780:           register int *segs)
781: {
782:   register int edge, type, dest, mask;
783:   register int stage_n;
784: #if defined MPISRC
785:   MPI_Status  status;
786: #endif


789: #ifdef DEBUG
790:   if (level > i_log2_num_nodes)
791:     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
792: #endif

794: #ifdef SAFE
795:   /* check to make sure comm package has been initialized */
796:   if (!p_init)
797:     {comm_init();}
798: #endif


801: #if defined NXSRC
802:   /* all msgs are *NOT* the same length */
803:   /* implement the mesh fan in/out exchange algorithm */
804:   for (mask=0, edge=0; edge<level; edge++, mask++)
805:     {
806:       stage_n = (segs[level] - segs[edge]);
807:       if (stage_n && !(my_id & mask))
808:         {
809:           dest = edge_node[edge];
810:           type = MSGTAG3 + my_id + (num_nodes*edge);
811:           if (my_id>dest)
812:             {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
813:           else
814:             {
815:               type =  type - my_id + dest;
816:               crecv(type,work,stage_n*REAL_LEN);
817:               rvec_add(vals+segs[edge], work, stage_n);
818: /*            daxpy(vals+segs[edge], work, stage_n); */
819:             }
820:         }
821:       mask <<= 1;
822:     }
823:   mask>>=1;
824:   for (edge=0; edge<level; edge++)
825:     {
826:       stage_n = (segs[level] - segs[level-1-edge]);
827:       if (stage_n && !(my_id & mask))
828:         {
829:           dest = edge_node[level-edge-1];
830:           type = MSGTAG6 + my_id + (num_nodes*edge);
831:           if (my_id<dest)
832:             {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
833:           else
834:             {
835:               type =  type - my_id + dest;
836:               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
837:             }
838:         }
839:       mask >>= 1;
840:     }

842: #elif defined MPISRC
843:   /* all msgs are *NOT* the same length */
844:   /* implement the mesh fan in/out exchange algorithm */
845:   for (mask=0, edge=0; edge<level; edge++, mask++)
846:     {
847:       stage_n = (segs[level] - segs[edge]);
848:       if (stage_n && !(my_id & mask))
849:         {
850:           dest = edge_node[edge];
851:           type = MSGTAG3 + my_id + (num_nodes*edge);
852:           if (my_id>dest)
853: /*            {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
854:           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
855:                       MPI_COMM_WORLD);}
856:           else
857:             {
858:               type =  type - my_id + dest;
859: /*              crecv(type,work,stage_n*REAL_LEN); */
860:               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
861:                        MPI_COMM_WORLD,&status);
862:               rvec_add(vals+segs[edge], work, stage_n);
863: /*            daxpy(vals+segs[edge], work, stage_n); */
864:             }
865:         }
866:       mask <<= 1;
867:     }
868:   mask>>=1;
869:   for (edge=0; edge<level; edge++)
870:     {
871:       stage_n = (segs[level] - segs[level-1-edge]);
872:       if (stage_n && !(my_id & mask))
873:         {
874:           dest = edge_node[level-edge-1];
875:           type = MSGTAG6 + my_id + (num_nodes*edge);
876:           if (my_id<dest)
877: /*            {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
878:             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
879:                       MPI_COMM_WORLD);}
880:           else
881:             {
882:               type =  type - my_id + dest;
883: /*               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
884:               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
885:                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
886:             }
887:         }
888:       mask >>= 1;
889:     }
890: #else
891:   return;
892: #endif
893: }



897: /***********************************comm.c*************************************
898: Function: grop_hc_vvl()

900: Input : 
901: Output: 
902: Return: 
903: Description: fan-in/out version

905: note good only for num_nodes=2^k!!!

907: ***********************************comm.c*************************************/
908: void
909: grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
910: {
911:   register int mask, edge, n;
912:   int type, dest;
913:   vfp fp;
914: #if defined MPISRC
915:   MPI_Status  status;
916: #elif defined NXSRC
917:   int len;
918: #endif


921:   error_msg_fatal("grop_hc_vvl() :: is not working!\n");

923: #ifdef SAFE
924:   /* ok ... should have some data, work, and operator(s) */
925:   if (!vals||!work||!oprs||!segs)
926:     {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

928:   /* non-uniform should have at least two entries */
929: #if defined(not_used)
930:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
931:     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
932: #endif

934:   /* check to make sure comm package has been initialized */
935:   if (!p_init)
936:     {comm_init();}
937: #endif

939:   /* if there's nothing to do return */
940:   if ((num_nodes<2)||(dim<=0))
941:     {return;}

943:   /* the error msg says it all!!! */
944:   if (modfl_num_nodes)
945:     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

947:   /* can't do more dimensions then exist */
948:   dim = MIN(dim,i_log2_num_nodes);

950:   /* advance to list of n operations for custom */
951:   if ((type=oprs[0])==NON_UNIFORM)
952:     {oprs++;}

954:   if ((fp = (vfp) rvec_fct_addr(type)) == NULL)
955:     {
956:       error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
957:       fp = (vfp) oprs;
958:     }

960: #if  defined NXSRC
961:   /* all msgs are *NOT* the same length */
962:   /* implement the mesh fan in/out exchange algorithm */
963:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
964:     {
965:       n = segs[dim]-segs[edge];
966:       /*
967:       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
968:                         edge,segs[edge]);
969:       */
970:       len = n*REAL_LEN;
971:       dest = my_id^mask;
972:       if (my_id > dest)
973:         {csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;}
974:       else
975:         {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);}
976:     }

978:   if (edge==dim)
979:     {mask>>=1;}
980:   else
981:     {while (++edge<dim) {mask<<=1;}}

983:   for (edge=0; edge<dim; edge++,mask>>=1)
984:     {
985:       if (my_id%mask)
986:         {continue;}
987:       len = (segs[dim]-segs[dim-1-edge])*REAL_LEN;
988:       /*
989:       error_msg_warning("n=%d, segs[%d]=%d, segs[%d]=%d\n",n,dim,segs[dim],
990:                          dim-1-edge,segs[dim-1-edge]);
991:       */
992:       dest = my_id^mask;
993:       if (my_id < dest)
994:         {csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);}
995:       else
996:         {crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);}
997:     }

999: #elif defined MPISRC 
1000:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
1001:     {
1002:       n = segs[dim]-segs[edge];
1003:       dest = my_id^mask;
1004:       if (my_id > dest)
1005:         {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
1006:       else
1007:         {
1008:           MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
1009:                    MPI_COMM_WORLD, &status);
1010:           (*fp)(vals+segs[edge], work, n, oprs);
1011:         }
1012:     }

1014:   if (edge==dim)
1015:     {mask>>=1;}
1016:   else
1017:     {while (++edge<dim) {mask<<=1;}}

1019:   for (edge=0; edge<dim; edge++,mask>>=1)
1020:     {
1021:       if (my_id%mask)
1022:         {continue;}
1023: 
1024:       n = (segs[dim]-segs[dim-1-edge]);
1025: 
1026:       dest = my_id^mask;
1027:       if (my_id < dest)
1028:         {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
1029:                   MPI_COMM_WORLD);}
1030:       else
1031:         {
1032:           MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
1033:                    MSGTAG4+dest,MPI_COMM_WORLD, &status);
1034:         }
1035:     }
1036: #else
1037:   return;
1038: #endif
1039: }


#ifdef INPROG

/***********************************comm.c*************************************
Function: proc_sync()

Input : none explicitly; relies on globals my_id, floor_num_nodes,
        i_log2_num_nodes, edge_not_pow_2.
Output: none
Return: none
Description: hypercube-based barrier/synchronization (fan-in then fan-out
over the embedded hypercube, with a pre/post exchange for the non-power-of-2
remainder nodes).

NOTE(review): this block is guarded by INPROG and is dead code as written:
it references vals, work, len, n, fp and oprs, none of which are declared
in this function, so it cannot compile if INPROG is ever defined.  Left
byte-identical; only comments added.
***********************************comm.c*************************************/
void
proc_sync()
{
  register int mask, edge;
  int type, dest;
#if defined MPISRC
  MPI_Status  status;
#endif


#ifdef DEBUG
  error_msg_warning("begin proc_sync()\n");
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

#if  defined NXSRC
  /* all msgs will be of zero length */

  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
      else
        {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: lower-numbered partner accumulates, higher sends and drops out */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
          else
            {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
        }

      /* fan-out: broadcast the result back down the cube */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
          else
            {crecv(MSGTAG4+dest,(char *)vals,len);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
      else
        {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
    }

#elif defined MPISRC
  /* all msgs will be of the same length */
  /* if not a hypercube must colapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
#else
  return;
#endif
}
#endif


/* hmt hack :: Fortran-callable bitwise XOR of two ints (args by reference) */
int 
hmt_xor_ (int *i1, int *i2)
{
  int a = *i1;
  int b = *i2;

  return (a ^ b);
}


1193: /******************************************************************************
1194: Function: giop()

1196: Input : 
1197: Output: 
1198: Return: 
1199: Description: 
1200:  
1201: ii+1 entries in seg :: 0 .. ii

1203: ******************************************************************************/
1204: void 
1205: staged_gs_ (register REAL *vals, register REAL *work, register int *level,
1206:             register int *segs)
1207: {
1208:   ssgl_radd(vals, work, *level, segs);
1209: }

1211: /******************************************************************************
1212: Function: giop()

1214: Input : 
1215: Output: 
1216: Return: 
1217: Description: 
1218: ******************************************************************************/
1219: void
1220: staged_iadd_ (int *gl_num, int *level)
1221: {
1222:   sgl_iadd(gl_num,*level);
1223: }



1227: /******************************************************************************
1228: Function: giop()

1230: Input : 
1231: Output: 
1232: Return: 
1233: Description: 
1234: ******************************************************************************/
1235: static void
1236: sgl_iadd(int *vals, int level)
1237: {
1238:   int edge, type, dest, source, len, mask = -1;
1239:   int tmp, *work;


1242: #ifdef SAFE
1243:   /* check to make sure comm package has been initialized */
1244:   if (!p_init)
1245:     {comm_init();}
1246: #endif


1249:   /* all msgs will be of the same length */
1250:   work = &tmp;
1251:   len = INT_LEN;

1253:   if (level > i_log2_num_nodes)
1254:     {error_msg_fatal("sgl_add() :: level too big?");}

1256:   if (level<=0)
1257:     {return;}

1259: #if defined NXSRC
1260:   {
1261:     long msg_id;
1262:     /* implement the mesh fan in/out exchange algorithm */
1263:     if (my_id<floor_num_nodes)
1264:       {
1265:         mask = 0;
1266:         for (edge = 0; edge < level; edge++)
1267:           {
1268:             if (!(my_id & mask))
1269:               {
1270:                 source = dest = edge_node[edge];
1271:                 type = MSGTAG1 + my_id + (num_nodes*edge);
1272:                 if (my_id > dest)
1273:                   {
1274:                     msg_id = isend(type,vals,len,dest,0);
1275:                     msgwait(msg_id);
1276:                   }
1277:                 else
1278:                   {
1279:                     type =  type - my_id + source;
1280:                     msg_id = irecv(type,work,len);
1281:                     msgwait(msg_id);
1282:                     vals[0] += work[0];
1283:                   }
1284:               }
1285:             mask <<= 1;
1286:             mask += 1;
1287:           }
1288:       }
1289: 
1290:     if (my_id<floor_num_nodes)
1291:       {
1292:         mask >>= 1;
1293:         for (edge = 0; edge < level; edge++)
1294:           {
1295:             if (!(my_id & mask))
1296:               {
1297:                 source = dest = edge_node[level-edge-1];
1298:                 type = MSGTAG1 + my_id + (num_nodes*edge);
1299:                 if (my_id < dest)
1300:                   {
1301:                     msg_id = isend(type,vals,len,dest,0);
1302:                     msgwait(msg_id);
1303:                   }
1304:                 else
1305:                   {
1306:                     type =  type - my_id + source;
1307:                     msg_id = irecv(type,work,len);
1308:                     msgwait(msg_id);
1309:                     vals[0] = work[0];
1310:                   }
1311:               }
1312:             mask >>= 1;
1313:           }
1314:       }
1315:   }
1316: #elif defined MPISRC
1317:   {
1318:     MPI_Request msg_id;
1319:     MPI_Status status;
1320: 
1321:     /* implement the mesh fan in/out exchange algorithm */
1322:     if (my_id<floor_num_nodes)
1323:       {
1324:         mask = 0;
1325:         for (edge = 0; edge < level; edge++)
1326:           {
1327:             if (!(my_id & mask))
1328:               {
1329:                 source = dest = edge_node[edge];
1330:                 type = MSGTAG1 + my_id + (num_nodes*edge);
1331:                 if (my_id > dest)
1332:                   {
1333:                     MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1334:                               &msg_id);
1335:                     MPI_Wait(&msg_id,&status);
1336:                     /* msg_id = isend(type,vals,len,dest,0);
1337:                        msgwait(msg_id); */
1338:                   }
1339:                 else
1340:                   {
1341:                     type =  type - my_id + source;
1342:                     MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1343:                               type,MPI_COMM_WORLD,&msg_id);
1344:                     MPI_Wait(&msg_id,&status);
1345:                     /* msg_id = irecv(type,work,len);
1346:                        msgwait(msg_id); */
1347:                     vals[0] += work[0];
1348:                   }
1349:               }
1350:             mask <<= 1;
1351:             mask += 1;
1352:           }
1353:       }
1354: 
1355:     if (my_id<floor_num_nodes)
1356:       {
1357:         mask >>= 1;
1358:         for (edge = 0; edge < level; edge++)
1359:           {
1360:             if (!(my_id & mask))
1361:               {
1362:                 source = dest = edge_node[level-edge-1];
1363:                 type = MSGTAG1 + my_id + (num_nodes*edge);
1364:                 if (my_id < dest)
1365:                   {
1366:                     MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1367:                               &msg_id);
1368:                     MPI_Wait(&msg_id,&status);
1369:                     /* msg_id = isend(type,vals,len,dest,0);
1370:                     msgwait(msg_id);*/
1371:                   }
1372:                 else
1373:                   {
1374:                     type =  type - my_id + source;
1375:                     MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1376:                               type,MPI_COMM_WORLD,&msg_id);
1377:                     MPI_Wait(&msg_id,&status);
1378:                     /* msg_id = irecv(type,work,len);
1379:                     msgwait(msg_id); */
1380:                     vals[0] = work[0];
1381:                   }
1382:               }
1383:             mask >>= 1;
1384:           }
1385:       }
1386:   }
1387: #else
1388:   return;
1389: #endif

1391: }



1395: /******************************************************************************
1396: Function: giop()

1398: Input : 
1399: Output: 
1400: Return: 
1401: Description: 
1402: ******************************************************************************/
1403: void
1404: staged_radd_ (double *gl_num, int *level)
1405: {
1406:   sgl_radd(gl_num,*level);
1407: }

1409: /******************************************************************************
1410: Function: giop()

1412: Input : 
1413: Output: 
1414: Return: 
1415: Description: 
1416: ******************************************************************************/
1417: static void
1418: sgl_radd(double *vals, int level)
1419: {
1420: #if defined NXSRC
1421:   int edge, type, dest, source, len, mask;
1422:   double tmp, *work;
1423: #endif

1425: #ifdef SAFE
1426:   /* check to make sure comm package has been initialized */
1427:   if (!p_init)
1428:     {comm_init();}
1429: #endif


1432: #if defined NXSRC
1433:   {
1434:     long msg_id;
1435: 
1436:     /* all msgs will be of the same length */
1437:     work = &tmp;
1438:     len = REAL_LEN;
1439: 
1440:     if (level > i_log2_num_nodes)
1441:       {error_msg_fatal("sgl_add() :: level too big?");}
1442: 
1443:     if (level<=0)
1444:       {return;}
1445: 
1446:     /* implement the mesh fan in/out exchange algorithm */
1447:     if (my_id<floor_num_nodes)
1448:       {
1449:         mask = 0;
1450:         for (edge = 0; edge < level; edge++)
1451:           {
1452:             if (!(my_id & mask))
1453:               {
1454:                 source = dest = edge_node[edge];
1455:                 type = MSGTAG3 + my_id + (num_nodes*edge);
1456:                 if (my_id > dest)
1457:                   {
1458:                     msg_id = isend(type,vals,len,dest,0);
1459:                     msgwait(msg_id);
1460:                   }
1461:                 else
1462:                   {
1463:                     type =  type - my_id + source;
1464:                     msg_id = irecv(type,work,len);
1465:                     msgwait(msg_id);
1466:                     vals[0] += work[0];
1467:                   }
1468:               }
1469:             mask <<= 1;
1470:             mask += 1;
1471:           }
1472:       }
1473: 
1474:     if (my_id<floor_num_nodes)
1475:       {
1476:         mask >>= 1;
1477:         for (edge = 0; edge < level; edge++)
1478:           {
1479:             if (!(my_id & mask))
1480:               {
1481:                 source = dest = edge_node[level-edge-1];
1482:                 type = MSGTAG3 + my_id + (num_nodes*edge);
1483:                 if (my_id < dest)
1484:                   {
1485:                     msg_id = isend(type,vals,len,dest,0);
1486:                     msgwait(msg_id);
1487:                   }
1488:                 else
1489:                   {
1490:                     type =  type - my_id + source;
1491:                     msg_id = irecv(type,work,len);
1492:                     msgwait(msg_id);
1493:                     vals[0] = work[0];
1494:                   }
1495:               }
1496:             mask >>= 1;
1497:           }
1498:       }
1499:   }
1500: #elif defined MRISRC
1501:   {
1502:     MPI_Request msg_id;
1503:     MPI_Status status;
1504: 
1505:     /* all msgs will be of the same length */
1506:     work = &tmp;
1507:     len = REAL_LEN;
1508: 
1509:     if (level > i_log2_num_nodes)
1510:       {error_msg_fatal("sgl_add() :: level too big?");}
1511: 
1512:     if (level<=0)
1513:       {return;}
1514: 
1515:     /* implement the mesh fan in/out exchange algorithm */
1516:     if (my_id<floor_num_nodes)
1517:       {
1518:         mask = 0;
1519:         for (edge = 0; edge < level; edge++)
1520:           {
1521:             if (!(my_id & mask))
1522:               {
1523:                 source = dest = edge_node[edge];
1524:                 type = MSGTAG3 + my_id + (num_nodes*edge);
1525:                 if (my_id > dest)
1526:                   {
1527:                     MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1528:                               &msg_id);
1529:                     MPI_Wait(&msg_id,&status);
1530:                     /*msg_id = isend(type,vals,len,dest,0);
1531:                     msgwait(msg_id);*/
1532:                   }
1533:                 else
1534:                   {
1535:                     type =  type - my_id + source;
1536:                     MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1537:                               type,MPI_COMM_WORLD,&msg_id);
1538:                     MPI_Wait(&msg_id,&status);
1539:                     /*msg_id = irecv(type,work,len);
1540:                     msgwait(msg_id); */
1541:                     vals[0] += work[0];
1542:                   }
1543:               }
1544:             mask <<= 1;
1545:             mask += 1;
1546:           }
1547:       }
1548: 
1549:     if (my_id<floor_num_nodes)
1550:       {
1551:         mask >>= 1;
1552:         for (edge = 0; edge < level; edge++)
1553:           {
1554:             if (!(my_id & mask))
1555:               {
1556:                 source = dest = edge_node[level-edge-1];
1557:                 type = MSGTAG3 + my_id + (num_nodes*edge);
1558:                 if (my_id < dest)
1559:                   {
1560:                     MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1561:                               &msg_id);
1562:                     MPI_Wait(&msg_id,&status);
1563:                     /* msg_id = isend(type,vals,len,dest,0);
1564:                     msgwait(msg_id); */
1565:                   }
1566:                 else
1567:                   {
1568:                     type =  type - my_id + source;
1569:                     MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
1570:                               type,MPI_COMM_WORLD,&msg_id);
1571:                     MPI_Wait(&msg_id,&status);
1572:                     /*msg_id = irecv(type,work,len);
1573:                     msgwait(msg_id); */
1574:                     vals[0] = work[0];
1575:                   }
1576:               }
1577:             mask >>= 1;
1578:           }
1579:       }
1580:   }
1581: #else
1582:   return;
1583: #endif
1584: }



1588: /******************************************************************************
1589: Function: giop()

1591: Input : 
1592: Output: 
1593: Return: 
1594: Description: 
1595:  
1596: ii+1 entries in seg :: 0 .. ii
1597: ******************************************************************************/
1598: void 
1599: hmt_concat_ (REAL *vals, REAL *work, int *size)
1600: {
1601:   hmt_concat(vals, work, *size);
1602: }



1606: /******************************************************************************
1607: Function: giop()

1609: Input : 
1610: Output: 
1611: Return: 
1612: Description: 
1613:  
1614: ii+1 entries in seg :: 0 .. ii

1616: ******************************************************************************/
1617: static void 
1618: hmt_concat(REAL *vals, REAL *work, int size)
1619: {
1620: #if defined NXSRC  
1621:   int  mask,stage_n;
1622:   int edge, type, dest, source, len;
1623:   int offset;
1624:   double *dptr;
1625: #endif

1627: #ifdef SAFE
1628:   /* check to make sure comm package has been initialized */
1629:   if (!p_init)
1630:     {comm_init();}
1631: #endif

1633:   /* all msgs are *NOT* the same length */
1634:   /* implement the mesh fan in/out exchange algorithm */
1635:   rvec_copy(work,vals,size);

1637: #if defined NXSRC  
1638:   mask = 0;
1639:   stage_n = size;
1640:   {
1641:     long msg_id;
1642: 
1643:     dptr  = work+size;
1644:     for (edge = 0; edge < i_log2_num_nodes; edge++)
1645:       {
1646:         len = stage_n * REAL_LEN;
1647: 
1648:         if (!(my_id & mask))
1649:           {
1650:             source = dest = edge_node[edge];
1651:             type = MSGTAG3 + my_id + (num_nodes*edge);
1652:             if (my_id > dest)
1653:               {
1654:                 msg_id = isend(type, work, len,dest,0);
1655:                 msgwait(msg_id);
1656:               }
1657:             else
1658:               {
1659:                 type =  type - my_id + source;
1660:                 msg_id = irecv(type, dptr,len);
1661:                 msgwait(msg_id);
1662:               }
1663:           }
1664: 
1665: #ifdef DEBUG_1      
1666:         ierror_msg_warning_n0("stage_n = ",stage_n);
1667: #endif

1669:         dptr += stage_n;
1670:         stage_n <<=1;
1671:         mask <<= 1;
1672:         mask += 1;
1673:       }
1674: 
1675:     size = stage_n;
1676:     stage_n >>=1;
1677:     dptr -= stage_n;
1678: 
1679:     mask >>= 1;
1680: 
1681:     for (edge = 0; edge < i_log2_num_nodes; edge++)
1682:       {
1683:         len = (size-stage_n) * REAL_LEN;
1684: 
1685:         if (!(my_id & mask) && stage_n)
1686:           {
1687:             source = dest = edge_node[i_log2_num_nodes-edge-1];
1688:             type = MSGTAG6 + my_id + (num_nodes*edge);
1689:             if (my_id < dest)
1690:               {
1691:                 msg_id = isend(type,dptr,len,dest,0);
1692:                 msgwait(msg_id);
1693:               }
1694:             else
1695:               {
1696:                 type =  type - my_id + source;
1697:                 msg_id = irecv(type,dptr,len);
1698:                 msgwait(msg_id);
1699:               }
1700:           }
1701: 
1702: #ifdef DEBUG_1      
1703:         ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1704: #endif
1705: 
1706:         stage_n >>= 1;
1707:         dptr -= stage_n;
1708:         mask >>= 1;
1709:       }
1710:   }
1711: #elif defined MRISRC
1712:   {
1713:     MPI_Request msg_id;
1714:     MPI_Status status;
1715: 
1716:     dptr  = work+size;
1717:     for (edge = 0; edge < i_log2_num_nodes; edge++)
1718:       {
1719:         len = stage_n * REAL_LEN;
1720: 
1721:         if (!(my_id & mask))
1722:           {
1723:             source = dest = edge_node[edge];
1724:             type = MSGTAG3 + my_id + (num_nodes*edge);
1725:             if (my_id > dest)
1726:               {
1727:                 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1728:                           &msg_id);
1729:                 MPI_Wait(&msg_id,&status);
1730:                 /*msg_id = isend(type, work, len,dest,0);
1731:                   msgwait(msg_id);*/
1732:               }
1733:             else
1734:               {
1735:                 type =  type - my_id + source;
1736:                 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1737:                           type,MPI_COMM_WORLD,&msg_id);
1738:                 MPI_Wait(&msg_id,&status);
1739:                 /* msg_id = irecv(type, dptr,len);
1740:                 msgwait(msg_id);*/
1741:               }
1742:           }
1743: 
1744: #ifdef DEBUG_1      
1745:         ierror_msg_warning_n0("stage_n = ",stage_n);
1746: #endif

1748:         dptr += stage_n;
1749:         stage_n <<=1;
1750:         mask <<= 1;
1751:         mask += 1;
1752:       }
1753: 
1754:     size = stage_n;
1755:     stage_n >>=1;
1756:     dptr -= stage_n;
1757: 
1758:     mask >>= 1;
1759: 
1760:     for (edge = 0; edge < i_log2_num_nodes; edge++)
1761:       {
1762:         len = (size-stage_n) * REAL_LEN;
1763: 
1764:         if (!(my_id & mask) && stage_n)
1765:           {
1766:             source = dest = edge_node[i_log2_num_nodes-edge-1];
1767:             type = MSGTAG6 + my_id + (num_nodes*edge);
1768:             if (my_id < dest)
1769:               {
1770:                 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
1771:                           &msg_id);
1772:                 MPI_Wait(&msg_id,&status);
1773:                 /*msg_id = isend(type, work, len,dest,0);
1774:                 msg_id = isend(type,dptr,len,dest,0); */
1775:                 msgwait(msg_id);
1776:               }
1777:             else
1778:               {
1779:                 type =  type - my_id + source;
1780:                 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE,
1781:                           type,MPI_COMM_WORLD,&msg_id);
1782:                 MPI_Wait(&msg_id,&status);
1783:                 /*msg_id = irecv(type,dptr,len);
1784:                 msgwait(msg_id);*/
1785:               }
1786:           }
1787: 
1788: #ifdef DEBUG_1      
1789:         ierror_msg_warning_n0("size-stage_n = ",size-stage_n);
1790: #endif
1791: 
1792:         stage_n >>= 1;
1793:         dptr -= stage_n;
1794:         mask >>= 1;
1795:       }
1796:   }
1797: #else
1798:   return;
1799: #endif
1800: }



1804: /******************************************************************************
1805: Function: giop()

1807: Input : 
1808: Output: 
1809: Return: 
1810: Description: 
1811:  
1812: ii+1 entries in seg :: 0 .. ii

1814: ******************************************************************************/
1815: void
1816: new_ssgl_radd(register REAL *vals, register REAL *work, register int level,
1817: #if defined MPISRC
1818:           register int *segs, MPI_Comm ssgl_comm)
1819: #else
1820:           register int *segs)
1821: #endif
1822: {
1823:   register int edge, type, dest, mask;
1824:   register int stage_n;
1825: #if defined MPISRC
1826:   MPI_Status  status;
1827: #endif


1830: #ifdef DEBUG
1831:   if (level > i_log2_num_nodes)
1832:     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
1833: #endif

1835: #ifdef SAFE
1836:   /* check to make sure comm package has been initialized */
1837:   if (!p_init)
1838:     {comm_init();}
1839: #endif


1842: #if defined NXSRC
1843:   /* all msgs are *NOT* the same length */
1844:   /* implement the mesh fan in/out exchange algorithm */
1845:   for (mask=0, edge=0; edge<level; edge++, mask++)
1846:     {
1847:       stage_n = (segs[level] - segs[edge]);
1848:       if (stage_n && !(my_id & mask))
1849:         {
1850:           dest = edge_node[edge];
1851:           type = MSGTAG3 + my_id + (num_nodes*edge);
1852:           if (my_id>dest)
1853:             {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
1854:           else
1855:             {
1856:               type =  type - my_id + dest;
1857:               crecv(type,work,stage_n*REAL_LEN);
1858:               rvec_add(vals+segs[edge], work, stage_n);
1859: /*            daxpy(vals+segs[edge], work, stage_n); */
1860:             }
1861:         }
1862:       mask <<= 1;
1863:     }
1864:   mask>>=1;
1865:   for (edge=0; edge<level; edge++)
1866:     {
1867:       stage_n = (segs[level] - segs[level-1-edge]);
1868:       if (stage_n && !(my_id & mask))
1869:         {
1870:           dest = edge_node[level-edge-1];
1871:           type = MSGTAG6 + my_id + (num_nodes*edge);
1872:           if (my_id<dest)
1873:             {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
1874:           else
1875:             {
1876:               type =  type - my_id + dest;
1877:               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
1878:             }
1879:         }
1880:       mask >>= 1;
1881:     }

1883: #elif defined MPISRC
1884:   /* all msgs are *NOT* the same length */
1885:   /* implement the mesh fan in/out exchange algorithm */
1886:   for (mask=0, edge=0; edge<level; edge++, mask++)
1887:     {
1888:       stage_n = (segs[level] - segs[edge]);
1889:       if (stage_n && !(my_id & mask))
1890:         {
1891:           dest = edge_node[edge];
1892:           type = MSGTAG3 + my_id + (num_nodes*edge);
1893:           if (my_id>dest)
1894: /*            {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
1895:           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
1896:                       ssgl_comm);}
1897:           else
1898:             {
1899:               type =  type - my_id + dest;
1900: /*              crecv(type,work,stage_n*REAL_LEN); */
1901:               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
1902:                        ssgl_comm,&status);
1903:               rvec_add(vals+segs[edge], work, stage_n);
1904: /*            daxpy(vals+segs[edge], work, stage_n); */
1905:             }
1906:         }
1907:       mask <<= 1;
1908:     }
1909:   mask>>=1;
1910:   for (edge=0; edge<level; edge++)
1911:     {
1912:       stage_n = (segs[level] - segs[level-1-edge]);
1913:       if (stage_n && !(my_id & mask))
1914:         {
1915:           dest = edge_node[level-edge-1];
1916:           type = MSGTAG6 + my_id + (num_nodes*edge);
1917:           if (my_id<dest)
1918: /*            {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
1919:             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
1920:                       ssgl_comm);}
1921:           else
1922:             {
1923:               type =  type - my_id + dest;
1924: /*               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
1925:               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
1926:                        MPI_ANY_SOURCE,type,ssgl_comm,&status);
1927:             }
1928:         }
1929:       mask >>= 1;
1930:     }
1931: #else
1932:   return;
1933: #endif
1934: }



1938: /***********************************comm.c*************************************
1939: Function: giop()

1941: Input : 
1942: Output: 
1943: Return: 
1944: Description: fan-in/out version

1946: note good only for num_nodes=2^k!!!

1948: ***********************************comm.c*************************************/
1949: void
1950: giop_hc(int *vals, int *work, int n, int *oprs, int dim)
1951: {
1952:   register int mask, edge;
1953:   int type, dest;
1954:   vfp fp;
1955: #if defined MPISRC
1956:   MPI_Status  status;
1957: #elif defined NXSRC
1958:   int len;
1959: #endif


1962: #ifdef SAFE
1963:   /* ok ... should have some data, work, and operator(s) */
1964:   if (!vals||!work||!oprs)
1965:     {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

1967:   /* non-uniform should have at least two entries */
1968:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
1969:     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

1971:   /* check to make sure comm package has been initialized */
1972:   if (!p_init)
1973:     {comm_init();}
1974: #endif

1976:   /* if there's nothing to do return */
1977:   if ((num_nodes<2)||(!n)||(dim<=0))
1978:     {return;}

1980:   /* the error msg says it all!!! */
1981:   if (modfl_num_nodes)
1982:     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

1984:   /* a negative number of items to send ==> fatal */
1985:   if (n<0)
1986:     {error_msg_fatal("giop_hc() :: n=%d<0?",n);}

1988:   /* can't do more dimensions then exist */
1989:   dim = MIN(dim,i_log2_num_nodes);

1991:   /* advance to list of n operations for custom */
1992:   if ((type=oprs[0])==NON_UNIFORM)
1993:     {oprs++;}

1995:   if ((fp = (vfp) ivec_fct_addr(type)) == NULL)
1996:     {
1997:       error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
1998:       fp = (vfp) oprs;
1999:     }

2001: #if  defined NXSRC
2002:   /* all msgs will be of the same length */
2003:   len = n*INT_LEN;

2005:   /* implement the mesh fan in/out exchange algorithm */
2006:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2007:     {
2008:       dest = my_id^mask;
2009:       if (my_id > dest)
2010:         {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
2011:       else
2012:         {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
2013:     }
2014: 
2015:   /* for (mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */
2016:   if (edge==dim)
2017:     {mask>>=1;}
2018:   else
2019:     {while (++edge<dim) {mask<<=1;}}
2020: 
2021:   for (edge=0; edge<dim; edge++,mask>>=1)
2022:     {
2023:       if (my_id%mask)
2024:         {continue;}
2025: 
2026:       dest = my_id^mask;
2027:       if (my_id < dest)
2028:         {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
2029:       else
2030:         {crecv(MSGTAG4+dest,(char *)vals,len);}
2031:     }

2033: #elif defined MPISRC 
2034:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
2035:     {
2036:       dest = my_id^mask;
2037:       if (my_id > dest)
2038:         {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
2039:       else
2040:         {
2041:           MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
2042:                    &status);
2043:           (*fp)(vals, work, n, oprs);
2044:         }
2045:     }

2047:   if (edge==dim)
2048:     {mask>>=1;}
2049:   else
2050:     {while (++edge<dim) {mask<<=1;}}

2052:   for (edge=0; edge<dim; edge++,mask>>=1)
2053:     {
2054:       if (my_id%mask)
2055:         {continue;}
2056: 
2057:       dest = my_id^mask;
2058:       if (my_id < dest)
2059:         {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
2060:       else
2061:         {
2062:           MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
2063:                    &status);
2064:         }
2065:     }
2066: #else
2067:   return;
2068: #endif
2069: }