Actual source code: gs.c

  1: /*$Id: gs.c,v 1.6 2001/05/10 18:50:11 balay Exp $*/
  2: /***********************************gs.c***************************************
  3: SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 6.21.97
 16: ************************************gs.c**************************************/

 18: /***********************************gs.c***************************************
 19: File Description:
 20: -----------------

 22: ************************************gs.c**************************************/
 23: #include <stdio.h>
 24: #include <math.h>
 25: #include <float.h>
 26: #include <limits.h>


 29: #if   defined NXSRC
 30: #ifndef DELTA
 31: #include <nx.h>
 32: #endif

 34: #elif defined MPISRC
 35: #include <mpi.h>
 36: #endif

 38: #include "petscconf.h"
 39: #if defined(PETSC_HAVE_STRINGS_H)
 40: #include <strings.h>
 41: #endif
 42: #if defined(PETSC_HAVE_STRING_H)
 43: #include <string.h>
 44: #endif

 46:  #include const.h
 47:  #include types.h
 48:  #include comm.h
 49:  #include ivec.h
 50: #include "bss_malloc.h"
 51: #include "bit_mask.h"
 52:  #include error.h
 53:  #include queue.h
 54:  #include blas.h
 55:  #include gs.h

 57: /* default length of number of items via tree - doubles if exceeded */
 58: #define TREE_BUF_SZ 2048;
 59: #define GS_VEC_SZ   1



 63: /***********************************gs.c***************************************
 64: Type: struct gather_scatter_id 
 65: ------------------------------

 67: ************************************gs.c**************************************/
 68: typedef struct gather_scatter_id {
 69:   int id;
 70:   int nel_min;
 71:   int nel_max;
 72:   int nel_sum;
 73:   int negl;
 74:   int gl_max;
 75:   int gl_min;
 76:   int repeats;
 77:   int ordered;
 78:   int positive;
 79:   REAL *vals;

 81:   /* bit mask info */
 82:   int *my_proc_mask;
 83:   int mask_sz;
 84:   int *ngh_buf;
 85:   int ngh_buf_sz;
 86:   int *nghs;
 87:   int num_nghs;
 88:   int max_nghs;
 89:   int *pw_nghs;
 90:   int num_pw_nghs;
 91:   int *tree_nghs;
 92:   int num_tree_nghs;

 94:   int num_loads;

 96:   /* repeats == true -> local info */
 97:   int nel;         /* number of unique elememts */
 98:   int *elms;       /* of size nel */
 99:   int nel_total;
100:   int *local_elms; /* of size nel_total */
101:   int *companion;  /* of size nel_total */

103:   /* local info */
104:   int num_local_total;
105:   int local_strength;
106:   int num_local;
107:   int *num_local_reduce;
108:   int **local_reduce;
109:   int num_local_gop;
110:   int *num_gop_local_reduce;
111:   int **gop_local_reduce;

113:   /* pairwise info */
114:   int level;
115:   int num_pairs;
116:   int max_pairs;
117:   int loc_node_pairs;
118:   int max_node_pairs;
119:   int min_node_pairs;
120:   int avg_node_pairs;
121:   int *pair_list;
122:   int *msg_sizes;
123:   int **node_list;
124:   int len_pw_list;
125:   int *pw_elm_list;
126:   REAL *pw_vals;

128: #ifdef MPISRC
129:   MPI_Request *msg_ids_in;
130:   MPI_Request *msg_ids_out;

132: #else
133:   int *msg_ids_in;
134:   int *msg_ids_out;

136: #endif

138:   REAL *out;
139:   REAL *in;
140:   int msg_total;

142:   /* tree - crystal accumulator info */
143:   int max_left_over;
144:   int *pre;
145:   int *in_num;
146:   int *out_num;
147:   int **in_list;
148:   int **out_list;

150:   /* new tree work*/
151:   int  tree_nel;
152:   int *tree_elms;
153:   REAL *tree_buf;
154:   REAL *tree_work;

156:   int  tree_map_sz;
157:   int *tree_map_in;
158:   int *tree_map_out;

160:   /* current memory status */
161:   int gl_bss_min;
162:   int gl_perm_min;

164:   /* max segment size for gs_gop_vec() */
165:   int vec_sz;

167:   /* hack to make paul happy */
168: #ifdef MPISRC
169:   MPI_Comm gs_comm;
170: #endif

172: } gs_id;


175: /* to be made public */
176: #if defined(not_used)
177: static int  gs_dump_ngh(gs_id *id, int loc_num, int *num, int *ngh_list);
178: static void gsi_via_int_list(gs_id *gs);
179: static int in_sub_tree(int *ptr3, int p_mask_size, int *buf2, int buf_size);
180: #endif

182: /* PRIVATE - and definitely not exported */
183: /*static void gs_print_template(register gs_id* gs, int who);*/
184: /*static void gs_print_stemplate(register gs_id* gs, int who);*/

186: static gs_id *gsi_check_args(int *elms, int nel, int level);
187: static void gsi_via_bit_mask(gs_id *gs);
188: static void get_ngh_buf(gs_id *gs);
189: static void set_pairwise(gs_id *gs);
190: static gs_id * gsi_new(void);
191: static void set_tree(gs_id *gs);

193: /* same for all but vector flavor */
194: static void gs_gop_local_out(gs_id *gs, REAL *vals);
195: /* vector flavor */
196: static void gs_gop_vec_local_out(gs_id *gs, REAL *vals, int step);

198: static void gs_gop_vec_plus(gs_id *gs, REAL *in_vals, int step);
199: static void gs_gop_vec_pairwise_plus(gs_id *gs, REAL *in_vals, int step);
200: static void gs_gop_vec_local_plus(gs_id *gs, REAL *vals, int step);
201: static void gs_gop_vec_local_in_plus(gs_id *gs, REAL *vals, int step);
202: static void gs_gop_vec_tree_plus(gs_id *gs, REAL *vals, int step);


205: static void gs_gop_plus(gs_id *gs, REAL *in_vals);
206: static void gs_gop_pairwise_plus(gs_id *gs, REAL *in_vals);
207: static void gs_gop_local_plus(gs_id *gs, REAL *vals);
208: static void gs_gop_local_in_plus(gs_id *gs, REAL *vals);
209: static void gs_gop_tree_plus(gs_id *gs, REAL *vals);

211: static void gs_gop_plus_hc(gs_id *gs, REAL *in_vals, int dim);
212: static void gs_gop_pairwise_plus_hc(gs_id *gs, REAL *in_vals, int dim);
213: static void gs_gop_tree_plus_hc(gs_id *gs, REAL *vals, int dim);

215: static void gs_gop_times(gs_id *gs, REAL *in_vals);
216: static void gs_gop_pairwise_times(gs_id *gs, REAL *in_vals);
217: static void gs_gop_local_times(gs_id *gs, REAL *vals);
218: static void gs_gop_local_in_times(gs_id *gs, REAL *vals);
219: static void gs_gop_tree_times(gs_id *gs, REAL *vals);

221: static void gs_gop_min(gs_id *gs, REAL *in_vals);
222: static void gs_gop_pairwise_min(gs_id *gs, REAL *in_vals);
223: static void gs_gop_local_min(gs_id *gs, REAL *vals);
224: static void gs_gop_local_in_min(gs_id *gs, REAL *vals);
225: static void gs_gop_tree_min(gs_id *gs, REAL *vals);

227: static void gs_gop_min_abs(gs_id *gs, REAL *in_vals);
228: static void gs_gop_pairwise_min_abs(gs_id *gs, REAL *in_vals);
229: static void gs_gop_local_min_abs(gs_id *gs, REAL *vals);
230: static void gs_gop_local_in_min_abs(gs_id *gs, REAL *vals);
231: static void gs_gop_tree_min_abs(gs_id *gs, REAL *vals);

233: static void gs_gop_max(gs_id *gs, REAL *in_vals);
234: static void gs_gop_pairwise_max(gs_id *gs, REAL *in_vals);
235: static void gs_gop_local_max(gs_id *gs, REAL *vals);
236: static void gs_gop_local_in_max(gs_id *gs, REAL *vals);
237: static void gs_gop_tree_max(gs_id *gs, REAL *vals);

239: static void gs_gop_max_abs(gs_id *gs, REAL *in_vals);
240: static void gs_gop_pairwise_max_abs(gs_id *gs, REAL *in_vals);
241: static void gs_gop_local_max_abs(gs_id *gs, REAL *vals);
242: static void gs_gop_local_in_max_abs(gs_id *gs, REAL *vals);
243: static void gs_gop_tree_max_abs(gs_id *gs, REAL *vals);

245: static void gs_gop_exists(gs_id *gs, REAL *in_vals);
246: static void gs_gop_pairwise_exists(gs_id *gs, REAL *in_vals);
247: static void gs_gop_local_exists(gs_id *gs, REAL *vals);
248: static void gs_gop_local_in_exists(gs_id *gs, REAL *vals);
249: static void gs_gop_tree_exists(gs_id *gs, REAL *vals);

251: static void gs_gop_pairwise_binary(gs_id *gs, REAL *in_vals, rbfp fct);
252: static void gs_gop_local_binary(gs_id *gs, REAL *vals, rbfp fct);
253: static void gs_gop_local_in_binary(gs_id *gs, REAL *vals, rbfp fct);
254: static void gs_gop_tree_binary(gs_id *gs, REAL *vals, rbfp fct);



258: /* global vars */
259: /* from comm.c module */

261: /* module state inf and fortran interface */
262: static int num_gs_ids = 0;

264: /* should make this dynamic ... later */
265: /*static queue_ADT elms_q, mask_q;*/
266: static int msg_buf=MAX_MSG_BUF;
267: /*static int msg_ch=FALSE;*/

269: static int vec_sz=GS_VEC_SZ;
270: /*static int vec_ch=FALSE; */

272: static int *tree_buf=NULL;
273: static int tree_buf_sz=0;
274: static int ntree=0;


277: /******************************************************************************
278: Function: gs_init_()

280: Input : 
281: Output: 
282: Return: 
283: Description:  
284: ******************************************************************************/
285: void gs_init_vec_sz(int size)
286: {
287:   /*  vec_ch = TRUE; */

289:   vec_sz = size;
290: }

292: /******************************************************************************
293: Function: gs_init_()

295: Input : 
296: Output: 
297: Return: 
298: Description:  
299: ******************************************************************************/
300: void gs_init_msg_buf_sz(int buf_size)
301: {
302:   /*  msg_ch = TRUE; */

304:   msg_buf = buf_size;
305: }

307: /******************************************************************************
308: Function: gs_init()

310: Input : 

312: Output: 

314: RETURN: 

316: Description:  
317: ******************************************************************************/
318: gs_id *
319: gs_init(register int *elms, int nel, int level)
320: {
321:   register gs_id *gs;
322: #ifdef INFO1
323:   int i;
324: #endif
325: #ifdef MPISRC
326:   MPI_Group gs_group;
327:   MPI_Comm  gs_comm;
328: #endif


331:   bss_init();
332:   perm_init();

334: #ifdef DEBUG  
335:   error_msg_warning("gs_init() start w/(%d,%d)\n",my_id,num_nodes);
336: #endif
337: 
338:   /* ensure that communication package has been initialized */
339:   comm_init();

341: #ifdef INFO1
342:   bss_stats();
343:   perm_stats();
344: #endif

346:   /* determines if we have enough dynamic/semi-static memory */
347:   /* checks input, allocs and sets gd_id template            */
348:   gs = gsi_check_args(elms,nel,level);

350:   /* only bit mask version up and working for the moment    */
351:   /* LATER :: get int list version working for sparse pblms */
352:   gsi_via_bit_mask(gs);

354: #ifdef INFO1
355:   /* print out P0's template as well as malloc stats */
356:   gs_print_template(gs,0);

358: #if   defined NXSRC
359:       gsync();
360: #elif defined MPISRC
361:       MPI_Barrier(MPI_COMM_WORLD);
362: #endif

364:   for (i=1;i<num_nodes;i++)
365:     {
366:       gs_print_stemplate(gs,i);
367: #if   defined NXSRC
368:       gsync();
369: #elif defined MPISRC
370:       MPI_Barrier(MPI_COMM_WORLD);
371: #endif
372:     }
373: #endif

375: #ifdef DEBUG  
376:   error_msg_warning("gs_init() end w/(%d,%d)\n",my_id,num_nodes);
377: #endif

379: #ifdef INFO
380:   bss_stats();
381:   perm_stats();
382: #endif

384: #ifdef MPISRC
385:   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
386:   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
387:   gs->gs_comm=gs_comm;
388: #endif

390:   return(gs);
391: }



395: /******************************************************************************
396: Function: gsi_new()

398: Input : 
399: Output: 
400: Return: 
401: Description: 

403: elm list must >= 0!!!
404: elm repeats allowed
405: ******************************************************************************/
406: static
407: gs_id *
408: gsi_new(void)
409: {
410:   int size=sizeof(gs_id);
411:   gs_id *gs;

413: 
414: #ifdef DEBUG
415:   error_msg_warning("gsi_new() :: size=%d\n",size);
416: #endif

418:   gs = (gs_id *) perm_malloc(size);
419: 
420:   if (!(size%REAL_LEN))
421:     {rvec_zero((REAL *)gs,size/REAL_LEN);}
422:   else if (!(size%INT_LEN))
423:     {ivec_zero((INT *)gs,size/INT_LEN);}
424:   else
425:     {memset((char *)gs,0,size/sizeof(char));}

427:   return(gs);
428: }



432: /******************************************************************************
433: Function: gsi_check_args()

435: Input : 
436: Output: 
437: Return: 
438: Description: 

440: elm list must >= 0!!!
441: elm repeats allowed
442: local working copy of elms is sorted
443: ******************************************************************************/
444: static
445: gs_id *
446: gsi_check_args(int *in_elms, int nel, int level)
447: {
448:   register int i, j, k, t2;
449:   int *companion, *elms, *unique, *iptr;
450:   int num_local=0, *num_to_reduce, **local_reduce;
451:   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
452:   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
453:   int work[sizeof(oprs)/sizeof(oprs[0])-1];
454:   gs_id *gs;


457: #ifdef DEBUG  
458:   error_msg_warning("gs_check_args() begin w/(%d,%d)\n",my_id,num_nodes);
459: #endif

461: #ifdef SAFE
462:   if (!in_elms)
463:     {error_msg_fatal("elms point to nothing!!!\n");}

465:   if (nel<0)
466:     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}

468:   if (nel==0)
469:     {error_msg_warning("I don't have any elements!!!\n");}
470: #endif

472:   /* get space for gs template */
473:   gs = gsi_new();
474:   gs->id = ++num_gs_ids;

476:   /* hmt 6.4.99                                            */
477:   /* caller can set global ids that don't participate to 0 */
478:   /* gs_init ignores all zeros in elm list                 */
479:   /* negative global ids are still invalid                 */
480:   for (i=j=0;i<nel;i++)
481:     {if (in_elms[i]!=0) {j++;}}

483:   k=nel; nel=j;

485:   /* copy over in_elms list and create inverse map */
486:   elms = (int *) bss_malloc((nel+1)*INT_LEN);
487:   companion = (int *) bss_malloc(nel*INT_LEN);
488:   /* ivec_c_index(companion,nel); */
489:   /* ivec_copy(elms,in_elms,nel); */
490:   for (i=j=0;i<k;i++)
491:     {
492:       if (in_elms[i]!=0)
493:         {elms[j] = in_elms[i]; companion[j++] = i;}
494:     }

496:   if (j!=nel)
497:     {error_msg_fatal("nel j mismatch!\n");}

499: #ifdef SAFE
500:   /* pre-pass ... check to see if sorted */
501:   elms[nel] = INT_MAX;
502:   iptr = elms;
503:   unique = elms+1;
504:   j=0;
505:   while (*iptr!=INT_MAX)
506:     {
507:       if (*iptr++>*unique++)
508:         {j=1; break;}
509:     }

511:   /* set up inverse map */
512:   if (j)
513:     {
514:       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
515:       SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER);
516:     }
517:   else
518:     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
519: #else
520:   SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER);
521: #endif
522:   elms[nel] = INT_MIN;

524:   /* first pass */
525:   /* determine number of unique elements, check pd */
526:   for (i=k=0;i<nel;i+=j)
527:     {
528:       t2 = elms[i];
529:       j=++i;
530: 
531:       /* clump 'em for now */
532:       while (elms[j]==t2) {j++;}
533: 
534:       /* how many together and num local */
535:       if (j-=i)
536:         {num_local++; k+=j;}
537:     }

539:   /* how many unique elements? */
540:   gs->repeats=k;
541:   gs->nel = nel-k;


544:   /* number of repeats? */
545:   gs->num_local = num_local;
546:   num_local+=2;
547:   gs->local_reduce=local_reduce=(int **)perm_malloc(num_local*INT_PTR_LEN);
548:   gs->num_local_reduce=num_to_reduce=(int *) perm_malloc(num_local*INT_LEN);

550:   unique = (int *) bss_malloc((gs->nel+1)*INT_LEN);
551:   gs->elms = unique;
552:   gs->nel_total = nel;
553:   gs->local_elms = elms;
554:   gs->companion = companion;

556:   /* compess map as well as keep track of local ops */
557:   for (num_local=i=j=0;i<gs->nel;i++)
558:     {
559:       k=j;
560:       t2 = unique[i] = elms[j];
561:       companion[i] = companion[j];
562: 
563:       while (elms[j]==t2) {j++;}

565:       if ((t2=(j-k))>1)
566:         {
567:           /* number together */
568:           num_to_reduce[num_local] = t2++;
569:           iptr = local_reduce[num_local++] = (int *)perm_malloc(t2*INT_LEN);

571:           /* to use binary searching don't remap until we check intersection */
572:           *iptr++ = i;
573: 
574:           /* note that we're skipping the first one */
575:           while (++k<j)
576:             {*(iptr++) = companion[k];}
577:           *iptr = -1;
578:         }
579:     }

581:   /* sentinel for ngh_buf */
582:   unique[gs->nel]=INT_MAX;

584: #ifdef DEBUG
585:   if (num_local!=gs->num_local)
586:     {error_msg_fatal("compression of maps wrong!!!\n");}
587: #endif

589:   /* for two partition sort hack */
590:   num_to_reduce[num_local] = 0;
591:   local_reduce[num_local] = NULL;
592:   num_to_reduce[++num_local] = 0;
593:   local_reduce[num_local] = NULL;

595:   /* load 'em up */
596:   /* note one extra to hold NON_UNIFORM flag!!! */
597:   vals[2] = vals[1] = vals[0] = nel;
598:   if (gs->nel>0)
599:     {
600:        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
601:        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
602:     }
603:   else
604:     {
605:        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
606:        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
607:     }
608:   vals[5] = level;
609:   vals[6] = num_gs_ids;

611:   /* GLOBAL: send 'em out */
612:   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);

614:   /* must be semi-pos def - only pairwise depends on this */
615:   /* LATER - remove this restriction */
616:   if (vals[3]<0)
617:     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}

619:   if (vals[4]==INT_MAX)
620:     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}

622: #ifdef DEBUG
623:   /* check gs template count */
624:   if (vals[6] != num_gs_ids)
625:     {error_msg_fatal("num_gs_ids mismatch!!!");}

627:   /* check all have same level threshold */
628:   if (level != vals[5])
629:     {error_msg_fatal("gsi_check_args() :: level not uniform across nodes!!!\n");}
630: #endif

632:   gs->nel_min = vals[0];
633:   gs->nel_max = vals[1];
634:   gs->nel_sum = vals[2];
635:   gs->gl_min  = vals[3];
636:   gs->gl_max  = vals[4];
637:   gs->negl    = vals[4]-vals[3]+1;

639: #ifdef DEBUG
640:       printf("nel(unique)=%d\n", gs->nel);
641:       printf("nel_max=%d\n",     gs->nel_max);
642:       printf("nel_min=%d\n",     gs->nel_min);
643:       printf("nel_sum=%d\n",     gs->nel_sum);
644:       printf("negl=%d\n",        gs->negl);
645:       printf("gl_max=%d\n",      gs->gl_max);
646:       printf("gl_min=%d\n",      gs->gl_min);
647:       printf("elms ordered=%d\n",gs->ordered);
648:       printf("repeats=%d\n",     gs->repeats);
649:       printf("positive=%d\n",    gs->positive);
650:       printf("level=%d\n",       gs->level);
651: #endif

653:   if (gs->negl<=0)
654:     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
655: 
656:   /* LATER :: add level == -1 -> program selects level */
657:   if (vals[5]<0)
658:     {vals[5]=0;}
659:   else if (vals[5]>num_nodes)
660:     {vals[5]=num_nodes;}
661:   gs->level = vals[5];

663: #ifdef DEBUG  
664:   error_msg_warning("gs_check_args() :: end w/(%d,%d)+level=%d\n",
665:                     my_id,num_nodes,vals[5]);
666: #endif

668:   return(gs);
669: }


672: /******************************************************************************
673: Function: gsi_via_bit_mask()

675: Input : 
676: Output: 
677: Return: 
678: Description: 


681: ******************************************************************************/
682: static
683: void
684: gsi_via_bit_mask(gs_id *gs)
685: {
686:   register int i, nel, *elms;
687:   int t1;
688:   int **reduce;
689:   int *map;

691: #ifdef DEBUG  
692:   error_msg_warning("gsi_via_bit_mask() begin w/%d :: %d\n",my_id,num_nodes);
693: #endif

695:   /* totally local removes ... ct_bits == 0 */
696:   get_ngh_buf(gs);

698:   if (gs->level)
699:     {set_pairwise(gs);}

701:   if (gs->max_left_over)
702:     {set_tree(gs);}

704:   /* intersection local and pairwise/tree? */
705:   gs->num_local_total = gs->num_local;
706:   gs->gop_local_reduce = gs->local_reduce;
707:   gs->num_gop_local_reduce = gs->num_local_reduce;

709:   map = gs->companion;

711:   /* is there any local compression */
712:   if (gs->num_local == 0)
713:     {
714:       gs->local_strength = NONE;
715:       gs->num_local_gop = 0;
716:     }
717:   else
718:     {
719:       /* ok find intersection */
720:       map = gs->companion;
721:       reduce = gs->local_reduce;
722:       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
723:         {
724:           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
725:               ||
726:               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
727:             {
728:               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
729:               t1++;
730:               if (gs->num_local_reduce[i]<=0)
731:                 {error_msg_fatal("nobody in list?");}
732:               gs->num_local_reduce[i] *= -1;
733:             }
734:            **reduce=map[**reduce];
735:         }

737:       /* intersection is empty */
738:       if (!t1)
739:         {
740: #ifdef DEBUG          
741:           error_msg_warning("gsi_check_args() :: local gs_gop w/o intersection!");
742: #endif
743:           gs->local_strength = FULL;
744:           gs->num_local_gop = 0;
745:         }
746:       /* intersection not empty */
747:       else
748:         {
749: #ifdef DEBUG          
750:           error_msg_warning("gsi_check_args() :: local gs_gop w/intersection!");
751: #endif
752:           gs->local_strength = PARTIAL;
753:           SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce,
754:                    gs->num_local + 1, SORT_INT_PTR);

756:           gs->num_local_gop = t1;
757:           gs->num_local_total =  gs->num_local;
758:           gs->num_local    -= t1;
759:           gs->gop_local_reduce = gs->local_reduce;
760:           gs->num_gop_local_reduce = gs->num_local_reduce;

762:           for (i=0; i<t1; i++)
763:             {
764:               if (gs->num_gop_local_reduce[i]>=0)
765:                 {error_msg_fatal("they aren't negative?");}
766:               gs->num_gop_local_reduce[i] *= -1;
767:               gs->local_reduce++;
768:               gs->num_local_reduce++;
769:             }
770:           gs->local_reduce++;
771:           gs->num_local_reduce++;
772:         }
773:     }

775:   elms = gs->pw_elm_list;
776:   nel  = gs->len_pw_list;
777:   for (i=0; i<nel; i++)
778:     {elms[i] = map[elms[i]];}

780:   elms = gs->tree_map_in;
781:   nel  = gs->tree_map_sz;
782:   for (i=0; i<nel; i++)
783:     {elms[i] = map[elms[i]];}

785:   /* clean up */
786:   bss_free((void *) gs->local_elms);
787:   bss_free((void *) gs->companion);
788:   bss_free((void *) gs->elms);
789:   bss_free((void *) gs->ngh_buf);
790:   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;

792: #ifdef DEBUG  
793:   error_msg_warning("gsi_via_bit_mask() end w/%d :: %d\n",my_id,num_nodes);
794: #endif
795: }



799: /******************************************************************************
800: Function: place_in_tree()

802: Input : 
803: Output: 
804: Return: 
805: Description: 


808: ******************************************************************************/
809: static
810: void
811: place_in_tree(register int elm)
812: {
813:   register int *tp, n;


816:   if (ntree==tree_buf_sz)
817:     {
818:       if (tree_buf_sz)
819:         {
820:           tp = tree_buf;
821:           n = tree_buf_sz;
822:           tree_buf_sz<<=1;
823:           tree_buf = (int *)bss_malloc(tree_buf_sz*INT_LEN);
824:           ivec_copy(tree_buf,tp,n);
825:           bss_free(tp);
826:         }
827:       else
828:         {
829:           tree_buf_sz = TREE_BUF_SZ;
830:           tree_buf = (int *)bss_malloc(tree_buf_sz*INT_LEN);
831:         }
832:     }

834:   tree_buf[ntree++] = elm;
835: }



839: /******************************************************************************
840: Function: get_ngh_buf()

842: Input : 
843: Output: 
844: Return: 
845: Description: 


848: ******************************************************************************/
849: static
850: void
851: get_ngh_buf(gs_id *gs)
852: {
853:   register int i, j, npw=0, ntree_map=0;
854:   int p_mask_size, ngh_buf_size, buf_size;
855:   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
856:   int *ngh_buf, *buf1, *buf2;
857:   int offset, per_load, num_loads, or_ct, start, end;
858:   int *ptr1, *ptr2, i_start, negl, nel, *elms;
859:   int oper=GL_B_OR;
860:   int *ptr3, *t_mask, level, ct1, ct2;

862: #ifdef DEBUG  
863:   error_msg_warning("get_ngh_buf() begin w/%d :: %d\n",my_id,num_nodes);
864: #endif

866:   /* to make life easier */
867:   nel   = gs->nel;
868:   elms  = gs->elms;
869:   level = gs->level;
870: 
871:   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
872:   p_mask = (int *) bss_malloc(p_mask_size=len_bit_mask(num_nodes));
873:   set_bit_mask(p_mask,p_mask_size,my_id);

875:   /* allocate space for masks and info bufs */
876:   gs->nghs = sh_proc_mask = (int *) bss_malloc(p_mask_size);
877:   gs->pw_nghs = pw_sh_proc_mask = (int *) perm_malloc(p_mask_size);
878:   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
879:   t_mask = (int *) bss_malloc(p_mask_size);
880:   gs->ngh_buf = ngh_buf = (int *) bss_malloc(ngh_buf_size);

882:   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
883:   /* had thought I could exploit rendezvous threshold */

885:   /* default is one pass */
886:   per_load = negl  = gs->negl;
887:   gs->num_loads = num_loads = 1;
888:   i=p_mask_size*negl;

890:   /* possible overflow on buffer size */
891:   /* overflow hack                    */
892:   if (i<0) {i=INT_MAX;}

894:   buf_size = MIN(msg_buf,i);

896:   /* can we do it? */
897:   if (p_mask_size>buf_size)
898:     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}

900:   /* get giop buf space ... make *only* one malloc */
901:   buf1 = (int *) bss_malloc(buf_size<<1);

903:   /* more than one gior exchange needed? */
904:   if (buf_size!=i)
905:     {
906:       per_load = buf_size/p_mask_size;
907:       buf_size = per_load*p_mask_size;
908:       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
909:     }

911: #ifdef DEBUG  
912:   /* dump some basic info */
913:   error_msg_warning("n_lds=%d,pms=%d,buf_sz=%d\n",num_loads,p_mask_size,buf_size);
914: #endif

916:   /* convert buf sizes from #bytes to #ints - 32 bit only! */
917: #ifdef SAFE  
918:   p_mask_size/=INT_LEN; ngh_buf_size/=INT_LEN; buf_size/=INT_LEN;
919: #else  
920:   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
921: #endif
922: 
923:   /* find giop work space */
924:   buf2 = buf1+buf_size;

926:   /* hold #ints needed for processor masks */
927:   gs->mask_sz=p_mask_size;

929:   /* init buffers */
930:   ivec_zero(sh_proc_mask,p_mask_size);
931:   ivec_zero(pw_sh_proc_mask,p_mask_size);
932:   ivec_zero(ngh_buf,ngh_buf_size);

934:   /* HACK reset tree info */
935:   tree_buf=NULL;
936:   tree_buf_sz=ntree=0;

938:   /* queue the tree elements for now */
939:   /* elms_q = new_queue(); */
940: 
941:   /* can also queue tree info for pruned or forest implememtation */
942:   /*  mask_q = new_queue(); */

944:   /* ok do it */
945:   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
946:     {
947:       /* identity for bitwise or is 000...000 */
948:       ivec_zero(buf1,buf_size);

950:       /* load msg buffer */
951:       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
952:         {
953:           offset = (offset-start)*p_mask_size;
954:           ivec_copy(buf1+offset,p_mask,p_mask_size);
955:         }

957:       /* GLOBAL: pass buffer */
958: #if   defined NXSRC
959:       gior(buf1,buf_size,buf2);
960: #elif defined MPISRC
961:       giop(buf1,buf2,buf_size,&oper);
962: #endif


965:       /* unload buffer into ngh_buf */
966:       ptr2=(elms+i_start);
967:       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
968:         {
969:           /* I own it ... may have to pairwise it */
970:           if (j==*ptr2)
971:             {
972:               /* do i share it w/anyone? */
973: #ifdef SAFE
974:               ct1 = ct_bits((char *)ptr3,p_mask_size*INT_LEN);
975: #else
976:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
977: #endif
978:               /* guess not */
979:               if (ct1<2)
980:                 {ptr2++; ptr1+=p_mask_size; continue;}

982:               /* i do ... so keep info and turn off my bit */
983:               ivec_copy(ptr1,ptr3,p_mask_size);
984:               ivec_xor(ptr1,p_mask,p_mask_size);
985:               ivec_or(sh_proc_mask,ptr1,p_mask_size);
986: 
987:               /* is it to be done pairwise? */
988:               if (--ct1<=level)
989:                 {
990:                   npw++;
991: 
992:                   /* turn on high bit to indicate pw need to process */
993:                   *ptr2++ |= TOP_BIT;
994:                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
995:                   ptr1+=p_mask_size;
996:                   continue;
997:                 }

999:               /* get set for next and note that I have a tree contribution */
1000:               /* could save exact elm index for tree here -> save a search */
1001:               ptr2++; ptr1+=p_mask_size; ntree_map++;
1002:             }
1003:           /* i don't but still might be involved in tree */
1004:           else
1005:             {

1007:               /* shared by how many? */
1008: #ifdef SAFE
1009:               ct1 = ct_bits((char *)ptr3,p_mask_size*INT_LEN);
1010: #else
1011:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
1012: #endif

1014:               /* none! */
1015:               if (ct1<2)
1016:                 {continue;}

1018:               /* is it going to be done pairwise? but not by me of course!*/
1019:               if (--ct1<=level)
1020:                 {continue;}
1021:             }
1022:           /* LATER we're going to have to process it NOW */
1023:           /* nope ... tree it */
1024:           place_in_tree(j);
1025:         }
1026:     }

1028:   bss_free((void *)t_mask);
1029:   bss_free((void *)buf1);

1031:   gs->len_pw_list=npw;
1032:   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*INT_LEN);

1034:   /* expand from bit mask list to int list and save ngh list */
1035:   gs->nghs = (int *) perm_malloc(gs->num_nghs * INT_LEN);
1036:   bm_to_proc((char *)sh_proc_mask,p_mask_size*INT_LEN,gs->nghs);

1038:   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*INT_LEN);

1040:   oper = GL_MAX;
1041:   ct1 = gs->num_nghs;
1042:   giop(&ct1,&ct2,1,&oper);
1043:   gs->max_nghs = ct1;

1045:   gs->tree_map_sz  = ntree_map;
1046:   gs->max_left_over=ntree;

1048:   bss_free((void *)p_mask);
1049:   bss_free((void *)sh_proc_mask);

1051: #ifdef DEBUG  
1052:   error_msg_warning("get_ngh_buf() end w/%d :: %d\n",my_id,num_nodes);
1053: #endif
1054: }





1060: /******************************************************************************
1061: Function: pairwise_init()

1063: Input : 
1064: Output: 
1065: Return: 
1066: Description: 

1068: if an element is shared by fewer that level# of nodes do pairwise exch 
1069: ******************************************************************************/
1070: static
1071: void
1072: set_pairwise(gs_id *gs)
1073: {
1074:   register int i, j;
1075:   int p_mask_size;
1076:   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
1077:   int *ngh_buf, *buf2;
1078:   int offset;
1079:   int *msg_list, *msg_size, **msg_nodes, nprs;
1080:   int *pairwise_elm_list, len_pair_list=0;
1081:   int *iptr, t1, i_start, nel, *elms;
1082:   int ct;


1085: #ifdef DEBUG  
1086:   error_msg_warning("set_pairwise() begin w/%d :: %d\n",my_id,num_nodes);
1087: #endif

1089:   /* to make life easier */
1090:   nel  = gs->nel;
1091:   elms = gs->elms;
1092:   ngh_buf = gs->ngh_buf;
1093:   sh_proc_mask  = gs->pw_nghs;

1095:   /* need a few temp masks */
1096:   p_mask_size   = len_bit_mask(num_nodes);
1097:   p_mask        = (int *) bss_malloc(p_mask_size);
1098:   tmp_proc_mask = (int *) bss_malloc(p_mask_size);

1100:   /* set mask to my my_id's bit mask */
1101:   set_bit_mask(p_mask,p_mask_size,my_id);

1103: #ifdef SAFE
1104:   p_mask_size /= INT_LEN;
1105: #else
1106:   p_mask_size >>= 2;
1107: #endif
1108: 
1109:   len_pair_list=gs->len_pw_list;
1110:   gs->pw_elm_list=pairwise_elm_list=(int*)perm_malloc((len_pair_list+1)*INT_LEN);

1112:   /* how many processors (nghs) do we have to exchange with? */
1113:   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*INT_LEN);


1116:   /* allocate space for gs_gop() info */
1117:   gs->pair_list = msg_list = (int *)  perm_malloc(INT_LEN*nprs);
1118:   gs->msg_sizes = msg_size  = (int *)  perm_malloc(INT_LEN*nprs);
1119:   gs->node_list = msg_nodes = (int **) perm_malloc(INT_PTR_LEN*(nprs+1));

1121:   /* init msg_size list */
1122:   ivec_zero(msg_size,nprs);

1124:   /* expand from bit mask list to int list */
1125:   bm_to_proc((char *)sh_proc_mask,p_mask_size*INT_LEN,msg_list);
1126: 
1127:   /* keep list of elements being handled pairwise */
1128:   for (i=j=0;i<nel;i++)
1129:     {
1130:       if (elms[i] & TOP_BIT)
1131:         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
1132:     }
1133:   pairwise_elm_list[j] = -1;

1135: #ifdef DEBUG
1136:   if (j!=len_pair_list)
1137:     {error_msg_fatal("oops ... bad paiwise list in set_pairwise!");}
1138: #endif

1140: #if defined MPISRC  
1141:   gs->msg_ids_out = (MPI_Request *)  perm_malloc(sizeof(MPI_Request)*(nprs+1));
1142:   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
1143:   gs->msg_ids_in = (MPI_Request *)  perm_malloc(sizeof(MPI_Request)*(nprs+1));
1144:   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
1145:   gs->pw_vals = (REAL *) perm_malloc(REAL_LEN*len_pair_list*vec_sz);
1146: #else
1147:   gs->msg_ids_out = (int *)  perm_malloc(INT_LEN*(nprs+1));
1148:   ivec_zero(gs->msg_ids_out,nprs);
1149:   gs->msg_ids_out[nprs] = -1;
1150:   gs->msg_ids_in = (int *)  perm_malloc(INT_LEN*(nprs+1));
1151:   ivec_zero(gs->msg_ids_in,nprs);
1152:   gs->msg_ids_in[nprs] = -1;
1153:   gs->pw_vals = (REAL *) perm_malloc(REAL_LEN*len_pair_list*vec_sz);
1154: #endif

1156:   /* find who goes to each processor */
1157:   for (i_start=i=0;i<nprs;i++)
1158:     {
1159:       /* processor i's mask */
1160:       set_bit_mask(p_mask,p_mask_size*INT_LEN,msg_list[i]);

1162:       /* det # going to processor i */
1163:       for (ct=j=0;j<len_pair_list;j++)
1164:         {
1165:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
1166:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
1167:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*INT_LEN))
1168:             {ct++;}
1169:         }
1170:       msg_size[i] = ct;
1171:       i_start = MAX(i_start,ct);

1173:       /*space to hold nodes in message to first neighbor */
1174:       msg_nodes[i] = iptr = (int *) perm_malloc(INT_LEN*(ct+1));

1176:       for (j=0;j<len_pair_list;j++)
1177:         {
1178:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
1179:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
1180:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*INT_LEN))
1181:             {*iptr++ = j;}
1182:         }
1183:       *iptr = -1;
1184:     }
1185:   msg_nodes[nprs] = NULL;

1187: #ifdef INFO1
1188:   t1 = GL_MAX;
1189:   giop(&i_start,&offset,1,&t1);
1190:   gs->max_pairs = i_start;
1191: #else
1192:   j=gs->loc_node_pairs=i_start;
1193:   t1 = GL_MAX;
1194:   giop(&i_start,&offset,1,&t1);
1195:   gs->max_node_pairs = i_start;

1197:   i_start=j;
1198:   t1 = GL_MIN;
1199:   giop(&i_start,&offset,1,&t1);
1200:   gs->min_node_pairs = i_start;

1202:   i_start=j;
1203:   t1 = GL_ADD;
1204:   giop(&i_start,&offset,1,&t1);
1205:   gs->avg_node_pairs = i_start/num_nodes + 1;

1207:   i_start=nprs;
1208:   t1 = GL_MAX;
1209:   giop(&i_start,&offset,1,&t1);
1210:   gs->max_pairs = i_start;

1212:   /*gs->max_pairs = -1;*/
1213: #endif

1215:   /* remap pairwise in tail of gsi_via_bit_mask() */
1216:   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1217:   gs->out = (REAL *) perm_malloc(REAL_LEN*gs->msg_total*vec_sz);
1218:   gs->in  = (REAL *) perm_malloc(REAL_LEN*gs->msg_total*vec_sz);

1220:   /* reset malloc pool */
1221:   bss_free((void *)p_mask);
1222:   bss_free((void *)tmp_proc_mask);

1224: #ifdef DEBUG  
1225:   error_msg_warning("set_pairwise() end w/%d :: %d\n",my_id,num_nodes);
1226: #endif
1227: }



1231: /******************************************************************************
1232: Function: set_tree()

1234: Input : 
1235: Output: 
1236: Return: 
1237: Description: 

1239: to do pruned tree just save ngh buf copy for each one and decode here!
1240: ******************************************************************************/
1241: static
1242: void
1243: set_tree(gs_id *gs)
1244: {
1245:   register int i, j, n, nel;
1246:   register int *iptr_in, *iptr_out, *tree_elms, *elms;


1249: #ifdef DEBUG
1250:   error_msg_warning("set_tree() :: begin\n");
1251: #endif

1253:   /* local work ptrs */
1254:   elms = gs->elms;
1255:   nel     = gs->nel;

1257:   /* how many via tree */
1258:   gs->tree_nel  = n = ntree;
1259:   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1260:   gs->tree_buf  = (REAL *) bss_malloc(REAL_LEN*n*vec_sz);
1261:   gs->tree_work = (REAL *) bss_malloc(REAL_LEN*n*vec_sz);
1262:   j=gs->tree_map_sz;
1263:   gs->tree_map_in = iptr_in  = (int *) bss_malloc(INT_LEN*(j+1));
1264:   gs->tree_map_out = iptr_out = (int *) bss_malloc(INT_LEN*(j+1));

1266: #ifdef DEBUG  
1267:   error_msg_warning("num on tree=%d,%d",gs->max_left_over,gs->tree_nel);
1268: #endif

1270:   /* search the longer of the two lists */
1271:   /* note ... could save this info in get_ngh_buf and save searches */
1272:   if (n<=nel)
1273:     {
1274:       /* bijective fct w/remap - search elm list */
1275:       for (i=0; i<n; i++)
1276:         {
1277:           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1278:             {*iptr_in++ = j; *iptr_out++ = i;}
1279:         }
1280:     }
1281:   else
1282:     {
1283:       for (i=0; i<nel; i++)
1284:         {
1285:           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1286:             {*iptr_in++ = i; *iptr_out++ = j;}
1287:         }
1288:     }

1290:   /* sentinel */
1291:   *iptr_in = *iptr_out = -1;

1293: #ifdef DEBUG
1294:   error_msg_warning("set_tree() :: end\n");
1295: #endif
1296: }


1299: /******************************************************************************
1300: Function: gsi_via_int_list()

1302: Input : 
1303: Output: 
1304: Return: 
1305: Description: 
1306: ******************************************************************************/
1307: /*static
1308: void
1309: gsi_via_int_list(gs_id *gs)
1310: {

1312:    LATER: for P large the bit masks -> too many passes
1313:    LATER: strategy: do gsum w/1 in position i in negl if owner
1314:    LATER: then sum of entire vector 1 ... negl determines min buf len
1315:    LATER: So choose min from this or mask method
1316: }*/


1319: #if defined(not_used)
1320: static
1321: int
1322: root_sub_tree(int *proc_list, int num)
1323: {
1324:   register int i, j, p_or, p_and;
1325:   register int root, mask;


1328:   /* ceiling(log2(num_nodes)) - 1 */
1329:   j = i_log2_num_nodes;
1330:   if (num_nodes==floor_num_nodes)
1331:     {j--;}

1333:   /* set mask to msb */
1334:   for(mask=1,i=0; i<j; i++)
1335:     {mask<<=1;}

1337:   p_or  = ivec_reduce_or(proc_list,num);
1338:   p_and = ivec_reduce_and(proc_list,num);
1339:   for(root=i=0; i<j; i++,mask>>=1)
1340:     {
1341:       /* (msb-i)'th bits on ==> root in right 1/2 tree */
1342:       if (mask & p_and)
1343:         {root |= mask;}

1345:       /* (msb-i)'th bits differ ==> root found */
1346:       else if (mask & p_or)
1347:         {break;}

1349:       /* (msb-i)'th bits off ==>root in left 1/2 tree */
1350:     }

1352: #ifdef DEBUG
1353:   if ((root<0) || (root>num_nodes))
1354:     {error_msg_fatal("root_sub_tree() :: bad root!");}

1356:   if (!my_id)
1357:     {
1358:       printf("num_nodes=%d, j=%d, root=%d\n",num_nodes,j,root);
1359:       printf("procs: ");
1360:       for(i=0;i<num;i++)
1361:         {printf("%d ",proc_list[i]);}
1362:       printf("\n");
1363:     }
1364: #endif

1366:   return(root);
1367: }
1368: #endif


1371: #if defined(not_used)
1372: static int
1373: in_sub_tree(int *mask, int mask_size, int *work, int nw)
1374: {
1375:   int ct, nb;
1376: 
1377:   /* mask size in bytes */
1378:   nb = mask_size<<2;
1379: 
1380:   /* shared amoungst how many? */
1381:   ct = ct_bits((char *)mask,nb);

1383:   /* enough space? */
1384:   if (nw<ct)
1385:     {error_msg_fatal("in_sub_tree() :: not enough space to expand bit mask!");}

1387:   /* expand */
1388:   bm_to_proc((char *)mask,nb,work);
1389: 
1390:   /* find tree root */
1391:   root_sub_tree(work,ct);

1393:   /* am i in any of the paths? */

1395:   return(TRUE);

1397:   /*
1398:   sh_mask = (int *)bss_malloc(nb);
1399:   bss_free(sh_mask);
1400:   */
1401: }
1402: #endif


1405: /******************************************************************************
1406: Function: gather_scatter

1408: Input : 
1409: Output: 
1410: Return: 
1411: Description: 
1412: ******************************************************************************/
1413: static
1414: void
1415: gs_gop_local_out(register gs_id *gs, register REAL *vals)
1416: {
1417:   register int *num, *map, **reduce;
1418:   register REAL tmp;


1421: #ifdef DEBUG
1422:   error_msg_warning("start gs_gop_xxx()\n");
1423: #endif

1425:   num    = gs->num_gop_local_reduce;
1426:   reduce = gs->gop_local_reduce;
1427:   while ((map = *reduce++))
1428:     {
1429:       /* wall */
1430:       if (*num == 2)
1431:         {
1432:           num ++;
1433:           vals[map[1]] = vals[map[0]];
1434:         }
1435:       /* corner shared by three elements */
1436:       else if (*num == 3)
1437:         {
1438:           num ++;
1439:           vals[map[2]] = vals[map[1]] = vals[map[0]];
1440:         }
1441:       /* corner shared by four elements */
1442:       else if (*num == 4)
1443:         {
1444:           num ++;
1445:           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1446:         }
1447:       /* general case ... odd geoms ... 3D*/
1448:       else
1449:         {
1450:           num++;
1451:           tmp = *(vals + *map++);
1452:           while (*map >= 0)
1453:             {*(vals + *map++) = tmp;}
1454:         }
1455:     }
1456: }



1460: /******************************************************************************
1461: Function: gather_scatter

1463: Input : 
1464: Output: 
1465: Return: 
1466: Description: 
1467: ******************************************************************************/
1468: void
1469: gs_gop_binary(gs_ADT gs, REAL *vals, rbfp fct)
1470: {
1471: #ifdef DEBUG  
1472:   if (!gs)  {error_msg_fatal("gs_gop() :: passed NULL gs handle!!!");}
1473:   if (!fct) {error_msg_fatal("gs_gop() :: passed NULL bin fct handle!!!");}
1474:   error_msg_warning("start gs_gop_xxx()\n");
1475: #endif

1477:   /* local only operations!!! */
1478:   if (gs->num_local)
1479:     {gs_gop_local_binary(gs,vals,fct);}
1480: 
1481:   /* if intersection tree/pairwise and local isn't empty */
1482:   if (gs->num_local_gop)
1483:     {
1484:       gs_gop_local_in_binary(gs,vals,fct);
1485: 
1486:       /* pairwise */
1487:       if (gs->num_pairs)
1488:         {gs_gop_pairwise_binary(gs,vals,fct);}
1489: 
1490:       /* tree */
1491:       else if (gs->max_left_over)
1492:         {gs_gop_tree_binary(gs,vals,fct);}
1493: 
1494:       gs_gop_local_out(gs,vals);
1495:     }
1496:   /* if intersection tree/pairwise and local is empty */
1497:   else
1498:     {
1499:       /* pairwise */
1500:       if (gs->num_pairs)
1501:         {gs_gop_pairwise_binary(gs,vals,fct);}
1502: 
1503:       /* tree */
1504:       else if (gs->max_left_over)
1505:         {gs_gop_tree_binary(gs,vals,fct);}
1506:     }
1507: }



1511: /******************************************************************************
1512: Function: gather_scatter

1514: Input : 
1515: Output: 
1516: Return: 
1517: Description: 
1518: ******************************************************************************/
1519: static
1520: void
1521: gs_gop_local_binary(register gs_id *gs, register REAL *vals, register rbfp fct)
1522: {
1523:   register int *num, *map, **reduce;
1524:   REAL tmp;


1527: #ifdef DEBUG
1528:   error_msg_warning("start gs_gop_xxx()\n");
1529: #endif
1530:   num    = gs->num_local_reduce;
1531:   reduce = gs->local_reduce;
1532:   while ((map = *reduce))
1533:     {
1534:       num ++;
1535:       (*fct)(&tmp,NULL,1);
1536:       /* tmp = 0.0; */
1537:       while (*map >= 0)
1538:         {(*fct)(&tmp,(vals + *map),1); map++;}
1539:         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1540: 
1541:       map = *reduce++;
1542:       while (*map >= 0)
1543:         {*(vals + *map++) = tmp;}
1544:     }
1545: }



1549: /******************************************************************************
1550: Function: gather_scatter

1552: Input : 
1553: Output: 
1554: Return: 
1555: Description: 
1556: ******************************************************************************/
1557: static
1558: void
1559: gs_gop_local_in_binary(register gs_id *gs, register REAL *vals, register rbfp fct)
1560: {
1561:   register int *num, *map, **reduce;
1562:   register REAL *base;


1565: #ifdef DEBUG
1566:   error_msg_warning("start gs_gop_xxx()\n");
1567: #endif

1569:   num    = gs->num_gop_local_reduce;

1571:   reduce = gs->gop_local_reduce;
1572:   while ((map = *reduce++))
1573:     {
1574:       num++;
1575:       base = vals + *map++;
1576:       while (*map >= 0)
1577:         {(*fct)(base,(vals + *map),1); map++;}
1578:         /*        {*base = (*fct)(*base,*(vals + *map)); map++;} */
1579:     }
1580: }



1584: /******************************************************************************
1585: Function: gather_scatter

1587: VERSION 3 :: 

1589: Input : 
1590: Output: 
1591: Return: 
1592: Description: 
1593: ******************************************************************************/
1594: static
1595: void
1596: gs_gop_pairwise_binary(register gs_id *gs, register REAL *in_vals,
1597:                        register rbfp fct)
1598: {
1599: #if   defined NXSRC
1600:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
1601:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
1602:   register int *pw, *list, *size, **nodes;
1603:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;

1605: #ifdef DEBUG
1606:   error_msg_warning("start gs_gop_xxx()\n");
1607: #endif

1609:   /* strip and load registers */
1610:   msg_list =list         = gs->pair_list;
1611:   msg_size =size         = gs->msg_sizes;
1612:   msg_nodes=nodes        = gs->node_list;
1613:   iptr=pw                = gs->pw_elm_list;
1614:   dptr1=dptr3            = gs->pw_vals;
1615:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1616:   msg_ids_out = ids_out  = gs->msg_ids_out;
1617:   dptr2                  = gs->out;
1618:   in1=in2                = gs->in;

1620:   /* post the receives */
1621:   do
1622:     {
1623:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
1624:       in1 += *size++;
1625:     }
1626:   while (*msg_ids_in >= 0);

1628:   /* load gs values into in out gs buffers */
1629:   while (*iptr >= 0)
1630:     {*dptr3++ = *(in_vals + *iptr++);}

1632:   /* load out buffers and post the sends */
1633:   while (iptr = *msg_nodes++)
1634:     {
1635:       dptr3 = dptr2;
1636:       while (*iptr >= 0)
1637:         {*dptr2++ = *(dptr1 + *iptr++);}
1638:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,
1639:                                    *(msg_size++)*REAL_LEN,*msg_list++,0);
1640:     }

1642:   /* post the receives ... was here*/
1643:   if (gs->max_left_over)
1644:     {gs_gop_tree_binary(gs,in_vals,fct);}

1646:   /* process the received data */
1647:   while (iptr = *nodes++)
1648:     {
1649:       msgwait(*ids_in++);
1650:       while (*iptr >= 0)
1651:         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1652:         /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1653:     }

1655:   /* replace vals */
1656:   while (*pw >= 0)
1657:     {*(in_vals + *pw++) = *dptr1++;}

1659:   /* clear isend message handles */
1660:   while (*ids_out >= 0)
1661:     {msgwait(*ids_out++);}

1663: #elif defined MPISRC
1664:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
1665:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
1666:   register int *pw, *list, *size, **nodes;
1667:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1668:   MPI_Status status;


1671:   /* strip and load registers */
1672:   msg_list =list         = gs->pair_list;
1673:   msg_size =size         = gs->msg_sizes;
1674:   msg_nodes=nodes        = gs->node_list;
1675:   iptr=pw                = gs->pw_elm_list;
1676:   dptr1=dptr3            = gs->pw_vals;
1677:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1678:   msg_ids_out = ids_out  = gs->msg_ids_out;
1679:   dptr2                  = gs->out;
1680:   in1=in2                = gs->in;

1682:   /* post the receives */
1683:   /*  msg_nodes=nodes; */
1684:   do
1685:     {
1686:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1687:          second one *list and do list++ afterwards */
1688:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1689:                 gs->gs_comm, msg_ids_in++);
1690:       in1 += *size++;
1691:     }
1692:   while (*++msg_nodes);
1693:   msg_nodes=nodes;

1695:   /* load gs values into in out gs buffers */
1696:   while (*iptr >= 0)
1697:     {*dptr3++ = *(in_vals + *iptr++);}

1699:   /* load out buffers and post the sends */
1700:   while ((iptr = *msg_nodes++))
1701:     {
1702:       dptr3 = dptr2;
1703:       while (*iptr >= 0)
1704:         {*dptr2++ = *(dptr1 + *iptr++);}
1705:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1706:       /* is msg_ids_out++ correct? */
1707:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
1708:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1709:     }

1711:   if (gs->max_left_over)
1712:     {gs_gop_tree_binary(gs,in_vals,fct);}

1714:   /* process the received data */
1715:   msg_nodes=nodes;
1716:   while ((iptr = *nodes++))
1717:     {
1718:       /* Should I check the return value of MPI_Wait() or status? */
1719:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1720:       MPI_Wait(ids_in++, &status);
1721:       while (*iptr >= 0)
1722:         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1723:       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1724:     }

1726:   /* replace vals */
1727:   while (*pw >= 0)
1728:     {*(in_vals + *pw++) = *dptr1++;}

1730:   /* clear isend message handles */
1731:   /* This changed for clarity though it could be the same */
1732:   while (*msg_nodes++)
1733:     /* Should I check the return value of MPI_Wait() or status? */
1734:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1735:     {MPI_Wait(ids_out++, &status);}
1736: #else
1737:   return;
1738: #endif


1741: }



1745: /******************************************************************************
1746: Function: gather_scatter

1748: Input : 
1749: Output: 
1750: Return: 
1751: Description: 
1752: ******************************************************************************/
1753: static
1754: void
1755: gs_gop_tree_binary(gs_id *gs, REAL *vals, register rbfp fct)
1756: {
1757:   int size;
1758:   int *in, *out;
1759:   REAL *buf, *work;

1761: #ifdef DEBUG
1762:   error_msg_warning("gs_gop_tree_binary() :: start\n");
1763: #endif
1764: 
1765:   in   = gs->tree_map_in;
1766:   out  = gs->tree_map_out;
1767:   buf  = gs->tree_buf;
1768:   work = gs->tree_work;
1769:   size = gs->tree_nel;

1771:   /* load vals vector w/identity */
1772:   (*fct)(buf,NULL,size);
1773: 
1774:   /* load my contribution into val vector */
1775:   while (*in >= 0)
1776:     {(*fct)((buf + *out++),(vals + *in++),-1);}
1777: /*    {*(buf + *out++) = *(vals + *in++);} */

1779:   gfop(buf,work,size,(vbfp)fct,REAL_TYPE,0);

1781:   in   = gs->tree_map_in;
1782:   out  = gs->tree_map_out;
1783:   while (*in >= 0)
1784:     {(*fct)((vals + *in++),(buf + *out++),-1);}
1785:     /*    {*(vals + *in++) = *(buf + *out++);} */


1788: #ifdef DEBUG
1789:   error_msg_warning("gs_gop_tree_binary() :: end\n");
1790: #endif

1792: }




1797: /******************************************************************************
1798: Function: gather_scatter

1800: Input : 
1801: Output: 
1802: Return: 
1803: Description: 
1804: ******************************************************************************/
1805: void
1806: gs_gop(register gs_id *gs, register REAL *vals, register const char *op)
1807: {
1808: #ifdef DEBUG
1809:   error_msg_warning("start gs_gop()\n");
1810:   if (!gs) {error_msg_fatal("gs_gop() :: passed NULL gs handle!!!");}
1811:   if (!op) {error_msg_fatal("gs_gop() :: passed NULL operation!!!");}
1812: #endif

1814:   switch (*op) {
1815:   case '+':
1816:     gs_gop_plus(gs,vals);
1817:     break;
1818:   case '*':
1819:     gs_gop_times(gs,vals);
1820:     break;
1821:   case 'a':
1822:     gs_gop_min_abs(gs,vals);
1823:     break;
1824:   case 'A':
1825:     gs_gop_max_abs(gs,vals);
1826:     break;
1827:   case 'e':
1828:     gs_gop_exists(gs,vals);
1829:     break;
1830:   case 'm':
1831:     gs_gop_min(gs,vals);
1832:     break;
1833:   case 'M':
1834:     gs_gop_max(gs,vals); break;
1835:     /*
1836:     if (*(op+1)=='\0')
1837:       {gs_gop_max(gs,vals); break;}
1838:     else if (*(op+1)=='X')
1839:       {gs_gop_max_abs(gs,vals); break;}
1840:     else if (*(op+1)=='N')
1841:       {gs_gop_min_abs(gs,vals); break;}
1842:     */
1843:   default:
1844:     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1845:     error_msg_warning("gs_gop() :: default :: plus");
1846:     gs_gop_plus(gs,vals);
1847:     break;
1848:   }
1849: #ifdef DEBUG
1850:   error_msg_warning("end gs_gop()\n");
1851: #endif
1852: }


1855: /******************************************************************************
1856: Function: gather_scatter

1858: Input : 
1859: Output: 
1860: Return: 
1861: Description: 
1862: ******************************************************************************/
1863: static void
1864: gs_gop_exists(register gs_id *gs, register REAL *vals)
1865: {
1866: #ifdef DEBUG
1867:   error_msg_warning("start gs_gop_xxx()\n");
1868: #endif

1870:   /* local only operations!!! */
1871:   if (gs->num_local)
1872:     {gs_gop_local_exists(gs,vals);}

1874:   /* if intersection tree/pairwise and local isn't empty */
1875:   if (gs->num_local_gop)
1876:     {
1877:       gs_gop_local_in_exists(gs,vals);

1879:       /* pairwise */
1880:       if (gs->num_pairs)
1881:         {gs_gop_pairwise_exists(gs,vals);}
1882: 
1883:       /* tree */
1884:       else if (gs->max_left_over)
1885:         {gs_gop_tree_exists(gs,vals);}
1886: 
1887:       gs_gop_local_out(gs,vals);
1888:     }
1889:   /* if intersection tree/pairwise and local is empty */
1890:   else
1891:     {
1892:       /* pairwise */
1893:       if (gs->num_pairs)
1894:         {gs_gop_pairwise_exists(gs,vals);}
1895: 
1896:       /* tree */
1897:       else if (gs->max_left_over)
1898:         {gs_gop_tree_exists(gs,vals);}
1899:     }
1900: }



1904: /******************************************************************************
1905: Function: gather_scatter

1907: Input : 
1908: Output: 
1909: Return: 
1910: Description: 
1911: ******************************************************************************/
1912: static
1913: void
1914: gs_gop_local_exists(register gs_id *gs, register REAL *vals)
1915: {
1916:   register int *num, *map, **reduce;
1917:   register REAL tmp;


1920: #ifdef DEBUG
1921:   error_msg_warning("start gs_gop_xxx()\n");
1922: #endif

1924:   num    = gs->num_local_reduce;
1925:   reduce = gs->local_reduce;
1926:   while ((map = *reduce))
1927:     {
1928:       num ++;
1929:       tmp = 0.0;
1930:       while (*map >= 0)
1931:         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1932: 
1933:       map = *reduce++;
1934:       while (*map >= 0)
1935:         {*(vals + *map++) = tmp;}
1936:     }
1937: }



1941: /******************************************************************************
1942: Function: gather_scatter

1944: Input : 
1945: Output: 
1946: Return: 
1947: Description: 
1948: ******************************************************************************/
1949: static
1950: void
1951: gs_gop_local_in_exists(register gs_id *gs, register REAL *vals)
1952: {
1953:   register int *num, *map, **reduce;
1954:   register REAL *base;


1957: #ifdef DEBUG
1958:   error_msg_warning("start gs_gop_xxx()\n");
1959: #endif

1961:   num    = gs->num_gop_local_reduce;
1962:   reduce = gs->gop_local_reduce;
1963:   while ((map = *reduce++))
1964:     {
1965:       num++;
1966:       base = vals + *map++;
1967:       while (*map >= 0)
1968:         {*base = EXISTS(*base,*(vals + *map)); map++;}
1969:     }
1970: }



1974: /******************************************************************************
1975: Function: gather_scatter

1977: VERSION 3 :: 

1979: Input : 
1980: Output: 
1981: Return: 
1982: Description: 
1983: ******************************************************************************/
1984: static
1985: void
1986: gs_gop_pairwise_exists(register gs_id *gs, register REAL *in_vals)
1987: {
1988: #if   defined NXSRC
1989:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
1990:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
1991:   register int *pw, *list, *size, **nodes;
1992:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;


1995: #ifdef DEBUG
1996:   error_msg_warning("start gs_gop_xxx()\n");
1997: #endif

1999:   /* strip and load registers */
2000:   msg_list =list         = gs->pair_list;
2001:   msg_size =size         = gs->msg_sizes;
2002:   msg_nodes=nodes        = gs->node_list;
2003:   iptr=pw                = gs->pw_elm_list;
2004:   dptr1=dptr3            = gs->pw_vals;
2005:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2006:   msg_ids_out = ids_out  = gs->msg_ids_out;
2007:   dptr2                  = gs->out;
2008:   in1=in2                = gs->in;

2010:   /* post the receives */
2011:   do
2012:     {
2013:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
2014:       in1 += *size++;
2015:     }
2016:   while (*msg_ids_in >= 0);

2018:   /* load gs values into in out gs buffers */
2019:   while (*iptr >= 0)
2020:     {*dptr3++ = *(in_vals + *iptr++);}

2022:   /* load out buffers and post the sends */
2023:   while (iptr = *msg_nodes++)
2024:     {
2025:       dptr3 = dptr2;
2026:       while (*iptr >= 0)
2027:         {*dptr2++ = *(dptr1 + *iptr++);}
2028:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
2029:                              *msg_list++,0);
2030:     }

2032:   /* post the receives ... was here*/
2033:   if (gs->max_left_over)
2034:     {gs_gop_tree_exists(gs,in_vals);}

2036:   /* process the received data */
2037:   while (iptr = *nodes++)
2038:     {
2039:       msgwait(*ids_in++);
2040:       while (*iptr >= 0)
2041:         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2042:     }

2044:   /* replace vals */
2045:   while (*pw >= 0)
2046:     {*(in_vals + *pw++) = *dptr1++;}

2048:   /* clear isend message handles */
2049:   while (*ids_out >= 0)
2050:     {msgwait(*ids_out++);}

2052: #elif defined MPISRC
2053:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2054:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2055:   register int *pw, *list, *size, **nodes;
2056:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2057:   MPI_Status status;


2060:   /* strip and load registers */
2061:   msg_list =list         = gs->pair_list;
2062:   msg_size =size         = gs->msg_sizes;
2063:   msg_nodes=nodes        = gs->node_list;
2064:   iptr=pw                = gs->pw_elm_list;
2065:   dptr1=dptr3            = gs->pw_vals;
2066:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2067:   msg_ids_out = ids_out  = gs->msg_ids_out;
2068:   dptr2                  = gs->out;
2069:   in1=in2                = gs->in;

2071:   /* post the receives */
2072:   /*  msg_nodes=nodes; */
2073:   do
2074:     {
2075:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2076:          second one *list and do list++ afterwards */
2077:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2078:                 gs->gs_comm, msg_ids_in++);
2079:       in1 += *size++;
2080:     }
2081:   while (*++msg_nodes);
2082:   msg_nodes=nodes;

2084:   /* load gs values into in out gs buffers */
2085:   while (*iptr >= 0)
2086:     {*dptr3++ = *(in_vals + *iptr++);}

2088:   /* load out buffers and post the sends */
2089:   while ((iptr = *msg_nodes++))
2090:     {
2091:       dptr3 = dptr2;
2092:       while (*iptr >= 0)
2093:         {*dptr2++ = *(dptr1 + *iptr++);}
2094:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2095:       /* is msg_ids_out++ correct? */
2096:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2097:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2098:     }

2100:   if (gs->max_left_over)
2101:     {gs_gop_tree_exists(gs,in_vals);}

2103:   /* process the received data */
2104:   msg_nodes=nodes;
2105:   while ((iptr = *nodes++))
2106:     {
2107:       /* Should I check the return value of MPI_Wait() or status? */
2108:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2109:       MPI_Wait(ids_in++, &status);
2110:       while (*iptr >= 0)
2111:         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2112:     }

2114:   /* replace vals */
2115:   while (*pw >= 0)
2116:     {*(in_vals + *pw++) = *dptr1++;}

2118:   /* clear isend message handles */
2119:   /* This changed for clarity though it could be the same */
2120:   while (*msg_nodes++)
2121:     /* Should I check the return value of MPI_Wait() or status? */
2122:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2123:     {MPI_Wait(ids_out++, &status);}
2124: #else
2125:   return;
2126: #endif
2127: }



2131: /******************************************************************************
2132: Function: gather_scatter

2134: Input : 
2135: Output: 
2136: Return: 
2137: Description: 
2138: ******************************************************************************/
2139: static
2140: void
2141: gs_gop_tree_exists(gs_id *gs, REAL *vals)
2142: {
2143:   int size;
2144:   int *in, *out;
2145:   REAL *buf, *work;
2146:   int op[] = {GL_EXISTS,0};


2149: #ifdef DEBUG
2150:   error_msg_warning("start gs_gop_tree_exists()");
2151: #endif
2152: 
2153:   in   = gs->tree_map_in;
2154:   out  = gs->tree_map_out;
2155:   buf  = gs->tree_buf;
2156:   work = gs->tree_work;
2157:   size = gs->tree_nel;

2159: #if defined  BLAS||CBLAS
2160:   *work = 0.0;
2161:   copy(size,work,0,buf,1);
2162: #else
2163:   rvec_zero(buf,size);
2164: #endif

2166:   while (*in >= 0)
2167:     {
2168:       /*
2169:       printf("%d :: out=%d\n",my_id,*out);
2170:       printf("%d :: in=%d\n",my_id,*in);
2171:       */
2172:       *(buf + *out++) = *(vals + *in++);
2173:     }

2175:   grop(buf,work,size,op);

2177:   in   = gs->tree_map_in;
2178:   out  = gs->tree_map_out;

2180:   while (*in >= 0)
2181:     {*(vals + *in++) = *(buf + *out++);}

2183: #ifdef DEBUG
2184:   error_msg_warning("start gs_gop_tree_exists()");
2185: #endif
2186: }



2190: /******************************************************************************
2191: Function: gather_scatter

2193: Input : 
2194: Output: 
2195: Return: 
2196: Description: 
2197: ******************************************************************************/
2198: static void
2199: gs_gop_max_abs(register gs_id *gs, register REAL *vals)
2200: {
2201: #ifdef DEBUG
2202:   error_msg_warning("start gs_gop_xxx()\n");
2203: #endif

2205:   /* local only operations!!! */
2206:   if (gs->num_local)
2207:     {gs_gop_local_max_abs(gs,vals);}

2209:   /* if intersection tree/pairwise and local isn't empty */
2210:   if (gs->num_local_gop)
2211:     {
2212:       gs_gop_local_in_max_abs(gs,vals);

2214:       /* pairwise */
2215:       if (gs->num_pairs)
2216:         {gs_gop_pairwise_max_abs(gs,vals);}
2217: 
2218:       /* tree */
2219:       else if (gs->max_left_over)
2220:         {gs_gop_tree_max_abs(gs,vals);}
2221: 
2222:       gs_gop_local_out(gs,vals);
2223:     }
2224:   /* if intersection tree/pairwise and local is empty */
2225:   else
2226:     {
2227:       /* pairwise */
2228:       if (gs->num_pairs)
2229:         {gs_gop_pairwise_max_abs(gs,vals);}
2230: 
2231:       /* tree */
2232:       else if (gs->max_left_over)
2233:         {gs_gop_tree_max_abs(gs,vals);}
2234:     }
2235: }



2239: /******************************************************************************
2240: Function: gather_scatter

2242: Input : 
2243: Output: 
2244: Return: 
2245: Description: 
2246: ******************************************************************************/
2247: static
2248: void
2249: gs_gop_local_max_abs(register gs_id *gs, register REAL *vals)
2250: {
2251:   register int *num, *map, **reduce;
2252:   register REAL tmp;


2255: #ifdef DEBUG
2256:   error_msg_warning("start gs_gop_xxx()\n");
2257: #endif

2259:   num    = gs->num_local_reduce;
2260:   reduce = gs->local_reduce;
2261:   while ((map = *reduce))
2262:     {
2263:       num ++;
2264:       tmp = 0.0;
2265:       while (*map >= 0)
2266:         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
2267: 
2268:       map = *reduce++;
2269:       while (*map >= 0)
2270:         {*(vals + *map++) = tmp;}
2271:     }
2272: }



2276: /******************************************************************************
2277: Function: gather_scatter

2279: Input : 
2280: Output: 
2281: Return: 
2282: Description: 
2283: ******************************************************************************/
2284: static
2285: void
2286: gs_gop_local_in_max_abs(register gs_id *gs, register REAL *vals)
2287: {
2288:   register int *num, *map, **reduce;
2289:   register REAL *base;


2292: #ifdef DEBUG
2293:   error_msg_warning("start gs_gop_xxx()\n");
2294: #endif

2296:   num    = gs->num_gop_local_reduce;
2297:   reduce = gs->gop_local_reduce;
2298:   while ((map = *reduce++))
2299:     {
2300:       num++;
2301:       base = vals + *map++;
2302:       while (*map >= 0)
2303:         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
2304:     }
2305: }



2309: /******************************************************************************
2310: Function: gather_scatter

2312: VERSION 3 :: 

2314: Input : 
2315: Output: 
2316: Return: 
2317: Description: 
2318: ******************************************************************************/
2319: static
2320: void
2321: gs_gop_pairwise_max_abs(register gs_id *gs, register REAL *in_vals)
2322: {
2323: #if   defined NXSRC
2324:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2325:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2326:   register int *pw, *list, *size, **nodes;
2327:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;



2331: #ifdef DEBUG
2332:   error_msg_warning("start gs_gop_xxx()\n");
2333: #endif

2335:   /* strip and load registers */
2336:   msg_list =list         = gs->pair_list;
2337:   msg_size =size         = gs->msg_sizes;
2338:   msg_nodes=nodes        = gs->node_list;
2339:   iptr=pw                = gs->pw_elm_list;
2340:   dptr1=dptr3            = gs->pw_vals;
2341:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2342:   msg_ids_out = ids_out  = gs->msg_ids_out;
2343:   dptr2                  = gs->out;
2344:   in1=in2                = gs->in;

2346:   /* post the receives */
2347:   do
2348:     {
2349:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
2350:       in1 += *size++;
2351:     }
2352:   while (*msg_ids_in >= 0);

2354:   /* load gs values into in out gs buffers */
2355:   while (*iptr >= 0)
2356:     {*dptr3++ = *(in_vals + *iptr++);}

2358:   /* load out buffers and post the sends */
2359:   while (iptr = *msg_nodes++)
2360:     {
2361:       dptr3 = dptr2;
2362:       while (*iptr >= 0)
2363:         {*dptr2++ = *(dptr1 + *iptr++);}
2364:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
2365:                              *msg_list++,0);
2366:     }

2368:   /* post the receives ... was here*/
2369:   if (gs->max_left_over)
2370:     {gs_gop_tree_max_abs(gs,in_vals);}

2372:   /* process the received data */
2373:   while (iptr = *nodes++)
2374:     {
2375:       msgwait(*ids_in++);
2376:       while (*iptr >= 0)
2377:         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2378:     }

2380:   /* replace vals */
2381:   while (*pw >= 0)
2382:     {*(in_vals + *pw++) = *dptr1++;}

2384:   /* clear isend message handles */
2385:   while (*ids_out >= 0)
2386:     {msgwait(*ids_out++);}

2388: #elif defined MPISRC
2389:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2390:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2391:   register int *pw, *list, *size, **nodes;
2392:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2393:   MPI_Status status;


2396:   /* strip and load registers */
2397:   msg_list =list         = gs->pair_list;
2398:   msg_size =size         = gs->msg_sizes;
2399:   msg_nodes=nodes        = gs->node_list;
2400:   iptr=pw                = gs->pw_elm_list;
2401:   dptr1=dptr3            = gs->pw_vals;
2402:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2403:   msg_ids_out = ids_out  = gs->msg_ids_out;
2404:   dptr2                  = gs->out;
2405:   in1=in2                = gs->in;

2407:   /* post the receives */
2408:   /*  msg_nodes=nodes; */
2409:   do
2410:     {
2411:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2412:          second one *list and do list++ afterwards */
2413:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2414:                 gs->gs_comm, msg_ids_in++);
2415:       in1 += *size++;
2416:     }
2417:   while (*++msg_nodes);
2418:   msg_nodes=nodes;

2420:   /* load gs values into in out gs buffers */
2421:   while (*iptr >= 0)
2422:     {*dptr3++ = *(in_vals + *iptr++);}

2424:   /* load out buffers and post the sends */
2425:   while ((iptr = *msg_nodes++))
2426:     {
2427:       dptr3 = dptr2;
2428:       while (*iptr >= 0)
2429:         {*dptr2++ = *(dptr1 + *iptr++);}
2430:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2431:       /* is msg_ids_out++ correct? */
2432:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2433:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2434:     }

2436:   if (gs->max_left_over)
2437:     {gs_gop_tree_max_abs(gs,in_vals);}

2439:   /* process the received data */
2440:   msg_nodes=nodes;
2441:   while ((iptr = *nodes++))
2442:     {
2443:       /* Should I check the return value of MPI_Wait() or status? */
2444:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2445:       MPI_Wait(ids_in++, &status);
2446:       while (*iptr >= 0)
2447:         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2448:     }

2450:   /* replace vals */
2451:   while (*pw >= 0)
2452:     {*(in_vals + *pw++) = *dptr1++;}

2454:   /* clear isend message handles */
2455:   /* This changed for clarity though it could be the same */
2456:   while (*msg_nodes++)
2457:     /* Should I check the return value of MPI_Wait() or status? */
2458:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2459:     {MPI_Wait(ids_out++, &status);}
2460: #else
2461:   return;
2462: #endif
2463: }



2467: /******************************************************************************
2468: Function: gather_scatter

2470: Input : 
2471: Output: 
2472: Return: 
2473: Description: 
2474: ******************************************************************************/
2475: static
2476: void
2477: gs_gop_tree_max_abs(gs_id *gs, REAL *vals)
2478: {
2479:   int size;
2480:   int *in, *out;
2481:   REAL *buf, *work;
2482:   int op[] = {GL_MAX_ABS,0};


2485: #ifdef DEBUG
2486:   error_msg_warning("start gs_gop_tree_max_abs()");
2487: #endif
2488: 
2489:   in   = gs->tree_map_in;
2490:   out  = gs->tree_map_out;
2491:   buf  = gs->tree_buf;
2492:   work = gs->tree_work;
2493:   size = gs->tree_nel;

2495: #if defined BLAS||CBLAS
2496:   *work = 0.0;
2497:   copy(size,work,0,buf,1);
2498: #else
2499:   rvec_zero(buf,size);
2500: #endif

2502:   while (*in >= 0)
2503:     {
2504:       /*
2505:       printf("%d :: out=%d\n",my_id,*out);
2506:       printf("%d :: in=%d\n",my_id,*in);
2507:       */
2508:       *(buf + *out++) = *(vals + *in++);
2509:     }

2511:   grop(buf,work,size,op);

2513:   in   = gs->tree_map_in;
2514:   out  = gs->tree_map_out;

2516:   while (*in >= 0)
2517:     {*(vals + *in++) = *(buf + *out++);}

2519: #ifdef DEBUG
2520:   error_msg_warning("start gs_gop_tree_max_abs()");
2521: #endif
2522: }



2526: /******************************************************************************
2527: Function: gather_scatter

2529: Input : 
2530: Output: 
2531: Return: 
2532: Description: 
2533: ******************************************************************************/
2534: static void
2535: gs_gop_max(register gs_id *gs, register REAL *vals)
2536: {
2537: #ifdef DEBUG
2538:   error_msg_warning("start gs_gop_xxx()\n");
2539: #endif


2542:   /* local only operations!!! */
2543:   if (gs->num_local)
2544:     {gs_gop_local_max(gs,vals);}

2546:   /* if intersection tree/pairwise and local isn't empty */
2547:   if (gs->num_local_gop)
2548:     {
2549:       gs_gop_local_in_max(gs,vals);

2551:       /* pairwise */
2552:       if (gs->num_pairs)
2553:         {gs_gop_pairwise_max(gs,vals);}
2554: 
2555:       /* tree */
2556:       else if (gs->max_left_over)
2557:         {gs_gop_tree_max(gs,vals);}
2558: 
2559:       gs_gop_local_out(gs,vals);
2560:     }
2561:   /* if intersection tree/pairwise and local is empty */
2562:   else
2563:     {
2564:       /* pairwise */
2565:       if (gs->num_pairs)
2566:         {gs_gop_pairwise_max(gs,vals);}
2567: 
2568:       /* tree */
2569:       else if (gs->max_left_over)
2570:         {gs_gop_tree_max(gs,vals);}
2571:     }
2572: }



2576: /******************************************************************************
2577: Function: gather_scatter

2579: Input : 
2580: Output: 
2581: Return: 
2582: Description: 
2583: ******************************************************************************/
2584: static
2585: void
2586: gs_gop_local_max(register gs_id *gs, register REAL *vals)
2587: {
2588:   register int *num, *map, **reduce;
2589:   register REAL tmp;


2592: #ifdef DEBUG
2593:   error_msg_warning("start gs_gop_xxx()\n");
2594: #endif

2596:   num    = gs->num_local_reduce;
2597:   reduce = gs->local_reduce;
2598:   while ((map = *reduce))
2599:     {
2600:       num ++;
2601:       tmp = -REAL_MAX;
2602:       while (*map >= 0)
2603:         {tmp = MAX(tmp,*(vals + *map)); map++;}
2604: 
2605:       map = *reduce++;
2606:       while (*map >= 0)
2607:         {*(vals + *map++) = tmp;}
2608:     }
2609: }



2613: /******************************************************************************
2614: Function: gather_scatter

2616: Input : 
2617: Output: 
2618: Return: 
2619: Description: 
2620: ******************************************************************************/
2621: static
2622: void
2623: gs_gop_local_in_max(register gs_id *gs, register REAL *vals)
2624: {
2625:   register int *num, *map, **reduce;
2626:   register REAL *base;


2629: #ifdef DEBUG
2630:   error_msg_warning("start gs_gop_xxx()\n");
2631: #endif

2633:   num    = gs->num_gop_local_reduce;
2634:   reduce = gs->gop_local_reduce;
2635:   while ((map = *reduce++))
2636:     {
2637:       num++;
2638:       base = vals + *map++;
2639:       while (*map >= 0)
2640:         {*base = MAX(*base,*(vals + *map)); map++;}
2641:     }
2642: }



2646: /******************************************************************************
2647: Function: gather_scatter

2649: VERSION 3 :: 

2651: Input : 
2652: Output: 
2653: Return: 
2654: Description: 
2655: ******************************************************************************/
2656: static
2657: void
2658: gs_gop_pairwise_max(register gs_id *gs, register REAL *in_vals)
2659: {
2660: #if   defined NXSRC
2661:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2662:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2663:   register int *pw, *list, *size, **nodes;
2664:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;


2667: #ifdef DEBUG
2668:   error_msg_warning("start gs_gop_xxx()\n");
2669: #endif

2671:   /* strip and load registers */
2672:   msg_list =list         = gs->pair_list;
2673:   msg_size =size         = gs->msg_sizes;
2674:   msg_nodes=nodes        = gs->node_list;
2675:   iptr=pw                = gs->pw_elm_list;
2676:   dptr1=dptr3            = gs->pw_vals;
2677:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2678:   msg_ids_out = ids_out  = gs->msg_ids_out;
2679:   dptr2                  = gs->out;
2680:   in1=in2                = gs->in;

2682:   /* post the receives */
2683:   do
2684:     {
2685:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
2686:       in1 += *size++;
2687:     }
2688:   while (*msg_ids_in >= 0);

2690:   /* load gs values into in out gs buffers */
2691:   while (*iptr >= 0)
2692:     {*dptr3++ = *(in_vals + *iptr++);}

2694:   /* load out buffers and post the sends */
2695:   while (iptr = *msg_nodes++)
2696:     {
2697:       dptr3 = dptr2;
2698:       while (*iptr >= 0)
2699:         {*dptr2++ = *(dptr1 + *iptr++);}
2700:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
2701:                              *msg_list++,0);
2702:     }

2704:   /* post the receives ... was here*/
2705:   if (gs->max_left_over)
2706:     {gs_gop_tree_max(gs,in_vals);}

2708:   /* process the received data */
2709:   while (iptr = *nodes++)
2710:     {
2711:       msgwait(*ids_in++);
2712:       while (*iptr >= 0)
2713:         {*(dptr1 + *iptr) = MAX(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2714:     }

2716:   /* replace vals */
2717:   while (*pw >= 0)
2718:     {*(in_vals + *pw++) = *dptr1++;}

2720:   /* clear isend message handles */
2721:   while (*ids_out >= 0)
2722:     {msgwait(*ids_out++);}

2724: #elif defined MPISRC
2725:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2726:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2727:   register int *pw, *list, *size, **nodes;
2728:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2729:   MPI_Status status;


2732:   /* strip and load registers */
2733:   msg_list =list         = gs->pair_list;
2734:   msg_size =size         = gs->msg_sizes;
2735:   msg_nodes=nodes        = gs->node_list;
2736:   iptr=pw                = gs->pw_elm_list;
2737:   dptr1=dptr3            = gs->pw_vals;
2738:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2739:   msg_ids_out = ids_out  = gs->msg_ids_out;
2740:   dptr2                  = gs->out;
2741:   in1=in2                = gs->in;

2743:   /* post the receives */
2744:   /*  msg_nodes=nodes; */
2745:   do
2746:     {
2747:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2748:          second one *list and do list++ afterwards */
2749:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2750:                 gs->gs_comm, msg_ids_in++);
2751:       in1 += *size++;
2752:     }
2753:   while (*++msg_nodes);
2754:   msg_nodes=nodes;

2756:   /* load gs values into in out gs buffers */
2757:   while (*iptr >= 0)
2758:     {*dptr3++ = *(in_vals + *iptr++);}

2760:   /* load out buffers and post the sends */
2761:   while ((iptr = *msg_nodes++))
2762:     {
2763:       dptr3 = dptr2;
2764:       while (*iptr >= 0)
2765:         {*dptr2++ = *(dptr1 + *iptr++);}
2766:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2767:       /* is msg_ids_out++ correct? */
2768:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
2769:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2770:     }

2772:   if (gs->max_left_over)
2773:     {gs_gop_tree_max(gs,in_vals);}

2775:   /* process the received data */
2776:   msg_nodes=nodes;
2777:   while ((iptr = *nodes++))
2778:     {
2779:       /* Should I check the return value of MPI_Wait() or status? */
2780:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2781:       MPI_Wait(ids_in++, &status);
2782:       while (*iptr >= 0)
2783:         {*(dptr1 + *iptr) = MAX(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2784:     }

2786:   /* replace vals */
2787:   while (*pw >= 0)
2788:     {*(in_vals + *pw++) = *dptr1++;}

2790:   /* clear isend message handles */
2791:   /* This changed for clarity though it could be the same */
2792:   while (*msg_nodes++)
2793:     /* Should I check the return value of MPI_Wait() or status? */
2794:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2795:     {MPI_Wait(ids_out++, &status);}
2796: #else
2797:   return;
2798: #endif
2799: }



2803: /******************************************************************************
2804: Function: gather_scatter

2806: Input : 
2807: Output: 
2808: Return: 
2809: Description: 
2810: ******************************************************************************/
2811: static
2812: void
2813: gs_gop_tree_max(gs_id *gs, REAL *vals)
2814: {
2815:   int size;
2816:   int *in, *out;
2817:   REAL *buf, *work;
2818:   /* int op[] = {GL_MAX,0}; */


2821: #ifdef DEBUG
2822:   error_msg_warning("start gs_gop_tree_max()");
2823: #endif
2824: 
2825:   in   = gs->tree_map_in;
2826:   out  = gs->tree_map_out;
2827:   buf  = gs->tree_buf;
2828:   work = gs->tree_work;
2829:   size = gs->tree_nel;

2831: #if defined BLAS||CBLAS
2832:   *work = -REAL_MAX;
2833:   copy(size,work,0,buf,1);
2834: #else
2835:   rvec_set(buf,-REAL_MAX,size);
2836: #endif

2838:   while (*in >= 0)
2839:     {*(buf + *out++) = *(vals + *in++);}

2841:   in   = gs->tree_map_in;
2842:   out  = gs->tree_map_out;
2843: #if defined(NXSRC) && defined(r8)
2844:   gdhigh(buf,size,work);
2845:   while (*in >= 0)
2846:     {*(vals + *in++) = *(buf + *out++);}
2847: #elif defined MPISRC
2848:   MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_MAX,gs->gs_comm);
2849:   while (*in >= 0)
2850:     {*(vals + *in++) = *(work + *out++);}
2851: #else
2852:   grop(buf,work,size,op);
2853:   while (*in >= 0)
2854:     {*(vals + *in++) = *(buf + *out++);}
2855: #endif

2857: #ifdef DEBUG
2858:   error_msg_warning("end gs_gop_tree_max()");
2859: #endif
2860: }



2864: /******************************************************************************
2865: Function: gather_scatter

2867: Input : 
2868: Output: 
2869: Return: 
2870: Description: 
2871: ******************************************************************************/
2872: static void
2873: gs_gop_min_abs(register gs_id *gs, register REAL *vals)
2874: {
2875: #ifdef DEBUG
2876:   error_msg_warning("start gs_gop_xxx()\n");
2877: #endif

2879:   /* local only operations!!! */
2880:   if (gs->num_local)
2881:     {gs_gop_local_min_abs(gs,vals);}

2883:   /* if intersection tree/pairwise and local isn't empty */
2884:   if (gs->num_local_gop)
2885:     {
2886:       gs_gop_local_in_min_abs(gs,vals);

2888:       /* pairwise */
2889:       if (gs->num_pairs)
2890:         {gs_gop_pairwise_min_abs(gs,vals);}
2891: 
2892:       /* tree */
2893:       else if (gs->max_left_over)
2894:         {gs_gop_tree_min_abs(gs,vals);}
2895: 
2896:       gs_gop_local_out(gs,vals);
2897:     }
2898:   /* if intersection tree/pairwise and local is empty */
2899:   else
2900:     {
2901:       /* pairwise */
2902:       if (gs->num_pairs)
2903:         {gs_gop_pairwise_min_abs(gs,vals);}
2904: 
2905:       /* tree */
2906:       else if (gs->max_left_over)
2907:         {gs_gop_tree_min_abs(gs,vals);}
2908:     }
2909: }



2913: /******************************************************************************
2914: Function: gather_scatter

2916: Input : 
2917: Output: 
2918: Return: 
2919: Description: 
2920: ******************************************************************************/
2921: static
2922: void
2923: gs_gop_local_min_abs(register gs_id *gs, register REAL *vals)
2924: {
2925:   register int *num, *map, **reduce;
2926:   register REAL tmp;


2929: #ifdef DEBUG
2930:   error_msg_warning("start gs_gop_xxx()\n");
2931: #endif

2933:   num    = gs->num_local_reduce;
2934:   reduce = gs->local_reduce;
2935:   while ((map = *reduce))
2936:     {
2937:       num ++;
2938:       tmp = REAL_MAX;
2939:       while (*map >= 0)
2940:         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2941: 
2942:       map = *reduce++;
2943:       while (*map >= 0)
2944:         {*(vals + *map++) = tmp;}
2945:     }
2946: }



2950: /******************************************************************************
2951: Function: gather_scatter

2953: Input : 
2954: Output: 
2955: Return: 
2956: Description: 
2957: ******************************************************************************/
2958: static
2959: void
2960: gs_gop_local_in_min_abs(register gs_id *gs, register REAL *vals)
2961: {
2962:   register int *num, *map, **reduce;
2963:   register REAL *base;

2965: #ifdef DEBUG
2966:   error_msg_warning("start gs_gop_xxx()\n");
2967: #endif

2969:   num    = gs->num_gop_local_reduce;
2970:   reduce = gs->gop_local_reduce;
2971:   while ((map = *reduce++))
2972:     {
2973:       num++;
2974:       base = vals + *map++;
2975:       while (*map >= 0)
2976:         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2977:     }
2978: }



2982: /******************************************************************************
2983: Function: gather_scatter

2985: VERSION 3 :: 

2987: Input : 
2988: Output: 
2989: Return: 
2990: Description: 
2991: ******************************************************************************/
2992: static
2993: void
2994: gs_gop_pairwise_min_abs(register gs_id *gs, register REAL *in_vals)
2995: {
2996: #if   defined NXSRC
2997:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
2998:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
2999:   register int *pw, *list, *size, **nodes;
3000:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;


3003: #ifdef DEBUG
3004:   error_msg_warning("start gs_gop_xxx()\n");
3005: #endif

3007:   /* strip and load registers */
3008:   msg_list =list         = gs->pair_list;
3009:   msg_size =size         = gs->msg_sizes;
3010:   msg_nodes=nodes        = gs->node_list;
3011:   iptr=pw                = gs->pw_elm_list;
3012:   dptr1=dptr3            = gs->pw_vals;
3013:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3014:   msg_ids_out = ids_out  = gs->msg_ids_out;
3015:   dptr2                  = gs->out;
3016:   in1=in2                = gs->in;

3018:   /* post the receives */
3019:   do
3020:     {
3021:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
3022:       in1 += *size++;
3023:     }
3024:   while (*msg_ids_in >= 0);

3026:   /* load gs values into in out gs buffers */
3027:   while (*iptr >= 0)
3028:     {*dptr3++ = *(in_vals + *iptr++);}

3030:   /* load out buffers and post the sends */
3031:   while (iptr = *msg_nodes++)
3032:     {
3033:       dptr3 = dptr2;
3034:       while (*iptr >= 0)
3035:         {*dptr2++ = *(dptr1 + *iptr++);}
3036:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
3037:                              *msg_list++,0);
3038:     }

3040:   /* post the receives ... was here*/
3041:   if (gs->max_left_over)
3042:     {gs_gop_tree_min_abs(gs,in_vals);}

3044:   /* process the received data */
3045:   while (iptr = *nodes++)
3046:     {
3047:       msgwait(*ids_in++);
3048:       while (*iptr >= 0)
3049:         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
3050:     }

3052:   /* replace vals */
3053:   while (*pw >= 0)
3054:     {*(in_vals + *pw++) = *dptr1++;}

3056:   /* clear isend message handles */
3057:   while (*ids_out >= 0)
3058:     {msgwait(*ids_out++);}

3060: #elif defined MPISRC
3061:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3062:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
3063:   register int *pw, *list, *size, **nodes;
3064:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3065:   MPI_Status status;


3068:   /* strip and load registers */
3069:   msg_list =list         = gs->pair_list;
3070:   msg_size =size         = gs->msg_sizes;
3071:   msg_nodes=nodes        = gs->node_list;
3072:   iptr=pw                = gs->pw_elm_list;
3073:   dptr1=dptr3            = gs->pw_vals;
3074:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3075:   msg_ids_out = ids_out  = gs->msg_ids_out;
3076:   dptr2                  = gs->out;
3077:   in1=in2                = gs->in;

3079:   /* post the receives */
3080:   /*  msg_nodes=nodes; */
3081:   do
3082:     {
3083:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3084:          second one *list and do list++ afterwards */
3085:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3086:                 gs->gs_comm, msg_ids_in++);
3087:       in1 += *size++;
3088:     }
3089:   while (*++msg_nodes);
3090:   msg_nodes=nodes;

3092:   /* load gs values into in out gs buffers */
3093:   while (*iptr >= 0)
3094:     {*dptr3++ = *(in_vals + *iptr++);}

3096:   /* load out buffers and post the sends */
3097:   while ((iptr = *msg_nodes++))
3098:     {
3099:       dptr3 = dptr2;
3100:       while (*iptr >= 0)
3101:         {*dptr2++ = *(dptr1 + *iptr++);}
3102:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3103:       /* is msg_ids_out++ correct? */
3104:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
3105:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3106:     }

3108:   if (gs->max_left_over)
3109:     {gs_gop_tree_min_abs(gs,in_vals);}

3111:   /* process the received data */
3112:   msg_nodes=nodes;
3113:   while ((iptr = *nodes++))
3114:     {
3115:       /* Should I check the return value of MPI_Wait() or status? */
3116:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3117:       MPI_Wait(ids_in++, &status);
3118:       while (*iptr >= 0)
3119:         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
3120:     }

3122:   /* replace vals */
3123:   while (*pw >= 0)
3124:     {*(in_vals + *pw++) = *dptr1++;}

3126:   /* clear isend message handles */
3127:   /* This changed for clarity though it could be the same */
3128:   while (*msg_nodes++)
3129:     /* Should I check the return value of MPI_Wait() or status? */
3130:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3131:     {MPI_Wait(ids_out++, &status);}
3132: #else
3133:   return;
3134: #endif
3135: }



3139: /******************************************************************************
3140: Function: gather_scatter

3142: Input : 
3143: Output: 
3144: Return: 
3145: Description: 
3146: ******************************************************************************/
3147: static
3148: void
3149: gs_gop_tree_min_abs(gs_id *gs, REAL *vals)
3150: {
3151:   int size;
3152:   int *in, *out;
3153:   REAL *buf, *work;
3154:   int op[] = {GL_MIN_ABS,0};


3157: #ifdef DEBUG
3158:   error_msg_warning("start gs_gop_tree_min_abs()");
3159: #endif
3160: 
3161:   in   = gs->tree_map_in;
3162:   out  = gs->tree_map_out;
3163:   buf  = gs->tree_buf;
3164:   work = gs->tree_work;
3165:   size = gs->tree_nel;

3167: #if defined  BLAS||CBLAS
3168:   *work = REAL_MAX;
3169:   copy(size,work,0,buf,1);
3170: #else
3171:   rvec_set(buf,REAL_MAX,size);
3172: #endif

3174:   while (*in >= 0)
3175:     {*(buf + *out++) = *(vals + *in++);}

3177:   in   = gs->tree_map_in;
3178:   out  = gs->tree_map_out;
3179:   grop(buf,work,size,op);
3180:   while (*in >= 0)
3181:     {*(vals + *in++) = *(buf + *out++);}

3183: #ifdef DEBUG
3184:   error_msg_warning("end gs_gop_tree_min_abs()");
3185: #endif
3186: }



3190: /******************************************************************************
3191: Function: gather_scatter

3193: Input : 
3194: Output: 
3195: Return: 
3196: Description: 
3197: ******************************************************************************/
3198: static void
3199: gs_gop_min(register gs_id *gs, register REAL *vals)
3200: {
3201: #ifdef DEBUG
3202:   error_msg_warning("start gs_gop_xxx()\n");
3203: #endif

3205:   /* local only operations!!! */
3206:   if (gs->num_local)
3207:     {gs_gop_local_min(gs,vals);}

3209:   /* if intersection tree/pairwise and local isn't empty */
3210:   if (gs->num_local_gop)
3211:     {
3212:       gs_gop_local_in_min(gs,vals);

3214:       /* pairwise */
3215:       if (gs->num_pairs)
3216:         {gs_gop_pairwise_min(gs,vals);}
3217: 
3218:       /* tree */
3219:       else if (gs->max_left_over)
3220:         {gs_gop_tree_min(gs,vals);}
3221: 
3222:       gs_gop_local_out(gs,vals);
3223:     }
3224:   /* if intersection tree/pairwise and local is empty */
3225:   else
3226:     {
3227:       /* pairwise */
3228:       if (gs->num_pairs)
3229:         {gs_gop_pairwise_min(gs,vals);}
3230: 
3231:       /* tree */
3232:       else if (gs->max_left_over)
3233:         {gs_gop_tree_min(gs,vals);}
3234:     }
3235: #ifdef DEBUG
3236:   error_msg_warning("end gs_gop_xxx()\n");
3237: #endif
3238: }



3242: /******************************************************************************
3243: Function: gather_scatter

3245: Input : 
3246: Output: 
3247: Return: 
3248: Description: 
3249: ******************************************************************************/
3250: static
3251: void
3252: gs_gop_local_min(register gs_id *gs, register REAL *vals)
3253: {
3254:   register int *num, *map, **reduce;
3255:   register REAL tmp;


3258: #ifdef DEBUG
3259:   error_msg_warning("start gs_gop_xxx()\n");
3260: #endif

3262:   num    = gs->num_local_reduce;
3263:   reduce = gs->local_reduce;
3264:   while ((map = *reduce))
3265:     {
3266:       num ++;
3267:       tmp = REAL_MAX;
3268:       while (*map >= 0)
3269:         {tmp = MIN(tmp,*(vals + *map)); map++;}
3270: 
3271:       map = *reduce++;
3272:       while (*map >= 0)
3273:         {*(vals + *map++) = tmp;}
3274:     }
3275: }



3279: /******************************************************************************
3280: Function: gather_scatter

3282: Input : 
3283: Output: 
3284: Return: 
3285: Description: 
3286: ******************************************************************************/
3287: static
3288: void
3289: gs_gop_local_in_min(register gs_id *gs, register REAL *vals)
3290: {
3291:   register int *num, *map, **reduce;
3292:   register REAL *base;


3295: #ifdef DEBUG
3296:   error_msg_warning("start gs_gop_xxx()\n");
3297: #endif

3299:   num    = gs->num_gop_local_reduce;
3300:   reduce = gs->gop_local_reduce;
3301:   while ((map = *reduce++))
3302:     {
3303:       num++;
3304:       base = vals + *map++;
3305:       while (*map >= 0)
3306:         {*base = MIN(*base,*(vals + *map)); map++;}
3307:     }
3308: }



3312: /******************************************************************************
3313: Function: gather_scatter

3315: VERSION 3 :: 

3317: Input : 
3318: Output: 
3319: Return: 
3320: Description: 
3321: ******************************************************************************/
3322: static
3323: void
3324: gs_gop_pairwise_min(register gs_id *gs, register REAL *in_vals)
3325: {
3326: #if   defined NXSRC
3327:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3328:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
3329:   register int *pw, *list, *size, **nodes;
3330:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;


3333: #ifdef DEBUG
3334:   error_msg_warning("start gs_gop_xxx()\n");
3335: #endif

3337:   /* strip and load registers */
3338:   msg_list =list         = gs->pair_list;
3339:   msg_size =size         = gs->msg_sizes;
3340:   msg_nodes=nodes        = gs->node_list;
3341:   iptr=pw                = gs->pw_elm_list;
3342:   dptr1=dptr3            = gs->pw_vals;
3343:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3344:   msg_ids_out = ids_out  = gs->msg_ids_out;
3345:   dptr2                  = gs->out;
3346:   in1=in2                = gs->in;

3348:   /* post the receives */
3349:   do
3350:     {
3351:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
3352:       in1 += *size++;
3353:     }
3354:   while (*msg_ids_in >= 0);

3356:   /* load gs values into in out gs buffers */
3357:   while (*iptr >= 0)
3358:     {*dptr3++ = *(in_vals + *iptr++);}

3360:   /* load out buffers and post the sends */
3361:   while (iptr = *msg_nodes++)
3362:     {
3363:       dptr3 = dptr2;
3364:       while (*iptr >= 0)
3365:         {*dptr2++ = *(dptr1 + *iptr++);}
3366:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
3367:                              *msg_list++,0);
3368:     }

3370:   /* post the receives ... was here*/
3371:   if (gs->max_left_over)
3372:     {gs_gop_tree_min(gs,in_vals);}

3374:   /* process the received data */
3375:   while (iptr = *nodes++)
3376:     {
3377:       msgwait(*ids_in++);
3378:       while (*iptr >= 0)
3379:         {*(dptr1 + *iptr) = MIN(*(dptr1 + *iptr),*in2); iptr++; in2++;}
3380:     }

3382:   /* replace vals */
3383:   while (*pw >= 0)
3384:     {*(in_vals + *pw++) = *dptr1++;}

3386:   /* clear isend message handles */
3387:   while (*ids_out >= 0)
3388:     {msgwait(*ids_out++);}

3390: #elif defined MPISRC
3391:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3392:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
3393:   register int *pw, *list, *size, **nodes;
3394:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3395:   MPI_Status status;


3398:   /* strip and load registers */
3399:   msg_list =list         = gs->pair_list;
3400:   msg_size =size         = gs->msg_sizes;
3401:   msg_nodes=nodes        = gs->node_list;
3402:   iptr=pw                = gs->pw_elm_list;
3403:   dptr1=dptr3            = gs->pw_vals;
3404:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3405:   msg_ids_out = ids_out  = gs->msg_ids_out;
3406:   dptr2                  = gs->out;
3407:   in1=in2                = gs->in;

3409:   /* post the receives */
3410:   /*  msg_nodes=nodes; */
3411:   do
3412:     {
3413:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3414:          second one *list and do list++ afterwards */
3415:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3416:                 gs->gs_comm, msg_ids_in++);
3417:       in1 += *size++;
3418:     }
3419:   while (*++msg_nodes);
3420:   msg_nodes=nodes;

3422:   /* load gs values into in out gs buffers */
3423:   while (*iptr >= 0)
3424:     {*dptr3++ = *(in_vals + *iptr++);}

3426:   /* load out buffers and post the sends */
3427:   while ((iptr = *msg_nodes++))
3428:     {
3429:       dptr3 = dptr2;
3430:       while (*iptr >= 0)
3431:         {*dptr2++ = *(dptr1 + *iptr++);}
3432:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3433:       /* is msg_ids_out++ correct? */
3434:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
3435:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3436:     }

3438:   /* process the received data */
3439:   if (gs->max_left_over)
3440:     {gs_gop_tree_min(gs,in_vals);}

3442:   msg_nodes=nodes;
3443:   while ((iptr = *nodes++))
3444:     {
3445:       /* Should I check the return value of MPI_Wait() or status? */
3446:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3447:       MPI_Wait(ids_in++, &status);
3448:       while (*iptr >= 0)
3449:         {*(dptr1 + *iptr) = MIN(*(dptr1 + *iptr),*in2); iptr++; in2++;}
3450:     }

3452:   /* replace vals */
3453:   while (*pw >= 0)
3454:     {*(in_vals + *pw++) = *dptr1++;}

3456:   /* clear isend message handles */
3457:   /* This changed for clarity though it could be the same */
3458:   while (*msg_nodes++)
3459:     /* Should I check the return value of MPI_Wait() or status? */
3460:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3461:     {MPI_Wait(ids_out++, &status);}
3462: #else
3463:   return;
3464: #endif
3465: }



3469: /******************************************************************************
3470: Function: gather_scatter

3472: Input : 
3473: Output: 
3474: Return: 
3475: Description: 
3476: ******************************************************************************/
3477: static
3478: void
3479: gs_gop_tree_min(gs_id *gs, REAL *vals)
3480: {
3481:   int size;
3482:   int *in, *out;
3483:   REAL *buf, *work;
3484:   /*int op[] = {GL_MIN,0};*/


3487: #ifdef DEBUG
3488:   error_msg_warning("start gs_gop_tree_min()");
3489: #endif
3490: 
3491:   in   = gs->tree_map_in;
3492:   out  = gs->tree_map_out;
3493:   buf  = gs->tree_buf;
3494:   work = gs->tree_work;
3495:   size = gs->tree_nel;

3497: #if defined  BLAS||CBLAS
3498:   *work = REAL_MAX;
3499:   copy(size,work,0,buf,1);
3500: #else
3501:   rvec_set(buf,REAL_MAX,size);
3502: #endif

3504:   while (*in >= 0)
3505:     {*(buf + *out++) = *(vals + *in++);}

3507:   in   = gs->tree_map_in;
3508:   out  = gs->tree_map_out;
3509: #if defined(NXSRC) && defined(r8)
3510:   gdlow(buf,size,work);
3511:   while (*in >= 0)
3512:     {*(vals + *in++) = *(buf + *out++);}
3513: #elif defined MPISRC
3514:   MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_MIN,gs->gs_comm);
3515:   while (*in >= 0)
3516:     {*(vals + *in++) = *(work + *out++);}
3517: #else
3518:   grop(buf,work,size,op);
3519:   while (*in >= 0)
3520:     {*(vals + *in++) = *(buf + *out++);}
3521: #endif

3523: #ifdef DEBUG
3524:   error_msg_warning("end gs_gop_tree_min()");
3525: #endif
3526: }



3530: /******************************************************************************
3531: Function: gather_scatter

3533: Input : 
3534: Output: 
3535: Return: 
3536: Description: 
3537: ******************************************************************************/
3538: static void
3539: gs_gop_times(register gs_id *gs, register REAL *vals)
3540: {
3541: #ifdef DEBUG  
3542:   error_msg_warning("start gs_gop_times()\n");
3543: #endif

3545:   /* local only operations!!! */
3546:   if (gs->num_local)
3547:     {gs_gop_local_times(gs,vals);}

3549:   /* if intersection tree/pairwise and local isn't empty */
3550:   if (gs->num_local_gop)
3551:     {
3552:       gs_gop_local_in_times(gs,vals);

3554:       /* pairwise */
3555:       if (gs->num_pairs)
3556:         {gs_gop_pairwise_times(gs,vals);}
3557: 
3558:       /* tree */
3559:       else if (gs->max_left_over)
3560:         {gs_gop_tree_times(gs,vals);}
3561: 
3562:       gs_gop_local_out(gs,vals);
3563:     }
3564:   /* if intersection tree/pairwise and local is empty */
3565:   else
3566:     {
3567:       /* pairwise */
3568:       if (gs->num_pairs)
3569:         {gs_gop_pairwise_times(gs,vals);}
3570: 
3571:       /* tree */
3572:       else if (gs->max_left_over)
3573:         {gs_gop_tree_times(gs,vals);}
3574:     }
3575: }



3579: /******************************************************************************
3580: Function: gather_scatter

3582: Input : 
3583: Output: 
3584: Return: 
3585: Description: 
3586: ******************************************************************************/
3587: static
3588: void
3589: gs_gop_local_times(register gs_id *gs, register REAL *vals)
3590: {
3591:   register int *num, *map, **reduce;
3592:   register REAL tmp;


3595: #ifdef DEBUG  
3596:   error_msg_warning("start gs_gop_xxx()\n");
3597: #endif

3599:   num    = gs->num_local_reduce;
3600:   reduce = gs->local_reduce;
3601:   while ((map = *reduce))
3602:     {
3603:       /* wall */
3604:       if (*num == 2)
3605:         {
3606:           num ++; reduce++;
3607:           vals[map[1]] = vals[map[0]] *= vals[map[1]];
3608:         }
3609:       /* corner shared by three elements */
3610:       else if (*num == 3)
3611:         {
3612:           num ++; reduce++;
3613:           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
3614:         }
3615:       /* corner shared by four elements */
3616:       else if (*num == 4)
3617:         {
3618:           num ++; reduce++;
3619:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
3620:                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
3621:         }
3622:       /* general case ... odd geoms ... 3D*/
3623:       else
3624:         {
3625:           num ++;
3626:            tmp = 1.0;
3627:           while (*map >= 0)
3628:             {tmp *= *(vals + *map++);}

3630:           map = *reduce++;
3631:           while (*map >= 0)
3632:             {*(vals + *map++) = tmp;}
3633:         }
3634:     }
3635: }



3639: /******************************************************************************
3640: Function: gather_scatter

3642: Input : 
3643: Output: 
3644: Return: 
3645: Description: 
3646: ******************************************************************************/
3647: static
3648: void
3649: gs_gop_local_in_times(register gs_id *gs, register REAL *vals)
3650: {
3651:   register int *num, *map, **reduce;
3652:   register REAL *base;


3655: #ifdef DEBUG  
3656:   error_msg_warning("start gs_gop_xxx()\n");
3657: #endif

3659:   num    = gs->num_gop_local_reduce;
3660:   reduce = gs->gop_local_reduce;
3661:   while ((map = *reduce++))
3662:     {
3663:       /* wall */
3664:       if (*num == 2)
3665:         {
3666:           num ++;
3667:           vals[map[0]] *= vals[map[1]];
3668:         }
3669:       /* corner shared by three elements */
3670:       else if (*num == 3)
3671:         {
3672:           num ++;
3673:           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
3674:         }
3675:       /* corner shared by four elements */
3676:       else if (*num == 4)
3677:         {
3678:           num ++;
3679:           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
3680:         }
3681:       /* general case ... odd geoms ... 3D*/
3682:       else
3683:         {
3684:           num++;
3685:           base = vals + *map++;
3686:           while (*map >= 0)
3687:             {*base *= *(vals + *map++);}
3688:         }
3689:     }
3690: }



3694: /******************************************************************************
3695: Function: gather_scatter

3697: VERSION 3 :: 

3699: Input : 
3700: Output: 
3701: Return: 
3702: Description: 
3703: ******************************************************************************/
3704: static
3705: void
3706: gs_gop_pairwise_times(register gs_id *gs, register REAL *in_vals)
3707: {
3708: #if   defined NXSRC
3709:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3710:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
3711:   register int *pw, *list, *size, **nodes;
3712:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;



3716: #ifdef DEBUG  
3717:   error_msg_warning("start gs_gop_xxx()\n");
3718: #endif

3720:   /* strip and load registers */
3721:   msg_list =list         = gs->pair_list;
3722:   msg_size =size         = gs->msg_sizes;
3723:   msg_nodes=nodes        = gs->node_list;
3724:   iptr=pw                = gs->pw_elm_list;
3725:   dptr1=dptr3            = gs->pw_vals;
3726:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3727:   msg_ids_out = ids_out  = gs->msg_ids_out;
3728:   dptr2                  = gs->out;
3729:   in1=in2                = gs->in;

3731:   /* post the receives */
3732:   do
3733:     {
3734:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
3735:       in1 += *size++;
3736:     }
3737:   while (*msg_ids_in >= 0);

3739:   /* load gs values into in out gs buffers */
3740:   while (*iptr >= 0)
3741:     {*dptr3++ = *(in_vals + *iptr++);}

3743:   /* load out buffers and post the sends */
3744:   while (iptr = *msg_nodes++)
3745:     {
3746:       dptr3 = dptr2;
3747:       while (*iptr >= 0)
3748:         {*dptr2++ = *(dptr1 + *iptr++);}
3749:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
3750:                              *msg_list++,0);
3751:     }

3753:   /* post the receives ... was here*/
3754:   if (gs->max_left_over)
3755:     {gs_gop_tree_times(gs,in_vals);}

3757:   /* process the received data */
3758:   while (iptr = *nodes++)
3759:     {
3760:       msgwait(*ids_in++);
3761:       while (*iptr >= 0)
3762:         {*(dptr1 + *iptr++) *= *in2++;}
3763:     }

3765:   /* replace vals */
3766:   while (*pw >= 0)
3767:     {*(in_vals + *pw++) = *dptr1++;}

3769:   /* clear isend message handles */
3770:   while (*ids_out >= 0)
3771:     {msgwait(*ids_out++);}

3773: #elif defined MPISRC
3774:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
3775:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
3776:   register int *pw, *list, *size, **nodes;
3777:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3778:   MPI_Status status;


3781:   /* strip and load registers */
3782:   msg_list =list         = gs->pair_list;
3783:   msg_size =size         = gs->msg_sizes;
3784:   msg_nodes=nodes        = gs->node_list;
3785:   iptr=pw                = gs->pw_elm_list;
3786:   dptr1=dptr3            = gs->pw_vals;
3787:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3788:   msg_ids_out = ids_out  = gs->msg_ids_out;
3789:   dptr2                  = gs->out;
3790:   in1=in2                = gs->in;

3792:   /* post the receives */
3793:   /*  msg_nodes=nodes; */
3794:   do
3795:     {
3796:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3797:          second one *list and do list++ afterwards */
3798:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3799:                 gs->gs_comm, msg_ids_in++);
3800:       in1 += *size++;
3801:     }
3802:   while (*++msg_nodes);
3803:   msg_nodes=nodes;

3805:   /* load gs values into in out gs buffers */
3806:   while (*iptr >= 0)
3807:     {*dptr3++ = *(in_vals + *iptr++);}

3809:   /* load out buffers and post the sends */
3810:   while ((iptr = *msg_nodes++))
3811:     {
3812:       dptr3 = dptr2;
3813:       while (*iptr >= 0)
3814:         {*dptr2++ = *(dptr1 + *iptr++);}
3815:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3816:       /* is msg_ids_out++ correct? */
3817:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
3818:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3819:     }

3821:   if (gs->max_left_over)
3822:     {gs_gop_tree_times(gs,in_vals);}

3824:   /* process the received data */
3825:   msg_nodes=nodes;
3826:   while ((iptr = *nodes++))
3827:     {
3828:       /* Should I check the return value of MPI_Wait() or status? */
3829:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3830:       MPI_Wait(ids_in++, &status);
3831:       while (*iptr >= 0)
3832:         {*(dptr1 + *iptr++) *= *in2++;}
3833:     }

3835:   /* replace vals */
3836:   while (*pw >= 0)
3837:     {*(in_vals + *pw++) = *dptr1++;}

3839:   /* clear isend message handles */
3840:   /* This changed for clarity though it could be the same */
3841:   while (*msg_nodes++)
3842:     /* Should I check the return value of MPI_Wait() or status? */
3843:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3844:     {MPI_Wait(ids_out++, &status);}
3845: #else
3846:   return;
3847: #endif
3848: }



3852: /******************************************************************************
3853: Function: gather_scatter

3855: Input : 
3856: Output: 
3857: Return: 
3858: Description: 
3859: ******************************************************************************/
3860: static
3861: void
3862: gs_gop_tree_times(gs_id *gs, REAL *vals)
3863: {
3864:   int size;
3865:   int *in, *out;
3866:   REAL *buf, *work;
3867:   /*int op[] = {GL_MULT,0};*/


3870: #ifdef DEBUG
3871:   error_msg_warning("start gs_gop_tree_times()");
3872: #endif
3873: 
3874:   in   = gs->tree_map_in;
3875:   out  = gs->tree_map_out;
3876:   buf  = gs->tree_buf;
3877:   work = gs->tree_work;
3878:   size = gs->tree_nel;

3880: #if defined  BLAS||CBLAS
3881:   *work = 1.0;
3882:   copy(size,work,0,buf,1);
3883: #else
3884:   rvec_one(buf,size);
3885: #endif

3887:   while (*in >= 0)
3888:     {*(buf + *out++) = *(vals + *in++);}

3890:   in   = gs->tree_map_in;
3891:   out  = gs->tree_map_out;
3892: #if defined(NXSRC) && defined(r8)
3893:   gdprod(buf,size,work);
3894:   while (*in >= 0)
3895:     {*(vals + *in++) = *(buf + *out++);}
3896: #elif defined MPISRC
3897:   MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_PROD,gs->gs_comm);
3898:   while (*in >= 0)
3899:     {*(vals + *in++) = *(work + *out++);}
3900: #else
3901:   grop(buf,work,size,op);
3902:   while (*in >= 0)
3903:     {*(vals + *in++) = *(buf + *out++);}
3904: #endif

3906: #ifdef DEBUG
3907:   error_msg_warning("end gs_gop_tree_times()");
3908: #endif
3909: }



3913: /******************************************************************************
3914: Function: gather_scatter


3917: Input : 
3918: Output: 
3919: Return: 
3920: Description: 
3921: ******************************************************************************/
3922: static void
3923: gs_gop_plus(register gs_id *gs, register REAL *vals)
3924: {
3925: #ifdef DEBUG
3926:   error_msg_warning("start gs_gop_plus()\n");
3927: #endif

3929:   /* local only operations!!! */
3930:   if (gs->num_local)
3931:     {gs_gop_local_plus(gs,vals);}

3933:   /* if intersection tree/pairwise and local isn't empty */
3934:   if (gs->num_local_gop)
3935:     {
3936:       gs_gop_local_in_plus(gs,vals);

3938:       /* pairwise will NOT do tree inside ... */
3939:       if (gs->num_pairs)
3940:         {gs_gop_pairwise_plus(gs,vals);}

3942:       /* tree */
3943:       if (gs->max_left_over)
3944:         {gs_gop_tree_plus(gs,vals);}
3945: 
3946:       gs_gop_local_out(gs,vals);
3947:     }
3948:   /* if intersection tree/pairwise and local is empty */
3949:   else
3950:     {
3951:       /* pairwise will NOT do tree inside */
3952:       if (gs->num_pairs)
3953:         {gs_gop_pairwise_plus(gs,vals);}
3954: 
3955:       /* tree */
3956:       if (gs->max_left_over)
3957:         {gs_gop_tree_plus(gs,vals);}
3958:     }

3960: #ifdef DEBUG
3961:   error_msg_warning("end gs_gop_plus()\n");
3962: #endif
3963: }



3967: /******************************************************************************
3968: Function: gather_scatter

3970: Input : 
3971: Output: 
3972: Return: 
3973: Description: 
3974: ******************************************************************************/
3975: static
3976: void
3977: gs_gop_local_plus(register gs_id *gs, register REAL *vals)
3978: {
3979:   register int *num, *map, **reduce;
3980:   register REAL tmp;


3983: #ifdef DEBUG
3984:   error_msg_warning("begin gs_gop_local_plus()\n");
3985: #endif

3987:   num    = gs->num_local_reduce;
3988:   reduce = gs->local_reduce;
3989:   while ((map = *reduce))
3990:     {
3991:       /* wall */
3992:       if (*num == 2)
3993:         {
3994:           num ++; reduce++;
3995:           vals[map[1]] = vals[map[0]] += vals[map[1]];
3996:         }
3997:       /* corner shared by three elements */
3998:       else if (*num == 3)
3999:         {
4000:           num ++; reduce++;
4001:           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
4002:         }
4003:       /* corner shared by four elements */
4004:       else if (*num == 4)
4005:         {
4006:           num ++; reduce++;
4007:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
4008:                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
4009:         }
4010:       /* general case ... odd geoms ... 3D*/
4011:       else
4012:         {
4013:           num ++;
4014:            tmp = 0.0;
4015:           while (*map >= 0)
4016:             {tmp += *(vals + *map++);}

4018:           map = *reduce++;
4019:           while (*map >= 0)
4020:             {*(vals + *map++) = tmp;}
4021:         }
4022:     }
4023: #ifdef DEBUG
4024:   error_msg_warning("end gs_gop_local_plus()\n");
4025: #endif
4026: }



4030: /******************************************************************************
4031: Function: gather_scatter

4033: Input : 
4034: Output: 
4035: Return: 
4036: Description: 
4037: ******************************************************************************/
4038: static
4039: void
4040: gs_gop_local_in_plus(register gs_id *gs, register REAL *vals)
4041: {
4042:   register int *num, *map, **reduce;
4043:   register REAL *base;


4046: #ifdef DEBUG
4047:   error_msg_warning("begin gs_gop_local_in_plus()\n");
4048: #endif

4050:   num    = gs->num_gop_local_reduce;
4051:   reduce = gs->gop_local_reduce;
4052:   while ((map = *reduce++))
4053:     {
4054:       /* wall */
4055:       if (*num == 2)
4056:         {
4057:           num ++;
4058:           vals[map[0]] += vals[map[1]];
4059:         }
4060:       /* corner shared by three elements */
4061:       else if (*num == 3)
4062:         {
4063:           num ++;
4064:           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
4065:         }
4066:       /* corner shared by four elements */
4067:       else if (*num == 4)
4068:         {
4069:           num ++;
4070:           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
4071:         }
4072:       /* general case ... odd geoms ... 3D*/
4073:       else
4074:         {
4075:           num++;
4076:           base = vals + *map++;
4077:           while (*map >= 0)
4078:             {*base += *(vals + *map++);}
4079:         }
4080:     }
4081: #ifdef DEBUG
4082:   error_msg_warning("end gs_gop_local_in_plus()\n");
4083: #endif
4084: }



4088: /******************************************************************************
4089: Function: gather_scatter

4091: VERSION 3 :: 

4093: Input : 
4094: Output: 
4095: Return: 
4096: Description: 
4097: ******************************************************************************/
4098: static
4099: void
4100: gs_gop_pairwise_plus(register gs_id *gs, register REAL *in_vals)
4101: {
4102: #if   defined NXSRC
4103:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
4104:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
4105:   register int *pw, *list, *size, **nodes;
4106:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;


4109: #ifdef DEBUG
4110:   error_msg_warning("gs_gop_pairwise_plus() start\n");
4111: #endif

4113:   /* strip and load registers */
4114:   msg_list =list         = gs->pair_list;
4115:   msg_size =size         = gs->msg_sizes;
4116:   msg_nodes=nodes        = gs->node_list;
4117:   iptr=pw                = gs->pw_elm_list;
4118:   dptr1=dptr3            = gs->pw_vals;
4119:   msg_ids_in  = ids_in   = gs->msg_ids_in;
4120:   msg_ids_out = ids_out  = gs->msg_ids_out;
4121:   dptr2                  = gs->out;
4122:   in1=in2                = gs->in;

4124:   /* post the receives */
4125:   do
4126:     {
4127:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
4128:       in1 += *size++;
4129:     }
4130:   while (*msg_ids_in >= 0);

4132:   /* load gs values into in out gs buffers */
4133:   while (*iptr >= 0)
4134:     {*dptr3++ = *(in_vals + *iptr++);}

4136:   /* load out buffers and post the sends */
4137:   while (iptr = *msg_nodes++)
4138:     {
4139:       dptr3 = dptr2;
4140:       while (*iptr >= 0)
4141:         {*dptr2++ = *(dptr1 + *iptr++);}
4142:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,*(msg_size++)*REAL_LEN,
4143:                              *msg_list++,0);
4144:     }

4146:   /* post the receives ... was here*/
4147: 
4148:   /* do the tree while we're waiting */
4149:   /*
4150:   if (gs->max_left_over)
4151:     {gs_gop_tree_plus(gs,in_vals);}
4152:   */

4154:   /* process the received data */
4155:   while (iptr = *nodes++)
4156:     {
4157:       msgwait(*ids_in++);
4158:       while (*iptr >= 0)
4159:         {*(dptr1 + *iptr++) += *in2++;}
4160:     }

4162:   /* replace vals */
4163:   while (*pw >= 0)
4164:     {*(in_vals + *pw++) = *dptr1++;}

4166:   /* clear isend message handles */
4167:   while (*ids_out >= 0)
4168:     {msgwait(*ids_out++);}

4170: #ifdef DEBUG
4171:   error_msg_warning("gs_gop_pairwise_plus() end\n");
4172: #endif

4174: #elif defined MPISRC
4175:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
4176:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
4177:   register int *pw, *list, *size, **nodes;
4178:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
4179:   MPI_Status status;


4182: #ifdef DEBUG
4183:   error_msg_warning("gs_gop_pairwise_plus() start\n");
4184: #endif

4186:   /* strip and load registers */
4187:   msg_list =list         = gs->pair_list;
4188:   msg_size =size         = gs->msg_sizes;
4189:   msg_nodes=nodes        = gs->node_list;
4190:   iptr=pw                = gs->pw_elm_list;
4191:   dptr1=dptr3            = gs->pw_vals;
4192:   msg_ids_in  = ids_in   = gs->msg_ids_in;
4193:   msg_ids_out = ids_out  = gs->msg_ids_out;
4194:   dptr2                  = gs->out;
4195:   in1=in2                = gs->in;

4197:   /* post the receives */
4198:   /*  msg_nodes=nodes; */
4199:   do
4200:     {
4201:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
4202:          second one *list and do list++ afterwards */
4203:       MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
4204:                 gs->gs_comm, msg_ids_in++);
4205:       in1 += *size++;
4206:     }
4207:   while (*++msg_nodes);
4208:   msg_nodes=nodes;

4210:   /* load gs values into in out gs buffers */
4211:   while (*iptr >= 0)
4212:     {*dptr3++ = *(in_vals + *iptr++);}

4214:   /* load out buffers and post the sends */
4215:   while ((iptr = *msg_nodes++))
4216:     {
4217:       dptr3 = dptr2;
4218:       while (*iptr >= 0)
4219:         {*dptr2++ = *(dptr1 + *iptr++);}
4220:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
4221:       /* is msg_ids_out++ correct? */
4222:       MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *msg_list++,
4223:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
4224:     }

4226:   /* do the tree while we're waiting */
4227:   if (gs->max_left_over)
4228:     {gs_gop_tree_plus(gs,in_vals);}

4230:   /* process the received data */
4231:   msg_nodes=nodes;
4232:   while ((iptr = *nodes++))
4233:     {
4234:       /* Should I check the return value of MPI_Wait() or status? */
4235:       /* Can this loop be replaced by a call to MPI_Waitall()? */
4236:       MPI_Wait(ids_in++, &status);
4237:       while (*iptr >= 0)
4238:         {*(dptr1 + *iptr++) += *in2++;}
4239:     }

4241:   /* replace vals */
4242:   while (*pw >= 0)
4243:     {*(in_vals + *pw++) = *dptr1++;}

4245:   /* clear isend message handles */
4246:   /* This changed for clarity though it could be the same */
4247:   while (*msg_nodes++)
4248:     /* Should I check the return value of MPI_Wait() or status? */
4249:     /* Can this loop be replaced by a call to MPI_Waitall()? */
4250:     {MPI_Wait(ids_out++, &status);}

4252: #ifdef DEBUG
4253:   error_msg_warning("gs_gop_pairwise_plus() end\n");
4254: #endif

4256: #else
4257:   return;
4258: #endif
4259: }



4263: /******************************************************************************
4264: Function: gather_scatter

4266: Input : 
4267: Output: 
4268: Return: 
4269: Description: 
4270: ******************************************************************************/
4271: static
4272: void
4273: gs_gop_tree_plus(gs_id *gs, REAL *vals)
4274: {
4275:   int size;
4276:   int *in, *out;
4277:   REAL *buf, *work;
4278:   /*int op[] = {GL_ADD,0}; */


4281: #ifdef DEBUG
4282:   error_msg_warning("start gs_gop_tree_plus()\n");
4283: #endif
4284: 
4285:   in   = gs->tree_map_in;
4286:   out  = gs->tree_map_out;
4287:   buf  = gs->tree_buf;
4288:   work = gs->tree_work;
4289:   size = gs->tree_nel;

4291: #if defined  BLAS||CBLAS
4292:   *work = 0.0;
4293:   copy(size,work,0,buf,1);
4294: #else
4295:   rvec_zero(buf,size);
4296: #endif

4298:   while (*in >= 0)
4299:     {*(buf + *out++) = *(vals + *in++);}

4301:   in   = gs->tree_map_in;
4302:   out  = gs->tree_map_out;
4303: #if defined(NXSRC) && defined(r8)
4304:   gdsum(buf,size,work);

4306:   /* grop(buf,work,size,op);  */
4307:   while (*in >= 0)
4308:     {*(vals + *in++) = *(buf + *out++);}
4309: #elif defined MPISRC
4310:   MPI_Allreduce(buf,work,size,REAL_TYPE,MPI_SUM,gs->gs_comm);
4311:   while (*in >= 0)
4312:     {*(vals + *in++) = *(work + *out++);}
4313: #else
4314:   grop(buf,work,size,op);
4315:   while (*in >= 0)
4316:     {*(vals + *in++) = *(buf + *out++);}
4317: #endif

4319: #ifdef DEBUG
4320:   error_msg_warning("end gs_gop_tree_plus()\n");
4321: #endif
4322: }



4326: /******************************************************************************
4327: Function: level_best_guess()

4329: Input : 
4330: Output: 
4331: Return: 
4332: Description: 
4333: ******************************************************************************/
4334: #if defined(not_used)
4335: static
4336: int 
4337: level_best_guess(void)
4338: {
4339:   /* full pairwise for now */
4340:   return(num_nodes);
4341: }
4342: #endif


4345: /******************************************************************************
4346: Function: gs_print_template()

4348: Input : 

4350: Output: 

4352: Return: 

4354: Description:  
4355: ******************************************************************************/
4356: #if defined(not_used)
4357: static
4358: void
4359: gs_print_template(register gs_id* gs, int who)
4360: {
4361:   register int j, k, *iptr, *iptr2;

4363: 
4364:   if ((my_id == who) && (num_gs_ids))
4365:     {
4366:       printf("\n\nP#%d's GS#%d template:\n", my_id, gs->id);
4367:       printf("id=%d\n",          gs->id);
4368:       printf("nel(unique)=%d\n", gs->nel);
4369:       printf("nel_max=%d\n",     gs->nel_max);
4370:       printf("nel_min=%d\n",     gs->nel_min);
4371:       printf("nel_sum=%d\n",     gs->nel_sum);
4372:       printf("negl=%d\n",        gs->negl);
4373:       printf("gl_max=%d\n",      gs->gl_max);
4374:       printf("gl_min=%d\n",      gs->gl_min);
4375:       printf("elms ordered=%d\n",gs->ordered);
4376:       printf("repeats=%d\n",     gs->repeats);
4377:       printf("positive=%d\n",    gs->positive);
4378:       printf("elms=%ld\n",        (PTRINT) gs->elms);
4379:       printf("elms(total)=%ld\n", (PTRINT) gs->local_elms);
4380:       printf("vals=%ld\n",        (PTRINT) gs->vals);
4381:       printf("gl_bss_min=%d\n",  gs->gl_bss_min);
4382:       printf("gl_perm_min=%d\n", gs->gl_perm_min);
4383:       printf("level=%d\n",       gs->level);
4384:       printf("proc_mask_sz=%d\n",gs->mask_sz);
4385:       printf("sh_proc_mask=%ld\n",(PTRINT) gs->nghs);
4386:       printf("ngh_buf_size=%d\n",gs->ngh_buf_sz);
4387:       printf("ngh_buf=%ld\n",     (PTRINT) gs->ngh_buf);
4388:       printf("num_nghs=%d\n",    gs->num_nghs);
4389:       printf("max_nghs=%d\n",    gs->max_nghs);

4391:       /* pairwise exchange information */
4392:       printf("\nPaiwise Info:\n");
4393:       printf("num_pairs=%d\n",   gs->num_pairs);
4394:       printf("max_pairs=%d\n",   gs->max_pairs);
4395:       printf("len_pw_list=%d\n", gs->len_pw_list);
4396:       printf("pair_list=%ld\n",   (PTRINT) gs->pair_list);
4397:       printf("msg_sizes=%ld\n",   (PTRINT) gs->msg_sizes);
4398:       printf("node_list=%ld\n",   (PTRINT) gs->node_list);
4399:       printf("pw_elm_list=%ld\n", (PTRINT) gs->pw_elm_list);

4401:       printf("pw_elm_list: ");
4402:       if ((iptr = gs->pw_elm_list))
4403:         {
4404:           for (j=0;j<gs->len_pw_list;j++)
4405:             {printf("%d ", *iptr); iptr++;}
4406:         }
4407:       printf("\n");

4409:       printf("processor_list: ");
4410:       if ((iptr = gs->pair_list))
4411:         {
4412:           for (j=0;j<gs->num_pairs;j++)
4413:             {printf("%d ", *iptr); iptr++;}
4414:         }
4415:       printf("\n");

4417:       printf("loc_node_pairs=%d\n",   gs->loc_node_pairs);
4418:       printf("max_node_pairs=%d\n",   gs->max_node_pairs);
4419:       printf("min_node_pairs=%d\n",   gs->min_node_pairs);
4420:       printf("avg_node_pairs=%d\n",   gs->avg_node_pairs);

4422:       printf("size_list: ");
4423:       if ((iptr = gs->msg_sizes))
4424:         {
4425:           for (j=0;j<gs->num_pairs;j++)
4426:             {printf("%d ", *iptr); iptr++;}
4427:         }
4428:       printf("\n");
4429:       if ((iptr = gs->pair_list))
4430:         {
4431:           for (j=0;j<gs->num_pairs;j++)
4432:             {
4433:               printf("node_list %d: ", *iptr);
4434:               if ((iptr2 = (gs->node_list)[j]))
4435:                 {
4436:                   for (k=0;k<(gs->msg_sizes)[j];k++)
4437:                     {printf("%d ", *iptr2); iptr2++;}
4438:                 }
4439:               iptr++;
4440:               printf("\n");
4441:             }
4442:         }
4443:       printf("\n");
4444: 
4445:       printf("elm_list(U): ");
4446:       if ((iptr = gs->elms))
4447:         {
4448:           for (j=0;j<gs->nel;j++)
4449:             {printf("%d ", *iptr); iptr++;}
4450:         }
4451:       printf("\n");
4452:       printf("\n");
4453: 
4454:       printf("elm_list(T): ");
4455:       if ((iptr = gs->local_elms))
4456:         {
4457:           for (j=0;j<gs->nel_total;j++)
4458:             {printf("%d ", *iptr); iptr++;}
4459:         }
4460:       printf("\n");
4461:       printf("\n");
4462: 
4463:       printf("map_list(T): ");
4464:       if ((iptr = gs->companion))
4465:         {
4466:           for (j=0;j<gs->nel;j++)
4467:             {printf("%d ", *iptr); iptr++;}
4468:         }
4469:       printf("\n");
4470:       printf("\n");
4471: 

4473:       /* local exchange information */
4474:       printf("\nLocal Info:\n");
4475:       printf("local_strength=%d\n",   gs->local_strength);
4476:       printf("num_local_total=%d\n",  gs->num_local_total);
4477:       printf("num_local=%d\n",        gs->num_local);
4478:       printf("num_local_gop=%d\n",    gs->num_local_gop);
4479:       printf("num_local_reduce=%ld\n", (PTRINT) gs->num_local_reduce);
4480:       printf("local_reduce=%ld\n",     (PTRINT) gs->local_reduce);
4481:       printf("num_gop_local_reduce=%ld\n", (PTRINT) gs->num_gop_local_reduce);
4482:       printf("gop_local_reduce=%ld\n",     (PTRINT) gs->gop_local_reduce);
4483:       printf("\n");

4485:       for (j=0;j<gs->num_local;j++)
4486:         {
4487:           printf("local reduce_list %d: ", j);
4488:           if ((iptr2 = (gs->local_reduce)[j]))
4489:             {
4490:               if ((gs->num_local_reduce)[j] <= 0)
4491:                 {printf("oops");}
4492: 
4493:               for (k=0;k<(gs->num_local_reduce)[j];k++)
4494:                 {printf("%d ", *iptr2); iptr2++;}
4495:             }
4496:           printf("\n");
4497:         }
4498: 
4499:       printf("\n");
4500:       printf("\n");
4501: 
4502:       for (j=0;j<gs->num_local_gop;j++)
4503:         {
4504:           printf("gop reduce_list %d: ", j);
4505:           iptr2 = (gs->gop_local_reduce)[j];
4506: 
4507:           if ((gs->num_gop_local_reduce)[j] <= 0)
4508:             {printf("oops");}
4509: 

4511:           for (k=0;k<(gs->num_gop_local_reduce)[j];k++)
4512:             {printf("%d ", *iptr2); iptr2++;}
4513:           printf("\n");
4514:         }
4515:       printf("\n");
4516:       printf("\n");

4518:       /* crystal router information */
4519:       printf("\n\n");
4520:       printf("Tree Info:\n");
4521:       printf("max_left_over=%d\n",   gs->max_left_over);
4522:       printf("num_in_list=%ld\n",    (PTRINT) gs->in_num);
4523:       printf("in_list=%ld\n",        (PTRINT) gs->in_list);
4524:       printf("num_out_list=%ld\n",   (PTRINT) gs->out_num);
4525:       printf("out_list=%ld\n",       (PTRINT) gs->out_list);

4527:       printf("\n\n");
4528:     }
4529:   fflush(stdout);
4530: }
4531: #endif



4535: /******************************************************************************
4536: Function: gs_free()

4538: Input : 

4540: Output: 

4542: Return: 

4544: Description:  
4545:   if (gs->sss) {perm_free((void*) gs->sss);}
4546: ******************************************************************************/
4547: void
4548: gs_free(register gs_id *gs)
4549: {
4550:   register int i;


4553: #ifdef DEBUG  
4554:   error_msg_warning("start gs_gop_xxx()\n");
4555:   if (!gs) {error_msg_warning("NULL ptr passed to gs_free()"); return;}
4556: #endif

4558:   if (gs->nghs) {perm_free((void*) gs->nghs);}
4559:   if (gs->pw_nghs) {perm_free((void*) gs->pw_nghs);}

4561:   /* tree */
4562:   if (gs->max_left_over)
4563:     {
4564:       if (gs->tree_elms) {bss_free((void*) gs->tree_elms);}
4565:       if (gs->tree_buf) {bss_free((void*) gs->tree_buf);}
4566:       if (gs->tree_work) {bss_free((void*) gs->tree_work);}
4567:       if (gs->tree_map_in) {bss_free((void*) gs->tree_map_in);}
4568:       if (gs->tree_map_out) {bss_free((void*) gs->tree_map_out);}
4569:     }

4571:   /* pairwise info */
4572:   if (gs->num_pairs)
4573:     {
4574:       /* should be NULL already */
4575:       if (gs->ngh_buf) {bss_free((void*) gs->ngh_buf);}
4576:       if (gs->elms) {bss_free((void*) gs->elms);}
4577:       if (gs->local_elms) {bss_free((void*) gs->local_elms);}
4578:       if (gs->companion) {bss_free((void*) gs->companion);}
4579: 
4580:       /* only set if pairwise */
4581:       if (gs->vals) {perm_free((void*) gs->vals);}
4582:       if (gs->in) {perm_free((void*) gs->in);}
4583:       if (gs->out) {perm_free((void*) gs->out);}
4584:       if (gs->msg_ids_in) {perm_free((void*) gs->msg_ids_in);}
4585:       if (gs->msg_ids_out) {perm_free((void*) gs->msg_ids_out);}
4586:       if (gs->pw_vals) {perm_free((void*) gs->pw_vals);}
4587:       if (gs->pw_elm_list) {perm_free((void*) gs->pw_elm_list);}
4588:       if (gs->node_list)
4589:         {
4590:           for (i=0;i<gs->num_pairs;i++)
4591:             {if (gs->node_list[i]) {perm_free((void*) gs->node_list[i]);}}
4592:           perm_free((void*) gs->node_list);
4593:         }
4594:       if (gs->msg_sizes) {perm_free((void*) gs->msg_sizes);}
4595:       if (gs->pair_list) {perm_free((void*) gs->pair_list);}
4596:     }

4598:   /* local info */
4599:   if (gs->num_local_total>=0)
4600:     {
4601:       for (i=0;i<gs->num_local_total+1;i++)
4602:         /*      for (i=0;i<gs->num_local_total;i++) */
4603:         {
4604:           if (gs->num_gop_local_reduce[i])
4605:             {perm_free((void*) gs->gop_local_reduce[i]);}
4606:         }
4607:     }

4609:   /* if intersection tree/pairwise and local isn't empty */
4610:   if (gs->gop_local_reduce) {perm_free((void*) gs->gop_local_reduce);}
4611:   if (gs->num_gop_local_reduce) {perm_free((void*) gs->num_gop_local_reduce);}

4613:   perm_free((void *) gs);
4614: }






4621: /******************************************************************************
4622: Function: gather_scatter

4624: Input : 
4625: Output: 
4626: Return: 
4627: Description: 
4628: ******************************************************************************/
4629: void
4630: gs_gop_vec(register gs_id *gs, register REAL *vals, register const char *op, register int step)
4631: {
4632: #ifdef DEBUG
4633:   error_msg_warning("gs_gop_vec() start");
4634:   if (!gs) {error_msg_fatal("gs_gop_vec() :: passed NULL gs handle!!!");}
4635:   if (!op) {error_msg_fatal("gs_gop_vec() :: passed NULL operation!!!");}

4637:   /* check top make sure that segments being requested aren't larger */
4638:   /* then what I reserved earlier ... fix is to allow user to reset  */
4639:   if (step>gs->vec_sz)
4640:     {error_msg_fatal("gs_gop_vec() :: %d > %d!\n",step,gs->vec_sz);}
4641: #endif

4643:   switch (*op) {
4644:   case '+':
4645:     gs_gop_vec_plus(gs,vals,step);
4646:     break;
4647: #ifdef NOT_YET    
4648:   case '*':
4649:     gs_gop_times(gs,vals);
4650:     break;
4651:   case 'a':
4652:     gs_gop_min_abs(gs,vals);
4653:     break;
4654:   case 'A':
4655:     gs_gop_max_abs(gs,vals);
4656:     break;
4657:   case 'e':
4658:     gs_gop_exists(gs,vals);
4659:     break;
4660:   case 'm':
4661:     gs_gop_min(gs,vals);
4662:     break;
4663:   case 'M':
4664:     gs_gop_max(gs,vals); break;
4665:     /*
4666:     if (*(op+1)=='\0')
4667:       {gs_gop_max(gs,vals); break;}
4668:     else if (*(op+1)=='X')
4669:       {gs_gop_max_abs(gs,vals); break;}
4670:     else if (*(op+1)=='N')
4671:       {gs_gop_min_abs(gs,vals); break;}
4672:     */
4673: #endif
4674:   default:
4675:     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
4676:     error_msg_warning("gs_gop_vec() :: default :: plus");
4677:     gs_gop_vec_plus(gs,vals,step);
4678:     break;
4679:   }
4680: #ifdef DEBUG
4681:   error_msg_warning("gs_gop_vec() end");
4682: #endif
4683: }



4687: /******************************************************************************
4688: Function: gather_scatter

4690: Input : 
4691: Output: 
4692: Return: 
4693: Description: 
4694: ******************************************************************************/
4695: static void
4696: gs_gop_vec_plus(register gs_id *gs, register REAL *vals, register int step)
4697: {
4698: #ifdef DEBUG
4699:   error_msg_warning("gs_gop_vec_plus() start");
4700: #endif

4702:   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}

4704:   /* local only operations!!! */
4705:   if (gs->num_local)
4706:     {gs_gop_vec_local_plus(gs,vals,step);}

4708:   /* if intersection tree/pairwise and local isn't empty */
4709:   if (gs->num_local_gop)
4710:     {
4711:       gs_gop_vec_local_in_plus(gs,vals,step);

4713:       /* pairwise */
4714:       if (gs->num_pairs)
4715:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

4717:       /* tree */
4718:       else if (gs->max_left_over)
4719:         {gs_gop_vec_tree_plus(gs,vals,step);}

4721:       gs_gop_vec_local_out(gs,vals,step);
4722:     }
4723:   /* if intersection tree/pairwise and local is empty */
4724:   else
4725:     {
4726:       /* pairwise */
4727:       if (gs->num_pairs)
4728:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

4730:       /* tree */
4731:       else if (gs->max_left_over)
4732:         {gs_gop_vec_tree_plus(gs,vals,step);}
4733:     }
4734: #ifdef DEBUG
4735:   error_msg_warning("gs_gop_vec_plus() end");
4736: #endif
4737: }



4741: /******************************************************************************
4742: Function: gather_scatter

4744: Input : 
4745: Output: 
4746: Return: 
4747: Description: 
4748: ******************************************************************************/
4749: static
4750: void
4751: gs_gop_vec_local_plus(register gs_id *gs, register REAL *vals, 
4752:                       register int step)
4753: {
4754:   register int *num, *map, **reduce;
4755:   register REAL *base;


4758: #ifdef DEBUG
4759:   error_msg_warning("gs_gop_vec_local_plus() start");
4760: #endif

4762:   num    = gs->num_local_reduce;
4763:   reduce = gs->local_reduce;
4764:   while ((map = *reduce))
4765:     {
4766:       base = vals + map[0] * step;

4768:       /* wall */
4769:       if (*num == 2)
4770:         {
4771:           num++; reduce++;
4772:           rvec_add (base,vals+map[1]*step,step);
4773:           rvec_copy(vals+map[1]*step,base,step);
4774:         }
4775:       /* corner shared by three elements */
4776:       else if (*num == 3)
4777:         {
4778:           num++; reduce++;
4779:           rvec_add (base,vals+map[1]*step,step);
4780:           rvec_add (base,vals+map[2]*step,step);
4781:           rvec_copy(vals+map[2]*step,base,step);
4782:           rvec_copy(vals+map[1]*step,base,step);
4783:         }
4784:       /* corner shared by four elements */
4785:       else if (*num == 4)
4786:         {
4787:           num++; reduce++;
4788:           rvec_add (base,vals+map[1]*step,step);
4789:           rvec_add (base,vals+map[2]*step,step);
4790:           rvec_add (base,vals+map[3]*step,step);
4791:           rvec_copy(vals+map[3]*step,base,step);
4792:           rvec_copy(vals+map[2]*step,base,step);
4793:           rvec_copy(vals+map[1]*step,base,step);
4794:         }
4795:       /* general case ... odd geoms ... 3D */
4796:       else
4797:         {
4798:           num++;
4799:           while (*++map >= 0)
4800:             {rvec_add (base,vals+*map*step,step);}
4801: 
4802:           map = *reduce;
4803:           while (*++map >= 0)
4804:             {rvec_copy(vals+*map*step,base,step);}
4805: 
4806:           reduce++;
4807:         }
4808:     }
4809: #ifdef DEBUG
4810:   error_msg_warning("gs_gop_vec_local_plus() end");
4811: #endif
4812: }



4816: /******************************************************************************
4817: Function: gather_scatter

4819: Input : 
4820: Output: 
4821: Return: 
4822: Description: 
4823: ******************************************************************************/
4824: static
4825: void
4826: gs_gop_vec_local_in_plus(register gs_id *gs, register REAL *vals, 
4827:                          register int step)
4828: {
4829:   register int  *num, *map, **reduce;
4830:   register REAL *base;


4833: #ifdef DEBUG
4834:   error_msg_warning("gs_gop_vec_locel_in_plus() start");
4835: #endif

4837:   num    = gs->num_gop_local_reduce;
4838:   reduce = gs->gop_local_reduce;
4839:   while ((map = *reduce++))
4840:     {
4841:       base = vals + map[0] * step;

4843:       /* wall */
4844:       if (*num == 2)
4845:         {
4846:           num ++;
4847:           rvec_add(base,vals+map[1]*step,step);
4848:         }
4849:       /* corner shared by three elements */
4850:       else if (*num == 3)
4851:         {
4852:           num ++;
4853:           rvec_add(base,vals+map[1]*step,step);
4854:           rvec_add(base,vals+map[2]*step,step);
4855:         }
4856:       /* corner shared by four elements */
4857:       else if (*num == 4)
4858:         {
4859:           num ++;
4860:           rvec_add(base,vals+map[1]*step,step);
4861:           rvec_add(base,vals+map[2]*step,step);
4862:           rvec_add(base,vals+map[3]*step,step);
4863:         }
4864:       /* general case ... odd geoms ... 3D*/
4865:       else
4866:         {
4867:           num++;
4868:           while (*++map >= 0)
4869:             {rvec_add(base,vals+*map*step,step);}
4870:         }
4871:     }
4872: #ifdef DEBUG
4873:   error_msg_warning("gs_gop_vec_local_in_plus() end");
4874: #endif
4875: }


4878: /******************************************************************************
4879: Function: gather_scatter

4881: Input : 
4882: Output: 
4883: Return: 
4884: Description: 
4885: ******************************************************************************/
4886: static
4887: void
4888: gs_gop_vec_local_out(register gs_id *gs, register REAL *vals, 
4889:                      register int step)
4890: {
4891:   register int *num, *map, **reduce;
4892:   register REAL *base;


4895: #ifdef DEBUG
4896:   error_msg_warning("gs_gop_vec_local_out() start");
4897: #endif

4899:   num    = gs->num_gop_local_reduce;
4900:   reduce = gs->gop_local_reduce;
4901:   while ((map = *reduce++))
4902:     {
4903:       base = vals + map[0] * step;

4905:       /* wall */
4906:       if (*num == 2)
4907:         {
4908:           num ++;
4909:           rvec_copy(vals+map[1]*step,base,step);
4910:         }
4911:       /* corner shared by three elements */
4912:       else if (*num == 3)
4913:         {
4914:           num ++;
4915:           rvec_copy(vals+map[1]*step,base,step);
4916:           rvec_copy(vals+map[2]*step,base,step);
4917:         }
4918:       /* corner shared by four elements */
4919:       else if (*num == 4)
4920:         {
4921:           num ++;
4922:           rvec_copy(vals+map[1]*step,base,step);
4923:           rvec_copy(vals+map[2]*step,base,step);
4924:           rvec_copy(vals+map[3]*step,base,step);
4925:         }
4926:       /* general case ... odd geoms ... 3D*/
4927:       else
4928:         {
4929:           num++;
4930:           while (*++map >= 0)
4931:             {rvec_copy(vals+*map*step,base,step);}
4932:         }
4933:     }
4934: #ifdef DEBUG
4935:   error_msg_warning("gs_gop_vec_local_out() end");
4936: #endif
4937: }



4941: /******************************************************************************
4942: Function: gather_scatter

4944: VERSION 3 :: 

4946: Input : 
4947: Output: 
4948: Return: 
4949: Description: 
4950: ******************************************************************************/
4951: static
4952: void
4953: gs_gop_vec_pairwise_plus(register gs_id *gs, register REAL *in_vals,
4954:                          register int step)
4955: {
4956: #if   defined NXSRC
4957:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
4958:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
4959:   register int *pw, *list, *size, **nodes;
4960:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
4961:   register int i;

4963: #ifdef DEBUG
4964:   error_msg_warning("gs_gop_vec_pairwise_plus() start");
4965: #endif

4967:   /* strip and load registers */
4968:   msg_list =list         = gs->pair_list;
4969:   msg_size =size         = gs->msg_sizes;
4970:   msg_nodes=nodes        = gs->node_list;
4971:   iptr=pw                = gs->pw_elm_list;
4972:   dptr1=dptr3            = gs->pw_vals;
4973:   msg_ids_in  = ids_in   = gs->msg_ids_in;
4974:   msg_ids_out = ids_out  = gs->msg_ids_out;
4975:   dptr2                  = gs->out;
4976:   in1=in2                = gs->in;

4978:   /* post the receives */
4979:   do
4980:     {
4981:       *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN*step);
4982:       in1 += *size++ * step;
4983:     }
4984:   while (*msg_ids_in >= 0);

4986:   /* load gs values into in out gs buffers */
4987:   while (*iptr >= 0)
4988:     {
4989:       rvec_copy(dptr3,in_vals + *iptr*step,step);
4990:       dptr3+=step;
4991:       iptr++;
4992:     }

4994:   /* load out buffers and post the sends */
4995:   while (iptr = *msg_nodes++)
4996:     {
4997:       dptr3 = dptr2;
4998:       while (*iptr >= 0)
4999:         {
5000:           rvec_copy(dptr2,dptr1 + *iptr*step,step);
5001:           dptr2+=step;
5002:           iptr++;
5003:         }
5004:       *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,
5005:                                    *(msg_size++)*REAL_LEN*step,*msg_list++,0);
5006:     }

5008:   /* post the receives ... was here*/
5009:   /* tree */
5010:   if (gs->max_left_over)
5011:     {gs_gop_vec_tree_plus(gs,in_vals,step);}

5013:   /* process the received data */
5014:   while (iptr = *nodes++)
5015:     {
5016:       msgwait(*ids_in++);
5017:       while (*iptr >= 0)
5018:         {
5019: #if defined BLAS||CBLAS
5020:           axpy(step,1.0,in2,1,dptr1 + *iptr*step,1);
5021: #else
5022:           rvec_add(dptr1 + *iptr*step,in2,step);
5023: #endif
5024:           in2+=step;
5025:           iptr++;
5026:         }
5027:     }

5029:   /* replace vals */
5030:   while (*pw >= 0)
5031:     {
5032:       rvec_copy(in_vals + *pw*step,dptr1,step);
5033:       dptr1+=step;
5034:       pw++;
5035:     }

5037:   /* clear isend message handles */
5038:   while (*ids_out >= 0)
5039:     {msgwait(*ids_out++);}

5041: #ifdef DEBUG
5042:   error_msg_warning("gs_gop_vec_pairwise_plus() end");
5043: #endif

5045: #elif defined MPISRC
5046:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
5047:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
5048:   register int *pw, *list, *size, **nodes;
5049:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
5050:   MPI_Status status;


5053: #ifdef DEBUG
5054:   error_msg_warning("gs_gop_vec_pairwise_plus() start");
5055: #endif

5057:   /* strip and load registers */
5058:   msg_list =list         = gs->pair_list;
5059:   msg_size =size         = gs->msg_sizes;
5060:   msg_nodes=nodes        = gs->node_list;
5061:   iptr=pw                = gs->pw_elm_list;
5062:   dptr1=dptr3            = gs->pw_vals;
5063:   msg_ids_in  = ids_in   = gs->msg_ids_in;
5064:   msg_ids_out = ids_out  = gs->msg_ids_out;
5065:   dptr2                  = gs->out;
5066:   in1=in2                = gs->in;

5068:   /* post the receives */
5069:   /*  msg_nodes=nodes; */
5070:   do
5071:     {
5072:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
5073:          second one *list and do list++ afterwards */
5074:       MPI_Irecv(in1, *size *step, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
5075:                 gs->gs_comm, msg_ids_in++);
5076:       in1 += *size++ *step;
5077:     }
5078:   while (*++msg_nodes);
5079:   msg_nodes=nodes;

5081:   /* load gs values into in out gs buffers */
5082:   while (*iptr >= 0)
5083:     {
5084:       rvec_copy(dptr3,in_vals + *iptr*step,step);
5085:       dptr3+=step;
5086:       iptr++;
5087:     }

5089:   /* load out buffers and post the sends */
5090:   while ((iptr = *msg_nodes++))
5091:     {
5092:       dptr3 = dptr2;
5093:       while (*iptr >= 0)
5094:         {
5095:           rvec_copy(dptr2,dptr1 + *iptr*step,step);
5096:           dptr2+=step;
5097:           iptr++;
5098:         }
5099:       MPI_Isend(dptr3, *msg_size++ *step, REAL_TYPE, *msg_list++,
5100:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
5101:     }

5103:   /* tree */
5104:   if (gs->max_left_over)
5105:     {gs_gop_vec_tree_plus(gs,in_vals,step);}

5107:   /* process the received data */
5108:   msg_nodes=nodes;
5109:   while ((iptr = *nodes++))
5110:     {
5111:       /* Should I check the return value of MPI_Wait() or status? */
5112:       /* Can this loop be replaced by a call to MPI_Waitall()? */
5113:       MPI_Wait(ids_in++, &status);
5114:       while (*iptr >= 0)
5115:         {
5116: #if defined BLAS||CBLAS
5117:           axpy(step,1.0,in2,1,dptr1 + *iptr*step,1);
5118: #else
5119:           rvec_add(dptr1 + *iptr*step,in2,step);
5120: #endif
5121:           in2+=step;
5122:           iptr++;
5123:         }
5124:     }

5126:   /* replace vals */
5127:   while (*pw >= 0)
5128:     {
5129:       rvec_copy(in_vals + *pw*step,dptr1,step);
5130:       dptr1+=step;
5131:       pw++;
5132:     }

5134:   /* clear isend message handles */
5135:   /* This changed for clarity though it could be the same */
5136:   while (*msg_nodes++)
5137:     /* Should I check the return value of MPI_Wait() or status? */
5138:     /* Can this loop be replaced by a call to MPI_Waitall()? */
5139:     {MPI_Wait(ids_out++, &status);}

5141: #ifdef DEBUG
5142:   error_msg_warning("gs_gop_vec_pairwise_plus() end");
5143: #endif

5145: #else
5146:   return;
5147: #endif
5148: }



5152: /******************************************************************************
5153: Function: gather_scatter

5155: Input : 
5156: Output: 
5157: Return: 
5158: Description: 
5159: ******************************************************************************/
5160: static
5161: void
5162: gs_gop_vec_tree_plus(register gs_id *gs, register REAL *vals, register int step)
5163: {
5164:   register int size, *in, *out;
5165:   register REAL *buf, *work;
5166:   int op[] = {GL_ADD,0};

5168: #ifdef DEBUG
5169:   error_msg_warning("start gs_gop_vec_tree_plus()");
5170: #endif

5172:   /* copy over to local variables */
5173:   in   = gs->tree_map_in;
5174:   out  = gs->tree_map_out;
5175:   buf  = gs->tree_buf;
5176:   work = gs->tree_work;
5177:   size = gs->tree_nel*step;

5179:   /* zero out collection buffer */
5180: #if defined  BLAS||CBLAS
5181:   *work = 0.0;
5182:   copy(size,work,0,buf,1);
5183: #else
5184:   rvec_zero(buf,size);
5185: #endif


5188:   /* copy over my contributions */
5189:   while (*in >= 0)
5190:     {
5191: #if defined  BLAS||CBLAS
5192:       copy(step,vals + *in++*step,1,buf + *out++*step,1);
5193: #else
5194:       rvec_copy(buf + *out++*step,vals + *in++*step,step);
5195: #endif
5196:     }

5198:   /* perform fan in/out on full buffer */
5199:   /* must change grop to handle the blas */
5200:   grop(buf,work,size,op);

5202:   /* reset */
5203:   in   = gs->tree_map_in;
5204:   out  = gs->tree_map_out;

5206:   /* get the portion of the results I need */
5207:   while (*in >= 0)
5208:     {
5209: #if defined  BLAS||CBLAS
5210:       copy(step,buf + *out++*step,1,vals + *in++*step,1);
5211: #else
5212:       rvec_copy(vals + *in++*step,buf + *out++*step,step);
5213: #endif
5214:     }

5216: #ifdef DEBUG
5217:   error_msg_warning("start gs_gop_vec_tree_plus()");
5218: #endif
5219: }



5223: /******************************************************************************
5224: Function: gather_scatter

5226: Input : 
5227: Output: 
5228: Return: 
5229: Description: 
5230: ******************************************************************************/
5231: void
5232: gs_gop_hc(register gs_id *gs, register REAL *vals, register const char *op, register int dim)
5233: {
5234: #ifdef DEBUG
5235:   error_msg_warning("gs_gop_hc() start\n");
5236:   if (!gs) {error_msg_fatal("gs_gop_vec() :: passed NULL gs handle!!!\n");}
5237:   if (!op) {error_msg_fatal("gs_gop_vec() :: passed NULL operation!!!\n");}
5238: #endif

5240:   switch (*op) {
5241:   case '+':
5242:     gs_gop_plus_hc(gs,vals,dim);
5243:     break;
5244: #ifdef NOT_YET    
5245:   case '*':
5246:     gs_gop_times(gs,vals);
5247:     break;
5248:   case 'a':
5249:     gs_gop_min_abs(gs,vals);
5250:     break;
5251:   case 'A':
5252:     gs_gop_max_abs(gs,vals);
5253:     break;
5254:   case 'e':
5255:     gs_gop_exists(gs,vals);
5256:     break;
5257:   case 'm':
5258:     gs_gop_min(gs,vals);
5259:     break;
5260:   case 'M':
5261:     gs_gop_max(gs,vals); break;
5262:     /*
5263:     if (*(op+1)=='\0')
5264:       {gs_gop_max(gs,vals); break;}
5265:     else if (*(op+1)=='X')
5266:       {gs_gop_max_abs(gs,vals); break;}
5267:     else if (*(op+1)=='N')
5268:       {gs_gop_min_abs(gs,vals); break;}
5269:     */
5270: #endif
5271:   default:
5272:     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
5273:     error_msg_warning("gs_gop_hc() :: default :: plus\n");
5274:     gs_gop_plus_hc(gs,vals,dim);
5275:     break;
5276:   }
5277: #ifdef DEBUG
5278:   error_msg_warning("gs_gop_hc() end\n");
5279: #endif
5280: }



5284: /******************************************************************************
5285: Function: gather_scatter

5287: Input : 
5288: Output: 
5289: Return: 
5290: Description: 
5291: ******************************************************************************/
5292: static void
5293: gs_gop_plus_hc(register gs_id *gs, register REAL *vals, int dim)
5294: {
5295: #ifdef DEBUG
5296:   error_msg_warning("start gs_gop_hc()\n");
5297:   if (!gs) {error_msg_fatal("gs_gop_hc() passed NULL gs handle!!!\n");}
5298: #endif

5300:   /* if there's nothing to do return */
5301:   if (dim<=0)
5302:     {return;}

5304:   /* can't do more dimensions then exist */
5305:   dim = MIN(dim,i_log2_num_nodes);

5307:   /* local only operations!!! */
5308:   if (gs->num_local)
5309:     {gs_gop_local_plus(gs,vals);}

5311:   /* if intersection tree/pairwise and local isn't empty */
5312:   if (gs->num_local_gop)
5313:     {
5314:       gs_gop_local_in_plus(gs,vals);

5316:       /* pairwise will do tree inside ... */
5317:       if (gs->num_pairs)
5318:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}

5320:       /* tree only */
5321:       else if (gs->max_left_over)
5322:         {gs_gop_tree_plus_hc(gs,vals,dim);}
5323: 
5324:       gs_gop_local_out(gs,vals);
5325:     }
5326:   /* if intersection tree/pairwise and local is empty */
5327:   else
5328:     {
5329:       /* pairwise will do tree inside */
5330:       if (gs->num_pairs)
5331:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
5332: 
5333:       /* tree */
5334:       else if (gs->max_left_over)
5335:         {gs_gop_tree_plus_hc(gs,vals,dim);}
5336:     }

5338: #ifdef DEBUG
5339:   error_msg_warning("end gs_gop_hc()\n");
5340: #endif
5341: }


5344: /******************************************************************************
5345: VERSION 3 :: 

5347: Input : 
5348: Output: 
5349: Return: 
5350: Description: 
5351: ******************************************************************************/
5352: static
5353: void
5354: gs_gop_pairwise_plus_hc(register gs_id *gs, register REAL *in_vals, int dim)
5355: {
5356: #if   defined NXSRC
5357:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
5358:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
5359:   register int *pw, *list, *size, **nodes;
5360:   register int *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
5361:   int i, mask=1;

5363:   for (i=1; i<dim; i++)
5364:     {mask<<=1; mask++;}


5367: #ifdef DEBUG
5368:   error_msg_warning("gs_gop_pairwise_hc() start\n");
5369: #endif

5371:   /* strip and load registers */
5372:   msg_list =list         = gs->pair_list;
5373:   msg_size =size         = gs->msg_sizes;
5374:   msg_nodes=nodes        = gs->node_list;
5375:   iptr=pw                = gs->pw_elm_list;
5376:   dptr1=dptr3            = gs->pw_vals;
5377:   msg_ids_in  = ids_in   = gs->msg_ids_in;
5378:   msg_ids_out = ids_out  = gs->msg_ids_out;
5379:   dptr2                  = gs->out;
5380:   in1=in2                = gs->in;

5382:   /* post the receives */
5383:   do
5384:     {
5385:       if ((my_id|mask)==(*list|mask))
5386:         {
5387:           *msg_ids_in++ = (int) irecv(MSGTAG1 + *list++,(char *)in1,*size*REAL_LEN);
5388:           in1 += *size++;
5389:         }
5390:       else
5391:         {list++; size++;}
5392:     }
5393:   while (*++msg_nodes);

5395:   /* load gs values into in out gs buffers */
5396:   while (*iptr >= 0)
5397:     {*dptr3++ = *(in_vals + *iptr++);}

5399:   /* load out buffers and post the sends */
5400:   list = msg_list;
5401:   msg_nodes=nodes;
5402:   while (iptr = *msg_nodes++)
5403:     {
5404:       if ((my_id|mask)==(*list|mask))
5405:         {
5406:           dptr3 = dptr2;
5407:           while (*iptr >= 0)
5408:             {*dptr2++ = *(dptr1 + *iptr++);}
5409:           *msg_ids_out++ = (int) isend(MSGTAG1+my_id,(char *)dptr3,
5410:                                        *(msg_size++)*REAL_LEN,*list++,0);
5411:         }
5412:       else
5413:         {msg_size++; list++;}
5414:     }
5415:   /* post the receives ... was here*/
5416: 
5417:   /* do the tree while we're waiting */
5418:   if (gs->max_left_over)
5419:     {gs_gop_tree_plus_hc(gs,in_vals,dim);}

5421:   /* process the received data */
5422:   list = msg_list;
5423:   msg_nodes=nodes;
5424:   while (iptr = *msg_nodes++)
5425:     {
5426:       if ((my_id|mask)==(*list|mask))
5427:         {
5428:           msgwait(*ids_in++);
5429:           while (*iptr >= 0)
5430:             {*(dptr1 + *iptr++) += *in2++;}
5431:         }
5432:       list++;
5433:     }

5435:   /* replace vals */
5436:   while (*pw >= 0)
5437:     {*(in_vals + *pw++) = *dptr1++;}

5439:   /* clear isend message handles */
5440:   while (iptr = *nodes++)
5441:     {
5442:       if ((my_id|mask)==(*msg_list|mask))
5443:         {msgwait(*ids_out++);}
5444:       msg_list++;
5445:     }

5447: #ifdef DEBUG
5448:   error_msg_warning("gs_gop_pairwise_hc() end\n");
5449: #endif

5451: #elif defined MPISRC
5452:   register REAL *dptr1, *dptr2, *dptr3, *in1, *in2;
5453:   register int *iptr, *msg_list, *msg_size, **msg_nodes;
5454:   register int *pw, *list, *size, **nodes;
5455:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
5456:   MPI_Status status;
5457:   int i, mask=1;

5459:   for (i=1; i<dim; i++)
5460:     {mask<<=1; mask++;}


5463: #ifdef DEBUG
5464:   error_msg_warning("gs_gop_pairwise_hc() start\n");
5465: #endif

5467:   /* strip and load registers */
5468:   msg_list =list         = gs->pair_list;
5469:   msg_size =size         = gs->msg_sizes;
5470:   msg_nodes=nodes        = gs->node_list;
5471:   iptr=pw                = gs->pw_elm_list;
5472:   dptr1=dptr3            = gs->pw_vals;
5473:   msg_ids_in  = ids_in   = gs->msg_ids_in;
5474:   msg_ids_out = ids_out  = gs->msg_ids_out;
5475:   dptr2                  = gs->out;
5476:   in1=in2                = gs->in;

5478:   /* post the receives */
5479:   /*  msg_nodes=nodes; */
5480:   do
5481:     {
5482:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
5483:          second one *list and do list++ afterwards */
5484:       if ((my_id|mask)==(*list|mask))
5485:         {
5486:           MPI_Irecv(in1, *size, REAL_TYPE, MPI_ANY_SOURCE, MSGTAG1 + *list++,
5487:                     gs->gs_comm, msg_ids_in++);
5488:           in1 += *size++;
5489:         }
5490:       else
5491:         {list++; size++;}
5492:     }
5493:   while (*++msg_nodes);

5495:   /* load gs values into in out gs buffers */
5496:   while (*iptr >= 0)
5497:     {*dptr3++ = *(in_vals + *iptr++);}

5499:   /* load out buffers and post the sends */
5500:   msg_nodes=nodes;
5501:   list = msg_list;
5502:   while ((iptr = *msg_nodes++))
5503:     {
5504:       if ((my_id|mask)==(*list|mask))
5505:         {
5506:           dptr3 = dptr2;
5507:           while (*iptr >= 0)
5508:             {*dptr2++ = *(dptr1 + *iptr++);}
5509:           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
5510:           /* is msg_ids_out++ correct? */
5511:           MPI_Isend(dptr3, *msg_size++, REAL_TYPE, *list++,
5512:                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
5513:         }
5514:       else
5515:         {list++; msg_size++;}
5516:     }

5518:   /* do the tree while we're waiting */
5519:   if (gs->max_left_over)
5520:     {gs_gop_tree_plus_hc(gs,in_vals,dim);}

5522:   /* process the received data */
5523:   msg_nodes=nodes;
5524:   list = msg_list;
5525:   while ((iptr = *nodes++))
5526:     {
5527:       if ((my_id|mask)==(*list|mask))
5528:         {
5529:           /* Should I check the return value of MPI_Wait() or status? */
5530:           /* Can this loop be replaced by a call to MPI_Waitall()? */
5531:           MPI_Wait(ids_in++, &status);
5532:           while (*iptr >= 0)
5533:             {*(dptr1 + *iptr++) += *in2++;}
5534:         }
5535:       list++;
5536:     }

5538:   /* replace vals */
5539:   while (*pw >= 0)
5540:     {*(in_vals + *pw++) = *dptr1++;}

5542:   /* clear isend message handles */
5543:   /* This changed for clarity though it could be the same */
5544:   while (*msg_nodes++)
5545:     {
5546:       if ((my_id|mask)==(*msg_list|mask))
5547:         {
5548:           /* Should I check the return value of MPI_Wait() or status? */
5549:           /* Can this loop be replaced by a call to MPI_Waitall()? */
5550:           MPI_Wait(ids_out++, &status);
5551:         }
5552:       msg_list++;
5553:     }

5555: #ifdef DEBUG
5556:   error_msg_warning("gs_gop_pairwise_hc() end\n");
5557: #endif

5559: #else
5560:   return;
5561: #endif
5562: }



5566: /******************************************************************************
5567: Function: gather_scatter

5569: Input : 
5570: Output: 
5571: Return: 
5572: Description: 
5573: ******************************************************************************/
5574: static
5575: void
5576: gs_gop_tree_plus_hc(gs_id *gs, REAL *vals, int dim)
5577: {
5578:   int size;
5579:   int *in, *out;
5580:   REAL *buf, *work;
5581:   int op[] = {GL_ADD,0};

5583: #ifdef DEBUG
5584:   error_msg_warning("start gs_gop_tree_plus_hc()\n");
5585: #endif
5586: 
5587:   in   = gs->tree_map_in;
5588:   out  = gs->tree_map_out;
5589:   buf  = gs->tree_buf;
5590:   work = gs->tree_work;
5591:   size = gs->tree_nel;

5593: #if defined  BLAS||CBLAS
5594:   *work = 0.0;
5595:   copy(size,work,0,buf,1);
5596: #else
5597:   rvec_zero(buf,size);
5598: #endif

5600:   while (*in >= 0)
5601:     {*(buf + *out++) = *(vals + *in++);}

5603:   in   = gs->tree_map_in;
5604:   out  = gs->tree_map_out;

5606:   grop_hc(buf,work,size,op,dim);

5608:   while (*in >= 0)
5609:     {*(vals + *in++) = *(buf + *out++);}

5611: #ifdef DEBUG
5612:   error_msg_warning("end gs_gop_tree_plus_hc()\n");
5613: #endif
5614: }