Actual source code: xyt.c

  1: /*$Id: xyt.c,v 1.6 2001/04/12 21:03:28 balay Exp $*/
  2: /*************************************xyt.c************************************
  3: Module Name: xyt
  4: Module Info:

  6: author:  Henry M. Tufo III
  7: e-mail:  hmt@asci.uchicago.edu
  8: contact:
  9: +--------------------------------+--------------------------------+
 10: |MCS Division - Building 221     |Department of Computer Science  |
 11: |Argonne National Laboratory     |Ryerson 152                     |
 12: |9700 S. Cass Avenue             |The University of Chicago       |
 13: |Argonne, IL  60439              |Chicago, IL  60637              |
 14: |(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
 15: +--------------------------------+--------------------------------+

 17: Last Modification: 3.20.01
 18: **************************************xyt.c***********************************/


 21: /*************************************xyt.c************************************
 22: NOTES ON USAGE: 

 24: **************************************xyt.c***********************************/
 25: #include <stdio.h>
 26: #include <stdlib.h>
 27: #include <limits.h>
 28: #include <float.h>
 29: #include <math.h>
 30: #if   defined NXSRC
 31: #include <nx.h>
 32: #elif defined MPISRC
 33: #include <mpi.h>
 34: #endif

 36:  #include const.h
 37:  #include types.h
 38:  #include comm.h
 39:  #include error.h
 40:  #include ivec.h
 41: #include "bss_malloc.h"
 42:  #include queue.h
 43:  #include gs.h
 44: #ifdef MLSRC
 45: #include "ml_include.h"
 46: #endif
 47:  #include blas.h
 48:  #include xyt.h

 50: #define LEFT  -1
 51: #define RIGHT  1
 52: #define BOTH   0
 53: #define MAX_FORTRAN_HANDLES  10

 55: typedef struct xyt_solver_info {
 56:   int n, m, n_global, m_global;
 57:   int nnz, max_nnz, msg_buf_sz;
 58:   int *nsep, *lnsep, *fo, nfo, *stages;
 59:   int *xcol_sz, *xcol_indices;
 60:   REAL **xcol_vals, *x, *solve_uu, *solve_w;
 61:   int *ycol_sz, *ycol_indices;
 62:   REAL **ycol_vals, *y;
 63:   int nsolves;
 64:   REAL tot_solve_time;
 65: } xyt_info;

 67: 
 68: typedef struct matvec_info {
 69:   int n, m, n_global, m_global;
 70:   int *local2global;
 71:   gs_ADT gs_handle;
 72:   int (*matvec)(struct matvec_info*,REAL*,REAL*);
 73:   void *grid_data;
 74: } mv_info;

 76: struct xyt_CDT{
 77:   int id;
 78:   int ns;
 79:   int level;
 80:   xyt_info *info;
 81:   mv_info  *mvi;
 82: };

 84: static int n_xyt=0;
 85: static int n_xyt_handles=0;

 87: /* prototypes */
 88: static void do_xyt_solve(xyt_ADT xyt_handle, REAL *rhs);
 89: static void check_init(void);
 90: static void check_handle(xyt_ADT xyt_handle);
 91: static void det_separators(xyt_ADT xyt_handle);
 92: static void do_matvec(mv_info *A, REAL *v, REAL *u);
 93: static int xyt_generate(xyt_ADT xyt_handle);
 94: static int do_xyt_factor(xyt_ADT xyt_handle);
 95: static mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data);
 96: #ifdef MLSRC
 97: void ML_XYT_solve(xyt_ADT xyt_handle, int lx, double *x, int lb, double *b);
 98: int  ML_XYT_factor(xyt_ADT xyt_handle, int *local2global, int n, int m,
 99:                    void *matvec, void *grid_data, int grid_tag, ML *my_ml);
100: #endif


103: /*************************************xyt.c************************************
104: Function: XYT_new()

106: Input :
107: Output:
108: Return:
109: Description:
110: **************************************xyt.c***********************************/
111: xyt_ADT 
112: XYT_new(void)
113: {
114:   xyt_ADT xyt_handle;


117: #ifdef DEBUG
118:   error_msg_warning("XYT_new() :: start %d\n",n_xyt_handles);
119: #endif

121:   /* rolling count on n_xyt ... pot. problem here */
122:   n_xyt_handles++;
123:   xyt_handle       = (xyt_ADT)bss_malloc(sizeof(struct xyt_CDT));
124:   xyt_handle->id   = ++n_xyt;
125:   xyt_handle->info = NULL;
126:   xyt_handle->mvi  = NULL;

128: #ifdef DEBUG
129:   error_msg_warning("XYT_new() :: end   %d\n",n_xyt_handles);
130: #endif

132:   return(xyt_handle);
133: }


136: /*************************************xyt.c************************************
137: Function: XYT_factor()

139: Input :
140: Output:
141: Return:
142: Description:
143: **************************************xyt.c***********************************/
144: int
145: XYT_factor(xyt_ADT xyt_handle, /* prev. allocated xyt  handle */
146:            int *local2global,  /* global column mapping       */
147:            int n,              /* local num rows              */
148:            int m,              /* local num cols              */
149:            void *matvec,       /* b_loc=A_local.x_loc         */
150:            void *grid_data     /* grid data for matvec        */
151:            )
152: {
153: #ifdef DEBUG
154:   int flag;


157:   error_msg_warning("XYT_factor() :: start %d\n",n_xyt_handles);
158: #endif

160:   check_init();
161:   check_handle(xyt_handle);

163:   /* only 2^k for now and all nodes participating */
164:   if ((1<<(xyt_handle->level=i_log2_num_nodes))!=num_nodes)
165:     {error_msg_fatal("only 2^k for now and MPI_COMM_WORLD!!! %d != %d\n",1<<i_log2_num_nodes,num_nodes);}

167:   /* space for X info */
168:   xyt_handle->info = (xyt_info*)bss_malloc(sizeof(xyt_info));

170:   /* set up matvec handles */
171:   xyt_handle->mvi  = set_mvi(local2global, n, m, matvec, grid_data);

173:   /* matrix is assumed to be of full rank */
174:   /* LATER we can reset to indicate rank def. */
175:   xyt_handle->ns=0;

177:   /* determine separators and generate firing order - NB xyt info set here */
178:   det_separators(xyt_handle);

180: #ifdef DEBUG
181:   flag = do_xyt_factor(xyt_handle);
182:   error_msg_warning("XYT_factor() :: end   %d (flag=%d)\n",n_xyt_handles,flag);
183:   return(flag);
184: #else
185:   return(do_xyt_factor(xyt_handle));
186: #endif
187: }


190: /*************************************xyt.c************************************
191: Function: XYT_solve

193: Input :
194: Output:
195: Return:
196: Description:
197: **************************************xyt.c***********************************/
198: int
199: XYT_solve(xyt_ADT xyt_handle, double *x, double *b)
200: {
201: #if defined( NXSRC) && defined(TIMING)
202:   double dclock(),    time=0.0;
203: #elif defined(MPISRC) && defined(TIMING)
204:   double MPI_Wtime(), time=0.0;
205: #endif
206: #ifdef INFO
207:   REAL vals[3], work[3];
208:   int op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
209: #endif


212: #ifdef DEBUG
213:   error_msg_warning("XYT_solve() :: start %d\n",n_xyt_handles);
214: #endif

216:   check_init();
217:   check_handle(xyt_handle);

219: #if defined(NXSRC) && defined(TIMING)
220:   time = dclock();
221: #elif defined(MPISRC) && defined(TIMING)
222:   time = MPI_Wtime();
223: #endif

225:   /* need to copy b into x? */
226:   if (b)
227:     {rvec_copy(x,b,xyt_handle->mvi->n);}
228:   do_xyt_solve(xyt_handle,x);

230: #if defined(NXSRC) && defined(TIMING)
231:   time = dclock() - time;
232: #elif defined(MPISRC) && defined(TIMING)
233:   time = MPI_Wtime() - time;
234: #endif

236: #ifdef TIMING
237:   xyt_handle->info->nsolves++;
238:   xyt_handle->info->tot_solve_time+=time;

240: #ifdef INFO
241:   vals[0]=vals[1]=vals[2]= (REAL) time;
242:   grop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
243:   if (!my_id)
244:     {
245:       printf("%d :: min   xyt_slv=%g\n",my_id,vals[0]);
246:       printf("%d :: max   xyt_slv=%g\n",my_id,vals[1]);
247:       printf("%d :: avg   xyt_slv=%g\n",my_id,vals[2]/num_nodes);
248:     }
249: #endif
250: #endif

252: #ifdef DEBUG
253:   error_msg_warning("XYT_solve() :: end   %d\n",n_xyt_handles);
254: #endif

256:   return(0);
257: }


260: /*************************************xyt.c************************************
261: Function: XYT_free()

263: Input :
264: Output:
265: Return:
266: Description:
267: **************************************xyt.c***********************************/
268: int
269: XYT_free(xyt_ADT xyt_handle)
270: {
271: #ifdef DEBUG
272:   error_msg_warning("XYT_free() :: start %d\n",n_xyt_handles);
273: #endif

275:   check_init();
276:   check_handle(xyt_handle);
277:   n_xyt_handles--;

279:   bss_free(xyt_handle->info->nsep);
280:   bss_free(xyt_handle->info->lnsep);
281:   bss_free(xyt_handle->info->fo);
282:   bss_free(xyt_handle->info->stages);
283:   bss_free(xyt_handle->info->solve_uu);
284:   bss_free(xyt_handle->info->solve_w);
285:   bss_free(xyt_handle->info->x);
286:   bss_free(xyt_handle->info->xcol_vals);
287:   bss_free(xyt_handle->info->xcol_sz);
288:   bss_free(xyt_handle->info->xcol_indices);
289:   bss_free(xyt_handle->info->y);
290:   bss_free(xyt_handle->info->ycol_vals);
291:   bss_free(xyt_handle->info->ycol_sz);
292:   bss_free(xyt_handle->info->ycol_indices);
293:   bss_free(xyt_handle->info);
294:   bss_free(xyt_handle->mvi->local2global);
295:    gs_free(xyt_handle->mvi->gs_handle);
296:   bss_free(xyt_handle->mvi);
297:   bss_free(xyt_handle);

299: 
300: #ifdef DEBUG
301:   error_msg_warning("perm frees = %d\n",perm_frees());
302:   error_msg_warning("perm calls = %d\n",perm_calls());
303:   error_msg_warning("bss frees  = %d\n",bss_frees());
304:   error_msg_warning("bss calls  = %d\n",bss_calls());
305:   error_msg_warning("XYT_free() :: end   %d\n",n_xyt_handles);
306: #endif

308:   /* if the check fails we nuke */
309:   /* if NULL pointer passed to bss_free we nuke */
310:   /* if the calls to free fail that's not my problem */
311:   return(0);
312: }


315: #ifdef MLSRC
316: /*************************************xyt.c************************************
317: Function: ML_XYT_factor()

319: Input :
320: Output:
321: Return:
322: Description:

324: ML requires that the solver call be checked in
325: **************************************xyt.c***********************************/
326: int
327: ML_XYT_factor(xyt_ADT xyt_handle,  /* prev. allocated xyt  handle */
328:               int *local2global,   /* global column mapping       */
329:               int n,               /* local num rows              */
330:               int m,               /* local num cols              */
331:               void *matvec,        /* b_loc=A_local.x_loc         */
332:               void *grid_data,     /* grid data for matvec        */
333:               int grid_tag,        /* grid tag for ML_Set_CSolve  */
334:               ML *my_ml            /* ML handle                   */
335:               )
336: {
337: #ifdef DEBUG
338:   int flag;
339: #endif


342: #ifdef DEBUG
343:   error_msg_warning("ML_XYT_factor() :: start %d\n",n_xyt_handles);
344: #endif

346:   check_init();
347:   check_handle(xyt_handle);
348:   if (my_ml->comm->ML_mypid!=my_id)
349:     {error_msg_fatal("ML_XYT_factor bad my_id %d\t%d\n",
350:                      my_ml->comm->ML_mypid,my_id);}
351:   if (my_ml->comm->ML_nprocs!=num_nodes)
352:     {error_msg_fatal("ML_XYT_factor bad np %d\t%d\n",
353:                      my_ml->comm->ML_nprocs,num_nodes);}

355:   my_ml->SingleLevel[grid_tag].csolve->func->external = ML_XYT_solve;
356:   my_ml->SingleLevel[grid_tag].csolve->func->ML_id = ML_EXTERNAL;
357:   my_ml->SingleLevel[grid_tag].csolve->data = xyt_handle;

359:   /* done ML specific stuff ... back to reg sched pgm */
360: #ifdef DEBUG
361:   flag = XYT_factor(xyt_handle, local2global, n, m, matvec, grid_data);
362:   error_msg_warning("ML_XYT_factor() :: end   %d (flag=%d)\n",n_xyt_handles,flag);
363:   return(flag);
364: #else
365:   return(XYT_factor(xyt_handle, local2global, n, m, matvec, grid_data));
366: #endif
367: }


370: /*************************************xyt.c************************************
371: Function: ML_XYT_solve

373: Input :
374: Output:
375: Return:
376: Description:
377: **************************************xyt.c***********************************/
378: void 
379: ML_XYT_solve(xyt_ADT xyt_handle, int lx, double *sol, int lb, double *rhs)
380: {
381:   XYT_solve(xyt_handle, sol, rhs);
382: }
383: #endif


386: /*************************************xyt.c************************************
387: Function: 

389: Input : 
390: Output: 
391: Return: 
392: Description:  
393: **************************************xyt.c***********************************/
394: int
395: XYT_stats(xyt_ADT xyt_handle)
396: {
397:   int  op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
398:   int fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
399:   int   vals[9],  work[9];
400:   REAL fvals[3], fwork[3];


403: #ifdef DEBUG
404:   error_msg_warning("xyt_stats() :: begin\n");
405: #endif

407:   check_init();
408:   check_handle(xyt_handle);

410:   /* if factorization not done there are no stats */
411:   if (!xyt_handle->info||!xyt_handle->mvi)
412:     {
413:       if (!my_id)
414:         {printf("XYT_stats() :: no stats available!\n");}
415:       return 1;
416:     }

418:   vals[0]=vals[1]=vals[2]=xyt_handle->info->nnz;
419:   vals[3]=vals[4]=vals[5]=xyt_handle->mvi->n;
420:   vals[6]=vals[7]=vals[8]=xyt_handle->info->msg_buf_sz;
421:   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);

423:   fvals[0]=fvals[1]=fvals[2]
424:     =xyt_handle->info->tot_solve_time/xyt_handle->info->nsolves++;
425:   grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);

427:   if (!my_id)
428:     {
429:       printf("%d :: min   xyt_nnz=%d\n",my_id,vals[0]);
430:       printf("%d :: max   xyt_nnz=%d\n",my_id,vals[1]);
431:       printf("%d :: avg   xyt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes);
432:       printf("%d :: tot   xyt_nnz=%d\n",my_id,vals[2]);
433:       printf("%d :: xyt   C(2d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5)));
434:       printf("%d :: xyt   C(3d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667)));
435:       printf("%d :: min   xyt_n  =%d\n",my_id,vals[3]);
436:       printf("%d :: max   xyt_n  =%d\n",my_id,vals[4]);
437:       printf("%d :: avg   xyt_n  =%g\n",my_id,1.0*vals[5]/num_nodes);
438:       printf("%d :: tot   xyt_n  =%d\n",my_id,vals[5]);
439:       printf("%d :: min   xyt_buf=%d\n",my_id,vals[6]);
440:       printf("%d :: max   xyt_buf=%d\n",my_id,vals[7]);
441:       printf("%d :: avg   xyt_buf=%g\n",my_id,1.0*vals[8]/num_nodes);
442:       printf("%d :: min   xyt_slv=%g\n",my_id,fvals[0]);
443:       printf("%d :: max   xyt_slv=%g\n",my_id,fvals[1]);
444:       printf("%d :: avg   xyt_slv=%g\n",my_id,fvals[2]/num_nodes);
445:     }

447: #ifdef DEBUG
448:   error_msg_warning("xyt_stats() :: end\n");
449: #endif

451:   return(0);
452: }


455: /*************************************xyt.c************************************
456: Function: do_xyt_factor

458: Input : 
459: Output: 
460: Return: 
461: Description: get A_local, local portion of global coarse matrix which 
462: is a row dist. nxm matrix w/ n<m.
463:    o my_ml holds address of ML struct associated w/A_local and coarse grid
464:    o local2global holds global number of column i (i=0,...,m-1)
465:    o local2global holds global number of row    i (i=0,...,n-1)
466:    o mylocmatvec performs A_local . vec_local (note that gs is performed using 
467:    gs_init/gop).

469: mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
470: mylocmatvec (void :: void *data, double *in, double *out)
471: **************************************xyt.c***********************************/
472: static
473: int
474: do_xyt_factor(xyt_ADT xyt_handle)
475: {
476:   int flag;


479: #ifdef DEBUG
480:   error_msg_warning("do_xyt_factor() :: begin\n");
481: #endif

483:   flag=xyt_generate(xyt_handle);

485: #ifdef INFO
486:   XYT_stats(xyt_handle);
487:   bss_stats();
488:   perm_stats();
489: #endif

491: #ifdef DEBUG
492:   error_msg_warning("do_xyt_factor() :: end\n");
493: #endif

495:   return(flag);
496: }


499: /*************************************xyt.c************************************
500: Function: 

502: Input : 
503: Output: 
504: Return: 
505: Description:  
506: **************************************xyt.c***********************************/
507: static
508: int
509: xyt_generate(xyt_ADT xyt_handle)
510: {
511:   int i,j,k,idx;
512:   int dim, col;
513:   REAL *u, *uu, *v, *z, *w, alpha, alpha_w;
514:   int *segs;
515:   int op[] = {GL_ADD,0};
516:   int off, len;
517:   REAL *x_ptr, *y_ptr;
518:   int *iptr, flag;
519:   int start=0, end, work;
520:   int op2[] = {GL_MIN,0};
521:   gs_ADT gs_handle;
522:   int *nsep, *lnsep, *fo;
523:   int a_n=xyt_handle->mvi->n;
524:   int a_m=xyt_handle->mvi->m;
525:   int *a_local2global=xyt_handle->mvi->local2global;
526:   int level;
527:   int n, m;
528:   int *xcol_sz, *xcol_indices, *stages;
529:   REAL **xcol_vals, *x;
530:   int *ycol_sz, *ycol_indices;
531:   REAL **ycol_vals, *y;
532:   int n_global;
533:   int xt_nnz=0, xt_max_nnz=0;
534:   int yt_nnz=0, yt_max_nnz=0;
535:   int xt_zero_nnz  =0;
536:   int xt_zero_nnz_0=0;
537:   int yt_zero_nnz  =0;
538:   int yt_zero_nnz_0=0;


541: #ifdef DEBUG
542:   error_msg_warning("xyt_generate() :: begin\n");
543: #endif

545:   n=xyt_handle->mvi->n;
546:   nsep=xyt_handle->info->nsep;
547:   lnsep=xyt_handle->info->lnsep;
548:   fo=xyt_handle->info->fo;
549:   end=lnsep[0];
550:   level=xyt_handle->level;
551:   gs_handle=xyt_handle->mvi->gs_handle;

553:   /* is there a null space? */
554:   /* LATER add in ability to detect null space by checking alpha */
555:   for (i=0, j=0; i<=level; i++)
556:     {j+=nsep[i];}

558:   m = j-xyt_handle->ns;
559:   if (m!=j)
560:     {printf("xyt_generate() :: null space exists %d %d %d\n",m,j,xyt_handle->ns);}

562:   error_msg_warning("xyt_generate() :: X(%d,%d)\n",n,m);

564:   /* get and initialize storage for x local         */
565:   /* note that x local is nxm and stored by columns */
566:   xcol_sz = (int *) bss_malloc(m*INT_LEN);
567:   xcol_indices = (int *) bss_malloc((2*m+1)*sizeof(int));
568:   xcol_vals = (REAL **) bss_malloc(m*sizeof(REAL *));
569:   for (i=j=0; i<m; i++, j+=2)
570:     {
571:       xcol_indices[j]=xcol_indices[j+1]=xcol_sz[i]=-1;
572:       xcol_vals[i] = NULL;
573:     }
574:   xcol_indices[j]=-1;

576:   /* get and initialize storage for y local         */
577:   /* note that y local is nxm and stored by columns */
578:   ycol_sz = (int *) bss_malloc(m*INT_LEN);
579:   ycol_indices = (int *) bss_malloc((2*m+1)*sizeof(int));
580:   ycol_vals = (REAL **) bss_malloc(m*sizeof(REAL *));
581:   for (i=j=0; i<m; i++, j+=2)
582:     {
583:       ycol_indices[j]=ycol_indices[j+1]=ycol_sz[i]=-1;
584:       ycol_vals[i] = NULL;
585:     }
586:   ycol_indices[j]=-1;

588:   /* size of separators for each sub-hc working from bottom of tree to top */
589:   /* this looks like nsep[]=segments */
590:   stages = (int *) bss_malloc((level+1)*INT_LEN);
591:   segs   = (int *) bss_malloc((level+1)*INT_LEN);
592:   ivec_zero(stages,level+1);
593:   ivec_copy(segs,nsep,level+1);
594:   for (i=0; i<level; i++)
595:     {segs[i+1] += segs[i];}
596:   stages[0] = segs[0];

598:   /* temporary vectors  */
599:   u  = (REAL *) bss_malloc(n*sizeof(REAL));
600:   z  = (REAL *) bss_malloc(n*sizeof(REAL));
601:   v  = (REAL *) bss_malloc(a_m*sizeof(REAL));
602:   uu = (REAL *) bss_malloc(m*sizeof(REAL));
603:   w  = (REAL *) bss_malloc(m*sizeof(REAL));

605:   /* extra nnz due to replication of vertices across separators */
606:   for (i=1, j=0; i<=level; i++)
607:     {j+=nsep[i];}

609:   /* storage for sparse x values */
610:   n_global = xyt_handle->info->n_global;
611:   xt_max_nnz = yt_max_nnz = (int)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes;
612:   x = (REAL *) bss_malloc(xt_max_nnz*sizeof(REAL));
613:   y = (REAL *) bss_malloc(yt_max_nnz*sizeof(REAL));

615:   /* LATER - can embed next sep to fire in gs */
616:   /* time to make the donuts - generate X factor */
617:   for (dim=i=j=0;i<m;i++)
618:     {
619:       /* time to move to the next level? */
620:       while (i==segs[dim])
621:         {
622: #ifdef SAFE          
623:           if (dim==level)
624:             {error_msg_fatal("dim about to exceed level\n"); break;}
625: #endif

627:           stages[dim++]=i;
628:           end+=lnsep[dim];
629:         }
630:       stages[dim]=i;

632:       /* which column are we firing? */
633:       /* i.e. set v_l */
634:       /* use new seps and do global min across hc to determine which one to fire */
635:       (start<end) ? (col=fo[start]) : (col=INT_MAX);
636:       giop_hc(&col,&work,1,op2,dim);

638:       /* shouldn't need this */
639:       if (col==INT_MAX)
640:         {
641:           error_msg_warning("hey ... col==INT_MAX??\n");
642:           continue;
643:         }

645:       /* do I own it? I should */
646:       rvec_zero(v ,a_m);
647:       if (col==fo[start])
648:         {
649:           start++;
650:           idx=ivec_linear_search(col, a_local2global, a_n);
651:           if (idx!=-1)
652:             {v[idx] = 1.0; j++;}
653:           else
654:             {error_msg_fatal("NOT FOUND!\n");}
655:         }
656:       else
657:         {
658:           idx=ivec_linear_search(col, a_local2global, a_m);
659:           if (idx!=-1)
660:             {v[idx] = 1.0;}
661:         }

663:       /* perform u = A.v_l */
664:       rvec_zero(u,n);
665:       do_matvec(xyt_handle->mvi,v,u);

667:       /* uu =  X^T.u_l (local portion) */
668:       /* technically only need to zero out first i entries */
669:       /* later turn this into an XYT_solve call ? */
670:       rvec_zero(uu,m);
671:       y_ptr=y;
672:       iptr = ycol_indices;
673:       for (k=0; k<i; k++)
674:         {
675:           off = *iptr++;
676:           len = *iptr++;

678: #if   BLAS||CBLAS
679:           uu[k] = dot(len,u+off,1,y_ptr,1);
680: #else
681:           uu[k] = rvec_dot(u+off,y_ptr,len);
682: #endif
683:           y_ptr+=len;
684:         }

686:       /* uu = X^T.u_l (comm portion) */
687:       ssgl_radd  (uu, w, dim, stages);

689:       /* z = X.uu */
690:       rvec_zero(z,n);
691:       x_ptr=x;
692:       iptr = xcol_indices;
693:       for (k=0; k<i; k++)
694:         {
695:           off = *iptr++;
696:           len = *iptr++;

698: #if   BLAS||CBLAS
699:           axpy(len,uu[k],x_ptr,1,z+off,1);
700: #else
701:           rvec_axpy(z+off,x_ptr,uu[k],len);
702: #endif
703:           x_ptr+=len;
704:         }

706:       /* compute v_l = v_l - z */
707:       rvec_zero(v+a_n,a_m-a_n);
708: #if   BLAS||CBLAS
709:       axpy(n,-1.0,z,1,v,1);
710: #else
711:       rvec_axpy(v,z,-1.0,n);
712: #endif

714:       /* compute u_l = A.v_l */
715:       if (a_n!=a_m)
716:         {gs_gop_hc(gs_handle,v,"+\0",dim);}
717:       rvec_zero(u,n);
718:      do_matvec(xyt_handle->mvi,v,u);

720:       /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - local portion */
721: #if   BLAS||CBLAS
722:       alpha = ddot(n,u,1,u,1);
723: #else
724:       alpha = rvec_dot(u,u,n);
725: #endif
726:       /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - comm portion */
727:       grop_hc(&alpha, &alpha_w, 1, op, dim);

729:       alpha = (REAL) sqrt((double)alpha);

731:       /* check for small alpha                             */
732:       /* LATER use this to detect and determine null space */
733: #ifdef r8
734:       if (fabs(alpha)<1.0e-14)
735:         {error_msg_fatal("bad alpha! %g\n",alpha);}
736: #else
737:       if (fabs((double) alpha) < 1.0e-6)
738:         {error_msg_fatal("bad alpha! %g\n",alpha);}
739: #endif

741:       /* compute v_l = v_l/sqrt(alpha) */
742:       rvec_scale(v,1.0/alpha,n);
743:       rvec_scale(u,1.0/alpha,n);

745:       /* add newly generated column, v_l, to X */
746:       flag = 1;
747:       off=len=0;
748:       for (k=0; k<n; k++)
749:         {
750:           if (v[k]!=0.0)
751:             {
752:               len=k;
753:               if (flag)
754:                 {off=k; flag=0;}
755:             }
756:         }

758:       len -= (off-1);

760:       if (len>0)
761:         {
762:           if ((xt_nnz+len)>xt_max_nnz)
763:             {
764:               error_msg_warning("increasing space for X by 2x!\n");
765:               xt_max_nnz *= 2;
766:               x_ptr = (REAL *) bss_malloc(xt_max_nnz*sizeof(REAL));
767:               rvec_copy(x_ptr,x,xt_nnz);
768:               bss_free(x);
769:               x = x_ptr;
770:               x_ptr+=xt_nnz;
771:             }
772:           xt_nnz += len;
773:           rvec_copy(x_ptr,v+off,len);

775:           /* keep track of number of zeros */
776:           if (dim)
777:             {
778:               for (k=0; k<len; k++)
779:                 {
780:                   if (x_ptr[k]==0.0)
781:                     {xt_zero_nnz++;}
782:                 }
783:             }
784:           else
785:             {
786:               for (k=0; k<len; k++)
787:                 {
788:                   if (x_ptr[k]==0.0)
789:                     {xt_zero_nnz_0++;}
790:                 }
791:             }
792:           xcol_indices[2*i] = off;
793:           xcol_sz[i] = xcol_indices[2*i+1] = len;
794:           xcol_vals[i] = x_ptr;
795:         }
796:       else
797:         {
798:           xcol_indices[2*i] = 0;
799:           xcol_sz[i] = xcol_indices[2*i+1] = 0;
800:           xcol_vals[i] = x_ptr;
801:         }


804:       /* add newly generated column, u_l, to Y */
805:       flag = 1;
806:       off=len=0;
807:       for (k=0; k<n; k++)
808:         {
809:           if (u[k]!=0.0)
810:             {
811:               len=k;
812:               if (flag)
813:                 {off=k; flag=0;}
814:             }
815:         }

817:       len -= (off-1);

819:       if (len>0)
820:         {
821:           if ((yt_nnz+len)>yt_max_nnz)
822:             {
823:               error_msg_warning("increasing space for Y by 2x!\n");
824:               yt_max_nnz *= 2;
825:               y_ptr = (REAL *) bss_malloc(yt_max_nnz*sizeof(REAL));
826:               rvec_copy(y_ptr,y,yt_nnz);
827:               bss_free(y);
828:               y = y_ptr;
829:               y_ptr+=yt_nnz;
830:             }
831:           yt_nnz += len;
832:           rvec_copy(y_ptr,u+off,len);

834:           /* keep track of number of zeros */
835:           if (dim)
836:             {
837:               for (k=0; k<len; k++)
838:                 {
839:                   if (y_ptr[k]==0.0)
840:                     {yt_zero_nnz++;}
841:                 }
842:             }
843:           else
844:             {
845:               for (k=0; k<len; k++)
846:                 {
847:                   if (y_ptr[k]==0.0)
848:                     {yt_zero_nnz_0++;}
849:                 }
850:             }
851:           ycol_indices[2*i] = off;
852:           ycol_sz[i] = ycol_indices[2*i+1] = len;
853:           ycol_vals[i] = y_ptr;
854:         }
855:       else
856:         {
857:           ycol_indices[2*i] = 0;
858:           ycol_sz[i] = ycol_indices[2*i+1] = 0;
859:           ycol_vals[i] = y_ptr;
860:         }
861:     }

863:   /* close off stages for execution phase */
864:   while (dim!=level)
865:     {
866:       stages[dim++]=i;
867:       error_msg_warning("disconnected!!! dim(%d)!=level(%d)\n",dim,level);
868:     }
869:   stages[dim]=i;

871:   xyt_handle->info->n=xyt_handle->mvi->n;
872:   xyt_handle->info->m=m;
873:   xyt_handle->info->nnz=xt_nnz + yt_nnz;
874:   xyt_handle->info->max_nnz=xt_max_nnz + yt_max_nnz;
875:   xyt_handle->info->msg_buf_sz=stages[level]-stages[0];
876:   xyt_handle->info->solve_uu = (REAL *) bss_malloc(m*sizeof(REAL));
877:   xyt_handle->info->solve_w  = (REAL *) bss_malloc(m*sizeof(REAL));
878:   xyt_handle->info->x=x;
879:   xyt_handle->info->xcol_vals=xcol_vals;
880:   xyt_handle->info->xcol_sz=xcol_sz;
881:   xyt_handle->info->xcol_indices=xcol_indices;
882:   xyt_handle->info->stages=stages;
883:   xyt_handle->info->y=y;
884:   xyt_handle->info->ycol_vals=ycol_vals;
885:   xyt_handle->info->ycol_sz=ycol_sz;
886:   xyt_handle->info->ycol_indices=ycol_indices;

888:   bss_free(segs);
889:   bss_free(u);
890:   bss_free(v);
891:   bss_free(uu);
892:   bss_free(z);
893:   bss_free(w);

895: #ifdef DEBUG
896:   error_msg_warning("xyt_generate() :: end\n");
897: #endif

899:   return(0);
900: }


903: /*************************************xyt.c************************************
904: Function: 

906: Input : 
907: Output: 
908: Return: 
909: Description:  
910: **************************************xyt.c***********************************/
911: static
912: void
913: do_xyt_solve(xyt_ADT xyt_handle, register REAL *uc)
914: {
915:   register int off, len, *iptr;
916:   int level       =xyt_handle->level;
917:   int n           =xyt_handle->info->n;
918:   int m           =xyt_handle->info->m;
919:   int *stages     =xyt_handle->info->stages;
920:   int *xcol_indices=xyt_handle->info->xcol_indices;
921:   int *ycol_indices=xyt_handle->info->ycol_indices;
922:   register REAL *x_ptr, *y_ptr, *uu_ptr;
923: #if   BLAS||CBLAS
924:   REAL zero=0.0;
925: #endif
926:   REAL *solve_uu=xyt_handle->info->solve_uu;
927:   REAL *solve_w =xyt_handle->info->solve_w;
928:   REAL *x       =xyt_handle->info->x;
929:   REAL *y       =xyt_handle->info->y;

931: #ifdef DEBUG
932:   error_msg_warning("do_xyt_solve() :: begin\n");
933: #endif

935:   uu_ptr=solve_uu;
936: #if   BLAS||CBLAS
937:   copy(m,&zero,0,uu_ptr,1);
938: #else
939:   rvec_zero(uu_ptr,m);
940: #endif

942:   /* x  = X.Y^T.b */
943:   /* uu = Y^T.b */
944:   for (y_ptr=y,iptr=ycol_indices; *iptr!=-1; y_ptr+=len)
945:     {
946:       off=*iptr++; len=*iptr++;
947: #if   BLAS||CBLAS
948:       *uu_ptr++ = dot(len,uc+off,1,y_ptr,1);
949: #else
950:       *uu_ptr++ = rvec_dot(uc+off,y_ptr,len);
951: #endif
952:     }

954:   /* comunication of beta */
955:   uu_ptr=solve_uu;
956:   if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);}

958: #if   BLAS&&CBLAS
959:   copy(n,&zero,0,uc,1);
960: #else
961:   rvec_zero(uc,n);
962: #endif

964:   /* x = X.uu */
965:   for (x_ptr=x,iptr=xcol_indices; *iptr!=-1; x_ptr+=len)
966:     {
967:       off=*iptr++; len=*iptr++;
968: #if   BLAS&&CBLAS
969:       axpy(len,*uu_ptr++,x_ptr,1,uc+off,1);
970: #else
971:       rvec_axpy(uc+off,x_ptr,*uu_ptr++,len);
972: #endif
973:     }

975: #ifdef DEBUG
976:   error_msg_warning("do_xyt_solve() :: end\n");
977: #endif
978: }


981: /*************************************Xyt.c************************************
982: Function: check_init

984: Input :
985: Output:
986: Return:
987: Description:
988: **************************************xyt.c***********************************/
989: static
990: void
991: check_init(void)
992: {
993: #ifdef DEBUG
994:   error_msg_warning("check_init() :: start %d\n",n_xyt_handles);
995: #endif

997:   comm_init();
998:   /*
999:   perm_init(); 
1000:   bss_init();
1001:   */

1003: #ifdef DEBUG
1004:   error_msg_warning("check_init() :: end   %d\n",n_xyt_handles);
1005: #endif
1006: }


1009: /*************************************xyt.c************************************
1010: Function: check_handle()

1012: Input :
1013: Output:
1014: Return:
1015: Description:
1016: **************************************xyt.c***********************************/
1017: static
1018: void 
1019: check_handle(xyt_ADT xyt_handle)
1020: {
1021: #ifdef SAFE
1022:   int vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};
1023: #endif


1026: #ifdef DEBUG
1027:   error_msg_warning("check_handle() :: start %d\n",n_xyt_handles);
1028: #endif

1030:   if (xyt_handle==NULL)
1031:     {error_msg_fatal("check_handle() :: bad handle :: NULL %d\n",xyt_handle);}

1033: #ifdef SAFE
1034:   vals[0]=vals[1]=xyt_handle->id;
1035:   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
1036:   if ((vals[0]!=vals[1])||(xyt_handle->id<=0))
1037:     {error_msg_fatal("check_handle() :: bad handle :: id mismatch min/max %d/%d %d\n",
1038:                      vals[0],vals[1], xyt_handle->id);}
1039: #endif

1041: #ifdef DEBUG
1042:   error_msg_warning("check_handle() :: end   %d\n",n_xyt_handles);
1043: #endif
1044: }


1047: /*************************************xyt.c************************************
1048: Function: det_separators

1050: Input :
1051: Output:
1052: Return:
1053: Description:
1054:   det_separators(xyt_handle, local2global, n, m, mylocmatvec, grid_data);
1055: **************************************xyt.c***********************************/
1056: static 
1057: void 
1058: det_separators(xyt_ADT xyt_handle)
1059: {
1060:   int i, ct, id;
1061:   int mask, edge, *iptr;
1062:   int *dir, *used;
1063:   int sum[4], w[4];
1064:   REAL rsum[4], rw[4];
1065:   int op[] = {GL_ADD,0};
1066:   REAL *lhs, *rhs;
1067:   int *nsep, *lnsep, *fo, nfo=0;
1068:   gs_ADT gs_handle=xyt_handle->mvi->gs_handle;
1069:   int *local2global=xyt_handle->mvi->local2global;
1070:   int  n=xyt_handle->mvi->n;
1071:   int  m=xyt_handle->mvi->m;
1072:   int level=xyt_handle->level;
1073:   int shared=FALSE;

1075: #ifdef DEBUG
1076:   error_msg_warning("det_separators() :: start %d %d %d\n",level,n,m);
1077: #endif
1078: 
1079:   dir  = (int*)bss_malloc(INT_LEN*(level+1));
1080:   nsep = (int*)bss_malloc(INT_LEN*(level+1));
1081:   lnsep= (int*)bss_malloc(INT_LEN*(level+1));
1082:   fo   = (int*)bss_malloc(INT_LEN*(n+1));
1083:   used = (int*)bss_malloc(INT_LEN*n);

1085:   ivec_zero(dir  ,level+1);
1086:   ivec_zero(nsep ,level+1);
1087:   ivec_zero(lnsep,level+1);
1088:   ivec_set (fo   ,-1,n+1);
1089:   ivec_zero(used,n);

1091:   lhs  = (double*)bss_malloc(REAL_LEN*m);
1092:   rhs  = (double*)bss_malloc(REAL_LEN*m);

1094:   /* determine the # of unique dof */
1095:   rvec_zero(lhs,m);
1096:   rvec_set(lhs,1.0,n);
1097:   gs_gop_hc(gs_handle,lhs,"+\0",level);
1098:   error_msg_warning("done first gs_gop_hc\n");
1099:   rvec_zero(rsum,2);
1100:   for (ct=i=0;i<n;i++)
1101:     {
1102:       if (lhs[i]!=0.0)
1103:         {rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];}

1105:       if (lhs[i]!=1.0)
1106:         {
1107:           shared=TRUE;
1108:         }
1109:     }

1111:   grop_hc(rsum,rw,2,op,level);
1112:   rsum[0]+=0.1;
1113:   rsum[1]+=0.1;

1115:   /*
1116:       if (!my_id)
1117:       {
1118:       printf("xyt n unique = %d (%g)\n",(int) rsum[0], rsum[0]);
1119:       printf("xyt n shared = %d (%g)\n",(int) rsum[1], rsum[1]);
1120:       }
1121:   */

1123:   xyt_handle->info->n_global=xyt_handle->info->m_global=(int) rsum[0];
1124:   xyt_handle->mvi->n_global =xyt_handle->mvi->m_global =(int) rsum[0];

1126:   /* determine separator sets top down */
1127:   if (shared)
1128:     {
1129:       /* solution is to do as in the symmetric shared case but then */
1130:       /* pick the sub-hc with the most free dofs and do a mat-vec   */
1131:       /* and pick up the responses on the other sub-hc from the     */
1132:       /* initial separator set obtained from the symm. shared case  */
1133:       error_msg_fatal("shared dof separator determination not ready ... see hmt!!!\n");
1134:       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
1135:         {
1136:           /* set rsh of hc, fire, and collect lhs responses */
1137:           (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
1138:           gs_gop_hc(gs_handle,lhs,"+\0",edge);
1139: 
1140:           /* set lsh of hc, fire, and collect rhs responses */
1141:           (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
1142:           gs_gop_hc(gs_handle,rhs,"+\0",edge);
1143: 
1144:           for (i=0;i<n;i++)
1145:             {
1146:               if (id< mask)
1147:                 {
1148:                   if (lhs[i]!=0.0)
1149:                     {lhs[i]=1.0;}
1150:                 }
1151:               if (id>=mask)
1152:                 {
1153:                   if (rhs[i]!=0.0)
1154:                     {rhs[i]=1.0;}
1155:                 }
1156:             }

1158:           if (id< mask)
1159:             {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);}
1160:           else
1161:             {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);}

1163:           /* count number of dofs I own that have signal and not in sep set */
1164:           rvec_zero(rsum,4);
1165:           for (ivec_zero(sum,4),ct=i=0;i<n;i++)
1166:             {
1167:               if (!used[i])
1168:                 {
1169:                   /* number of unmarked dofs on node */
1170:                   ct++;
1171:                   /* number of dofs to be marked on lhs hc */
1172:                   if (id< mask)
1173:                     {
1174:                       if (lhs[i]!=0.0)
1175:                         {sum[0]++; rsum[0]+=1.0/lhs[i];}
1176:                     }
1177:                   /* number of dofs to be marked on rhs hc */
1178:                   if (id>=mask)
1179:                     {
1180:                       if (rhs[i]!=0.0)
1181:                         {sum[1]++; rsum[1]+=1.0/rhs[i];}
1182:                     }
1183:                 }
1184:             }

1186:           /* go for load balance - choose half with most unmarked dofs, bias LHS */
1187:           (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
1188:           (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
1189:           giop_hc(sum,w,4,op,edge);
1190:           grop_hc(rsum,rw,4,op,edge);
1191:           rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;

1193:           if (id<mask)
1194:             {
1195:               /* mark dofs I own that have signal and not in sep set */
1196:               for (ct=i=0;i<n;i++)
1197:                 {
1198:                   if ((!used[i])&&(lhs[i]!=0.0))
1199:                     {
1200:                       ct++; nfo++;

1202:                       if (nfo>n)
1203:                         {error_msg_fatal("nfo about to exceed n\n");}

1205:                       *--iptr = local2global[i];
1206:                       used[i]=edge;
1207:                     }
1208:                 }
1209:               if (ct>1) {ivec_sort(iptr,ct);}

1211:               lnsep[edge]=ct;
1212:               nsep[edge]=(int) rsum[0];
1213:               dir [edge]=LEFT;
1214:             }

1216:           if (id>=mask)
1217:             {
1218:               /* mark dofs I own that have signal and not in sep set */
1219:               for (ct=i=0;i<n;i++)
1220:                 {
1221:                   if ((!used[i])&&(rhs[i]!=0.0))
1222:                     {
1223:                       ct++; nfo++;

1225:                       if (nfo>n)
1226:                         {error_msg_fatal("nfo about to exceed n\n");}

1228:                       *--iptr = local2global[i];
1229:                       used[i]=edge;
1230:                     }
1231:                 }
1232:               if (ct>1) {ivec_sort(iptr,ct);}

1234:               lnsep[edge]=ct;
1235:               nsep[edge]= (int) rsum[1];
1236:               dir [edge]=RIGHT;
1237:             }

1239:           /* LATER or we can recur on these to order seps at this level */
1240:           /* do we need full set of separators for this?                */

1242:           /* fold rhs hc into lower */
1243:           if (id>=mask)
1244:             {id-=mask;}
1245:         }
1246:     }
1247:   else
1248:     {
1249:       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
1250:         {
1251:           /* set rsh of hc, fire, and collect lhs responses */
1252:           (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
1253:           gs_gop_hc(gs_handle,lhs,"+\0",edge);

1255:           /* set lsh of hc, fire, and collect rhs responses */
1256:           (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
1257:           gs_gop_hc(gs_handle,rhs,"+\0",edge);

1259:           /* count number of dofs I own that have signal and not in sep set */
1260:           for (ivec_zero(sum,4),ct=i=0;i<n;i++)
1261:             {
1262:               if (!used[i])
1263:                 {
1264:                   /* number of unmarked dofs on node */
1265:                   ct++;
1266:                   /* number of dofs to be marked on lhs hc */
1267:                   if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;}
1268:                   /* number of dofs to be marked on rhs hc */
1269:                   if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;}
1270:                 }
1271:             }

1273:           /* for the non-symmetric case we need separators of width 2 */
1274:           /* so take both sides */
1275:           (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
1276:           giop_hc(sum,w,4,op,edge);

1278:           ct=0;
1279:           if (id<mask)
1280:             {
1281:               /* mark dofs I own that have signal and not in sep set */
1282:               for (i=0;i<n;i++)
1283:                 {
1284:                   if ((!used[i])&&(lhs[i]!=0.0))
1285:                     {
1286:                       ct++; nfo++;
1287:                       *--iptr = local2global[i];
1288:                       used[i]=edge;
1289:                     }
1290:                 }
1291:               /* LSH hc summation of ct should be sum[0] */
1292:             }
1293:           else
1294:             {
1295:               /* mark dofs I own that have signal and not in sep set */
1296:               for (i=0;i<n;i++)
1297:                 {
1298:                   if ((!used[i])&&(rhs[i]!=0.0))
1299:                     {
1300:                       ct++; nfo++;
1301:                       *--iptr = local2global[i];
1302:                       used[i]=edge;
1303:                     }
1304:                 }
1305:               /* RSH hc summation of ct should be sum[1] */
1306:             }

1308:           if (ct>1) {ivec_sort(iptr,ct);}
1309:           lnsep[edge]=ct;
1310:           nsep[edge]=sum[0]+sum[1];
1311:           dir [edge]=BOTH;

1313:           /* LATER or we can recur on these to order seps at this level */
1314:           /* do we need full set of separators for this?                */

1316:           /* fold rhs hc into lower */
1317:           if (id>=mask)
1318:             {id-=mask;}
1319:         }
1320:     }

1322:   /* level 0 is on processor case - so mark the remainder */
1323:   for (ct=i=0;i<n;i++)
1324:     {
1325:       if (!used[i])
1326:         {
1327:           ct++; nfo++;
1328:           *--iptr = local2global[i];
1329:           used[i]=edge;
1330:         }
1331:     }
1332:   if (ct>1) {ivec_sort(iptr,ct);}
1333:   lnsep[edge]=ct;
1334:   nsep [edge]=ct;
1335:   dir  [edge]=BOTH;

1337:   xyt_handle->info->nsep=nsep;
1338:   xyt_handle->info->lnsep=lnsep;
1339:   xyt_handle->info->fo=fo;
1340:   xyt_handle->info->nfo=nfo;

1342:   bss_free(dir);
1343:   bss_free(lhs);
1344:   bss_free(rhs);
1345:   bss_free(used);

1347: #ifdef DEBUG  
1348:   error_msg_warning("det_separators() :: end\n");
1349: #endif
1350: }


1353: /*************************************xyt.c************************************
1354: Function: set_mvi

1356: Input :
1357: Output:
1358: Return:
1359: Description:
1360: **************************************xyt.c***********************************/
1361: static
1362: mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data)
1363: {
1364:   mv_info *mvi;


1367: #ifdef DEBUG
1368:   error_msg_warning("set_mvi() :: start\n");
1369: #endif

1371:   mvi = (mv_info*)bss_malloc(sizeof(mv_info));
1372:   mvi->n=n;
1373:   mvi->m=m;
1374:   mvi->n_global=-1;
1375:   mvi->m_global=-1;
1376:   mvi->local2global=(int*)bss_malloc((m+1)*INT_LEN);
1377:   ivec_copy(mvi->local2global,local2global,m);
1378:   mvi->local2global[m] = INT_MAX;
1379:   mvi->matvec=(int (*)(mv_info*,REAL*,REAL*))matvec;
1380:   mvi->grid_data=grid_data;

1382:   /* set xyt communication handle to perform restricted matvec */
1383:   mvi->gs_handle = gs_init(local2global, m, num_nodes);

1385: #ifdef DEBUG
1386:   error_msg_warning("set_mvi() :: end   \n");
1387: #endif
1388: 
1389:   return(mvi);
1390: }


1393: /*************************************xyt.c************************************
1394: Function: set_mvi

1396: Input :
1397: Output:
1398: Return:
1399: Description:

1401:       computes u = A.v 
1402:       do_matvec(xyt_handle->mvi,v,u);
1403: **************************************xyt.c***********************************/
1404: static
1405: void do_matvec(mv_info *A, REAL *v, REAL *u)
1406: {
1407:   A->matvec((mv_info*)A->grid_data,v,u);
1408: }