Actual source code: maij.c

  1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

#include "src/mat/impls/maij/maij.h"
#include "vecimpl.h"

 24: int MatMAIJGetAIJ(Mat A,Mat *B)
 25: {
 26:   int         ierr;
 27:   PetscTruth  ismpimaij,isseqmaij;

 30:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 31:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 32:   if (ismpimaij) {
 33:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 35:     *B = b->A;
 36:   } else if (isseqmaij) {
 37:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 39:     *B = b->AIJ;
 40:   } else {
 41:     *B = A;
 42:   }
 43:   return(0);
 44: }

 48: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 49: {
 51:   Mat Aij;

 54:   MatMAIJGetAIJ(A,&Aij);
 55:   MatCreateMAIJ(Aij,dof,B);
 56:   return(0);
 57: }

 61: int MatDestroy_SeqMAIJ(Mat A)
 62: {
 63:   int         ierr;
 64:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 67:   if (b->AIJ) {
 68:     MatDestroy(b->AIJ);
 69:   }
 70:   PetscFree(b);
 71:   return(0);
 72: }

 76: int MatDestroy_MPIMAIJ(Mat A)
 77: {
 78:   int         ierr;
 79:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 82:   if (b->AIJ) {
 83:     MatDestroy(b->AIJ);
 84:   }
 85:   if (b->OAIJ) {
 86:     MatDestroy(b->OAIJ);
 87:   }
 88:   if (b->A) {
 89:     MatDestroy(b->A);
 90:   }
 91:   if (b->ctx) {
 92:     VecScatterDestroy(b->ctx);
 93:   }
 94:   if (b->w) {
 95:     VecDestroy(b->w);
 96:   }
 97:   PetscFree(b);
 98:   return(0);
 99: }

101: /*MC
102:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
103:   multicomponent problems, interpolating or restricting each component the same way independently.
104:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

106:   Operations provided:
107: . MatMult
108: . MatMultTranspose
109: . MatMultAdd
110: . MatMultTransposeAdd

112:   Level: advanced

114: .seealso: MatCreateMAIJ, MatMAIJGetAIJ, MatMAIJRedimension
115: M*/

117: EXTERN_C_BEGIN
120: int MatCreate_MAIJ(Mat A)
121: {
122:   int         ierr;
123:   Mat_MPIMAIJ *b;

126:   PetscNew(Mat_MPIMAIJ,&b);
127:   A->data  = (void*)b;
128:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
129:   PetscMemzero(A->ops,sizeof(struct _MatOps));
130:   A->factor           = 0;
131:   A->mapping          = 0;

133:   b->AIJ  = 0;
134:   b->dof  = 0;
135:   b->OAIJ = 0;
136:   b->ctx  = 0;
137:   b->w    = 0;
138:   return(0);
139: }
140: EXTERN_C_END

142: /* --------------------------------------------------------------------------------------*/
145: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146: {
147:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
148:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
149:   PetscScalar   *x,*y,*v,sum1, sum2;
150:   int           ierr,m = b->AIJ->m,*idx,*ii;
151:   int           n,i,jrow,j;

154:   VecGetArray(xx,&x);
155:   VecGetArray(yy,&y);
156:   idx  = a->j;
157:   v    = a->a;
158:   ii   = a->i;

160:   for (i=0; i<m; i++) {
161:     jrow = ii[i];
162:     n    = ii[i+1] - jrow;
163:     sum1  = 0.0;
164:     sum2  = 0.0;
165:     for (j=0; j<n; j++) {
166:       sum1 += v[jrow]*x[2*idx[jrow]];
167:       sum2 += v[jrow]*x[2*idx[jrow]+1];
168:       jrow++;
169:      }
170:     y[2*i]   = sum1;
171:     y[2*i+1] = sum2;
172:   }

174:   PetscLogFlops(4*a->nz - 2*m);
175:   VecRestoreArray(xx,&x);
176:   VecRestoreArray(yy,&y);
177:   return(0);
178: }

182: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
187:   int           ierr,m = b->AIJ->m,n,i,*idx;

190:   VecSet(&zero,yy);
191:   VecGetArray(xx,&x);
192:   VecGetArray(yy,&y);
193: 
194:   for (i=0; i<m; i++) {
195:     idx    = a->j + a->i[i] ;
196:     v      = a->a + a->i[i] ;
197:     n      = a->i[i+1] - a->i[i];
198:     alpha1 = x[2*i];
199:     alpha2 = x[2*i+1];
200:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201:   }
202:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203:   VecRestoreArray(xx,&x);
204:   VecRestoreArray(yy,&y);
205:   return(0);
206: }

210: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211: {
212:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
213:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
214:   PetscScalar   *x,*y,*v,sum1, sum2;
215:   int           ierr,m = b->AIJ->m,*idx,*ii;
216:   int           n,i,jrow,j;

219:   if (yy != zz) {VecCopy(yy,zz);}
220:   VecGetArray(xx,&x);
221:   VecGetArray(zz,&y);
222:   idx  = a->j;
223:   v    = a->a;
224:   ii   = a->i;

226:   for (i=0; i<m; i++) {
227:     jrow = ii[i];
228:     n    = ii[i+1] - jrow;
229:     sum1  = 0.0;
230:     sum2  = 0.0;
231:     for (j=0; j<n; j++) {
232:       sum1 += v[jrow]*x[2*idx[jrow]];
233:       sum2 += v[jrow]*x[2*idx[jrow]+1];
234:       jrow++;
235:      }
236:     y[2*i]   += sum1;
237:     y[2*i+1] += sum2;
238:   }

240:   PetscLogFlops(4*a->nz - 2*m);
241:   VecRestoreArray(xx,&x);
242:   VecRestoreArray(zz,&y);
243:   return(0);
244: }
247: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248: {
249:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
250:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
251:   PetscScalar   *x,*y,*v,alpha1,alpha2;
252:   int           ierr,m = b->AIJ->m,n,i,*idx;

255:   if (yy != zz) {VecCopy(yy,zz);}
256:   VecGetArray(xx,&x);
257:   VecGetArray(zz,&y);
258: 
259:   for (i=0; i<m; i++) {
260:     idx   = a->j + a->i[i] ;
261:     v     = a->a + a->i[i] ;
262:     n     = a->i[i+1] - a->i[i];
263:     alpha1 = x[2*i];
264:     alpha2 = x[2*i+1];
265:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266:   }
267:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268:   VecRestoreArray(xx,&x);
269:   VecRestoreArray(zz,&y);
270:   return(0);
271: }
272: /* --------------------------------------------------------------------------------------*/
275: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276: {
277:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
278:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
279:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
280:   int           ierr,m = b->AIJ->m,*idx,*ii;
281:   int           n,i,jrow,j;

284:   VecGetArray(xx,&x);
285:   VecGetArray(yy,&y);
286:   idx  = a->j;
287:   v    = a->a;
288:   ii   = a->i;

290:   for (i=0; i<m; i++) {
291:     jrow = ii[i];
292:     n    = ii[i+1] - jrow;
293:     sum1  = 0.0;
294:     sum2  = 0.0;
295:     sum3  = 0.0;
296:     for (j=0; j<n; j++) {
297:       sum1 += v[jrow]*x[3*idx[jrow]];
298:       sum2 += v[jrow]*x[3*idx[jrow]+1];
299:       sum3 += v[jrow]*x[3*idx[jrow]+2];
300:       jrow++;
301:      }
302:     y[3*i]   = sum1;
303:     y[3*i+1] = sum2;
304:     y[3*i+2] = sum3;
305:   }

307:   PetscLogFlops(6*a->nz - 3*m);
308:   VecRestoreArray(xx,&x);
309:   VecRestoreArray(yy,&y);
310:   return(0);
311: }

315: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
318:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
319:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320:   int           ierr,m = b->AIJ->m,n,i,*idx;

323:   VecSet(&zero,yy);
324:   VecGetArray(xx,&x);
325:   VecGetArray(yy,&y);
326: 
327:   for (i=0; i<m; i++) {
328:     idx    = a->j + a->i[i];
329:     v      = a->a + a->i[i];
330:     n      = a->i[i+1] - a->i[i];
331:     alpha1 = x[3*i];
332:     alpha2 = x[3*i+1];
333:     alpha3 = x[3*i+2];
334:     while (n-->0) {
335:       y[3*(*idx)]   += alpha1*(*v);
336:       y[3*(*idx)+1] += alpha2*(*v);
337:       y[3*(*idx)+2] += alpha3*(*v);
338:       idx++; v++;
339:     }
340:   }
341:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342:   VecRestoreArray(xx,&x);
343:   VecRestoreArray(yy,&y);
344:   return(0);
345: }

349: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350: {
351:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
352:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
353:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
354:   int           ierr,m = b->AIJ->m,*idx,*ii;
355:   int           n,i,jrow,j;

358:   if (yy != zz) {VecCopy(yy,zz);}
359:   VecGetArray(xx,&x);
360:   VecGetArray(zz,&y);
361:   idx  = a->j;
362:   v    = a->a;
363:   ii   = a->i;

365:   for (i=0; i<m; i++) {
366:     jrow = ii[i];
367:     n    = ii[i+1] - jrow;
368:     sum1  = 0.0;
369:     sum2  = 0.0;
370:     sum3  = 0.0;
371:     for (j=0; j<n; j++) {
372:       sum1 += v[jrow]*x[3*idx[jrow]];
373:       sum2 += v[jrow]*x[3*idx[jrow]+1];
374:       sum3 += v[jrow]*x[3*idx[jrow]+2];
375:       jrow++;
376:      }
377:     y[3*i]   += sum1;
378:     y[3*i+1] += sum2;
379:     y[3*i+2] += sum3;
380:   }

382:   PetscLogFlops(6*a->nz);
383:   VecRestoreArray(xx,&x);
384:   VecRestoreArray(zz,&y);
385:   return(0);
386: }
389: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390: {
391:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
392:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
393:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
394:   int           ierr,m = b->AIJ->m,n,i,*idx;

397:   if (yy != zz) {VecCopy(yy,zz);}
398:   VecGetArray(xx,&x);
399:   VecGetArray(zz,&y);
400:   for (i=0; i<m; i++) {
401:     idx    = a->j + a->i[i] ;
402:     v      = a->a + a->i[i] ;
403:     n      = a->i[i+1] - a->i[i];
404:     alpha1 = x[3*i];
405:     alpha2 = x[3*i+1];
406:     alpha3 = x[3*i+2];
407:     while (n-->0) {
408:       y[3*(*idx)]   += alpha1*(*v);
409:       y[3*(*idx)+1] += alpha2*(*v);
410:       y[3*(*idx)+2] += alpha3*(*v);
411:       idx++; v++;
412:     }
413:   }
414:   PetscLogFlops(6*a->nz);
415:   VecRestoreArray(xx,&x);
416:   VecRestoreArray(zz,&y);
417:   return(0);
418: }

420: /* ------------------------------------------------------------------------------*/
423: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424: {
425:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
426:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
427:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
428:   int           ierr,m = b->AIJ->m,*idx,*ii;
429:   int           n,i,jrow,j;

432:   VecGetArray(xx,&x);
433:   VecGetArray(yy,&y);
434:   idx  = a->j;
435:   v    = a->a;
436:   ii   = a->i;

438:   for (i=0; i<m; i++) {
439:     jrow = ii[i];
440:     n    = ii[i+1] - jrow;
441:     sum1  = 0.0;
442:     sum2  = 0.0;
443:     sum3  = 0.0;
444:     sum4  = 0.0;
445:     for (j=0; j<n; j++) {
446:       sum1 += v[jrow]*x[4*idx[jrow]];
447:       sum2 += v[jrow]*x[4*idx[jrow]+1];
448:       sum3 += v[jrow]*x[4*idx[jrow]+2];
449:       sum4 += v[jrow]*x[4*idx[jrow]+3];
450:       jrow++;
451:      }
452:     y[4*i]   = sum1;
453:     y[4*i+1] = sum2;
454:     y[4*i+2] = sum3;
455:     y[4*i+3] = sum4;
456:   }

458:   PetscLogFlops(8*a->nz - 4*m);
459:   VecRestoreArray(xx,&x);
460:   VecRestoreArray(yy,&y);
461:   return(0);
462: }

466: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467: {
468:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
470:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471:   int           ierr,m = b->AIJ->m,n,i,*idx;

474:   VecSet(&zero,yy);
475:   VecGetArray(xx,&x);
476:   VecGetArray(yy,&y);
477:   for (i=0; i<m; i++) {
478:     idx    = a->j + a->i[i] ;
479:     v      = a->a + a->i[i] ;
480:     n      = a->i[i+1] - a->i[i];
481:     alpha1 = x[4*i];
482:     alpha2 = x[4*i+1];
483:     alpha3 = x[4*i+2];
484:     alpha4 = x[4*i+3];
485:     while (n-->0) {
486:       y[4*(*idx)]   += alpha1*(*v);
487:       y[4*(*idx)+1] += alpha2*(*v);
488:       y[4*(*idx)+2] += alpha3*(*v);
489:       y[4*(*idx)+3] += alpha4*(*v);
490:       idx++; v++;
491:     }
492:   }
493:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494:   VecRestoreArray(xx,&x);
495:   VecRestoreArray(yy,&y);
496:   return(0);
497: }

501: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506:   int           ierr,m = b->AIJ->m,*idx,*ii;
507:   int           n,i,jrow,j;

510:   if (yy != zz) {VecCopy(yy,zz);}
511:   VecGetArray(xx,&x);
512:   VecGetArray(zz,&y);
513:   idx  = a->j;
514:   v    = a->a;
515:   ii   = a->i;

517:   for (i=0; i<m; i++) {
518:     jrow = ii[i];
519:     n    = ii[i+1] - jrow;
520:     sum1  = 0.0;
521:     sum2  = 0.0;
522:     sum3  = 0.0;
523:     sum4  = 0.0;
524:     for (j=0; j<n; j++) {
525:       sum1 += v[jrow]*x[4*idx[jrow]];
526:       sum2 += v[jrow]*x[4*idx[jrow]+1];
527:       sum3 += v[jrow]*x[4*idx[jrow]+2];
528:       sum4 += v[jrow]*x[4*idx[jrow]+3];
529:       jrow++;
530:      }
531:     y[4*i]   += sum1;
532:     y[4*i+1] += sum2;
533:     y[4*i+2] += sum3;
534:     y[4*i+3] += sum4;
535:   }

537:   PetscLogFlops(8*a->nz - 4*m);
538:   VecRestoreArray(xx,&x);
539:   VecRestoreArray(zz,&y);
540:   return(0);
541: }
544: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545: {
546:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
547:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
548:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549:   int           ierr,m = b->AIJ->m,n,i,*idx;

552:   if (yy != zz) {VecCopy(yy,zz);}
553:   VecGetArray(xx,&x);
554:   VecGetArray(zz,&y);
555: 
556:   for (i=0; i<m; i++) {
557:     idx    = a->j + a->i[i] ;
558:     v      = a->a + a->i[i] ;
559:     n      = a->i[i+1] - a->i[i];
560:     alpha1 = x[4*i];
561:     alpha2 = x[4*i+1];
562:     alpha3 = x[4*i+2];
563:     alpha4 = x[4*i+3];
564:     while (n-->0) {
565:       y[4*(*idx)]   += alpha1*(*v);
566:       y[4*(*idx)+1] += alpha2*(*v);
567:       y[4*(*idx)+2] += alpha3*(*v);
568:       y[4*(*idx)+3] += alpha4*(*v);
569:       idx++; v++;
570:     }
571:   }
572:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573:   VecRestoreArray(xx,&x);
574:   VecRestoreArray(zz,&y);
575:   return(0);
576: }
577: /* ------------------------------------------------------------------------------*/

581: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582: {
583:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
584:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
585:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586:   int           ierr,m = b->AIJ->m,*idx,*ii;
587:   int           n,i,jrow,j;

590:   VecGetArray(xx,&x);
591:   VecGetArray(yy,&y);
592:   idx  = a->j;
593:   v    = a->a;
594:   ii   = a->i;

596:   for (i=0; i<m; i++) {
597:     jrow = ii[i];
598:     n    = ii[i+1] - jrow;
599:     sum1  = 0.0;
600:     sum2  = 0.0;
601:     sum3  = 0.0;
602:     sum4  = 0.0;
603:     sum5  = 0.0;
604:     for (j=0; j<n; j++) {
605:       sum1 += v[jrow]*x[5*idx[jrow]];
606:       sum2 += v[jrow]*x[5*idx[jrow]+1];
607:       sum3 += v[jrow]*x[5*idx[jrow]+2];
608:       sum4 += v[jrow]*x[5*idx[jrow]+3];
609:       sum5 += v[jrow]*x[5*idx[jrow]+4];
610:       jrow++;
611:      }
612:     y[5*i]   = sum1;
613:     y[5*i+1] = sum2;
614:     y[5*i+2] = sum3;
615:     y[5*i+3] = sum4;
616:     y[5*i+4] = sum5;
617:   }

619:   PetscLogFlops(10*a->nz - 5*m);
620:   VecRestoreArray(xx,&x);
621:   VecRestoreArray(yy,&y);
622:   return(0);
623: }

627: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628: {
629:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
630:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
631:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632:   int           ierr,m = b->AIJ->m,n,i,*idx;

635:   VecSet(&zero,yy);
636:   VecGetArray(xx,&x);
637:   VecGetArray(yy,&y);
638: 
639:   for (i=0; i<m; i++) {
640:     idx    = a->j + a->i[i] ;
641:     v      = a->a + a->i[i] ;
642:     n      = a->i[i+1] - a->i[i];
643:     alpha1 = x[5*i];
644:     alpha2 = x[5*i+1];
645:     alpha3 = x[5*i+2];
646:     alpha4 = x[5*i+3];
647:     alpha5 = x[5*i+4];
648:     while (n-->0) {
649:       y[5*(*idx)]   += alpha1*(*v);
650:       y[5*(*idx)+1] += alpha2*(*v);
651:       y[5*(*idx)+2] += alpha3*(*v);
652:       y[5*(*idx)+3] += alpha4*(*v);
653:       y[5*(*idx)+4] += alpha5*(*v);
654:       idx++; v++;
655:     }
656:   }
657:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658:   VecRestoreArray(xx,&x);
659:   VecRestoreArray(yy,&y);
660:   return(0);
661: }

665: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
668:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
669:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670:   int           ierr,m = b->AIJ->m,*idx,*ii;
671:   int           n,i,jrow,j;

674:   if (yy != zz) {VecCopy(yy,zz);}
675:   VecGetArray(xx,&x);
676:   VecGetArray(zz,&y);
677:   idx  = a->j;
678:   v    = a->a;
679:   ii   = a->i;

681:   for (i=0; i<m; i++) {
682:     jrow = ii[i];
683:     n    = ii[i+1] - jrow;
684:     sum1  = 0.0;
685:     sum2  = 0.0;
686:     sum3  = 0.0;
687:     sum4  = 0.0;
688:     sum5  = 0.0;
689:     for (j=0; j<n; j++) {
690:       sum1 += v[jrow]*x[5*idx[jrow]];
691:       sum2 += v[jrow]*x[5*idx[jrow]+1];
692:       sum3 += v[jrow]*x[5*idx[jrow]+2];
693:       sum4 += v[jrow]*x[5*idx[jrow]+3];
694:       sum5 += v[jrow]*x[5*idx[jrow]+4];
695:       jrow++;
696:      }
697:     y[5*i]   += sum1;
698:     y[5*i+1] += sum2;
699:     y[5*i+2] += sum3;
700:     y[5*i+3] += sum4;
701:     y[5*i+4] += sum5;
702:   }

704:   PetscLogFlops(10*a->nz);
705:   VecRestoreArray(xx,&x);
706:   VecRestoreArray(zz,&y);
707:   return(0);
708: }

712: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713: {
714:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
715:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
716:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717:   int           ierr,m = b->AIJ->m,n,i,*idx;

720:   if (yy != zz) {VecCopy(yy,zz);}
721:   VecGetArray(xx,&x);
722:   VecGetArray(zz,&y);
723: 
724:   for (i=0; i<m; i++) {
725:     idx    = a->j + a->i[i] ;
726:     v      = a->a + a->i[i] ;
727:     n      = a->i[i+1] - a->i[i];
728:     alpha1 = x[5*i];
729:     alpha2 = x[5*i+1];
730:     alpha3 = x[5*i+2];
731:     alpha4 = x[5*i+3];
732:     alpha5 = x[5*i+4];
733:     while (n-->0) {
734:       y[5*(*idx)]   += alpha1*(*v);
735:       y[5*(*idx)+1] += alpha2*(*v);
736:       y[5*(*idx)+2] += alpha3*(*v);
737:       y[5*(*idx)+3] += alpha4*(*v);
738:       y[5*(*idx)+4] += alpha5*(*v);
739:       idx++; v++;
740:     }
741:   }
742:   PetscLogFlops(10*a->nz);
743:   VecRestoreArray(xx,&x);
744:   VecRestoreArray(zz,&y);
745:   return(0);
746: }

748: /* ------------------------------------------------------------------------------*/
751: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752: {
753:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
754:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
755:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756:   int           ierr,m = b->AIJ->m,*idx,*ii;
757:   int           n,i,jrow,j;

760:   VecGetArray(xx,&x);
761:   VecGetArray(yy,&y);
762:   idx  = a->j;
763:   v    = a->a;
764:   ii   = a->i;

766:   for (i=0; i<m; i++) {
767:     jrow = ii[i];
768:     n    = ii[i+1] - jrow;
769:     sum1  = 0.0;
770:     sum2  = 0.0;
771:     sum3  = 0.0;
772:     sum4  = 0.0;
773:     sum5  = 0.0;
774:     sum6  = 0.0;
775:     for (j=0; j<n; j++) {
776:       sum1 += v[jrow]*x[6*idx[jrow]];
777:       sum2 += v[jrow]*x[6*idx[jrow]+1];
778:       sum3 += v[jrow]*x[6*idx[jrow]+2];
779:       sum4 += v[jrow]*x[6*idx[jrow]+3];
780:       sum5 += v[jrow]*x[6*idx[jrow]+4];
781:       sum6 += v[jrow]*x[6*idx[jrow]+5];
782:       jrow++;
783:      }
784:     y[6*i]   = sum1;
785:     y[6*i+1] = sum2;
786:     y[6*i+2] = sum3;
787:     y[6*i+3] = sum4;
788:     y[6*i+4] = sum5;
789:     y[6*i+5] = sum6;
790:   }

792:   PetscLogFlops(12*a->nz - 6*m);
793:   VecRestoreArray(xx,&x);
794:   VecRestoreArray(yy,&y);
795:   return(0);
796: }

800: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801: {
802:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
803:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
804:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805:   int           ierr,m = b->AIJ->m,n,i,*idx;

808:   VecSet(&zero,yy);
809:   VecGetArray(xx,&x);
810:   VecGetArray(yy,&y);

812:   for (i=0; i<m; i++) {
813:     idx    = a->j + a->i[i] ;
814:     v      = a->a + a->i[i] ;
815:     n      = a->i[i+1] - a->i[i];
816:     alpha1 = x[6*i];
817:     alpha2 = x[6*i+1];
818:     alpha3 = x[6*i+2];
819:     alpha4 = x[6*i+3];
820:     alpha5 = x[6*i+4];
821:     alpha6 = x[6*i+5];
822:     while (n-->0) {
823:       y[6*(*idx)]   += alpha1*(*v);
824:       y[6*(*idx)+1] += alpha2*(*v);
825:       y[6*(*idx)+2] += alpha3*(*v);
826:       y[6*(*idx)+3] += alpha4*(*v);
827:       y[6*(*idx)+4] += alpha5*(*v);
828:       y[6*(*idx)+5] += alpha6*(*v);
829:       idx++; v++;
830:     }
831:   }
832:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833:   VecRestoreArray(xx,&x);
834:   VecRestoreArray(yy,&y);
835:   return(0);
836: }

840: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841: {
842:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
843:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
844:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845:   int           ierr,m = b->AIJ->m,*idx,*ii;
846:   int           n,i,jrow,j;

849:   if (yy != zz) {VecCopy(yy,zz);}
850:   VecGetArray(xx,&x);
851:   VecGetArray(zz,&y);
852:   idx  = a->j;
853:   v    = a->a;
854:   ii   = a->i;

856:   for (i=0; i<m; i++) {
857:     jrow = ii[i];
858:     n    = ii[i+1] - jrow;
859:     sum1  = 0.0;
860:     sum2  = 0.0;
861:     sum3  = 0.0;
862:     sum4  = 0.0;
863:     sum5  = 0.0;
864:     sum6  = 0.0;
865:     for (j=0; j<n; j++) {
866:       sum1 += v[jrow]*x[6*idx[jrow]];
867:       sum2 += v[jrow]*x[6*idx[jrow]+1];
868:       sum3 += v[jrow]*x[6*idx[jrow]+2];
869:       sum4 += v[jrow]*x[6*idx[jrow]+3];
870:       sum5 += v[jrow]*x[6*idx[jrow]+4];
871:       sum6 += v[jrow]*x[6*idx[jrow]+5];
872:       jrow++;
873:      }
874:     y[6*i]   += sum1;
875:     y[6*i+1] += sum2;
876:     y[6*i+2] += sum3;
877:     y[6*i+3] += sum4;
878:     y[6*i+4] += sum5;
879:     y[6*i+5] += sum6;
880:   }

882:   PetscLogFlops(12*a->nz);
883:   VecRestoreArray(xx,&x);
884:   VecRestoreArray(zz,&y);
885:   return(0);
886: }

890: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
893:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
894:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895:   int           ierr,m = b->AIJ->m,n,i,*idx;

898:   if (yy != zz) {VecCopy(yy,zz);}
899:   VecGetArray(xx,&x);
900:   VecGetArray(zz,&y);
901: 
902:   for (i=0; i<m; i++) {
903:     idx    = a->j + a->i[i] ;
904:     v      = a->a + a->i[i] ;
905:     n      = a->i[i+1] - a->i[i];
906:     alpha1 = x[6*i];
907:     alpha2 = x[6*i+1];
908:     alpha3 = x[6*i+2];
909:     alpha4 = x[6*i+3];
910:     alpha5 = x[6*i+4];
911:     alpha6 = x[6*i+5];
912:     while (n-->0) {
913:       y[6*(*idx)]   += alpha1*(*v);
914:       y[6*(*idx)+1] += alpha2*(*v);
915:       y[6*(*idx)+2] += alpha3*(*v);
916:       y[6*(*idx)+3] += alpha4*(*v);
917:       y[6*(*idx)+4] += alpha5*(*v);
918:       y[6*(*idx)+5] += alpha6*(*v);
919:       idx++; v++;
920:     }
921:   }
922:   PetscLogFlops(12*a->nz);
923:   VecRestoreArray(xx,&x);
924:   VecRestoreArray(zz,&y);
925:   return(0);
926: }

928: /* ------------------------------------------------------------------------------*/
931: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932: {
933:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
934:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
935:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936:   int           ierr,m = b->AIJ->m,*idx,*ii;
937:   int           n,i,jrow,j;

940:   VecGetArray(xx,&x);
941:   VecGetArray(yy,&y);
942:   idx  = a->j;
943:   v    = a->a;
944:   ii   = a->i;

946:   for (i=0; i<m; i++) {
947:     jrow = ii[i];
948:     n    = ii[i+1] - jrow;
949:     sum1  = 0.0;
950:     sum2  = 0.0;
951:     sum3  = 0.0;
952:     sum4  = 0.0;
953:     sum5  = 0.0;
954:     sum6  = 0.0;
955:     sum7  = 0.0;
956:     sum8  = 0.0;
957:     for (j=0; j<n; j++) {
958:       sum1 += v[jrow]*x[8*idx[jrow]];
959:       sum2 += v[jrow]*x[8*idx[jrow]+1];
960:       sum3 += v[jrow]*x[8*idx[jrow]+2];
961:       sum4 += v[jrow]*x[8*idx[jrow]+3];
962:       sum5 += v[jrow]*x[8*idx[jrow]+4];
963:       sum6 += v[jrow]*x[8*idx[jrow]+5];
964:       sum7 += v[jrow]*x[8*idx[jrow]+6];
965:       sum8 += v[jrow]*x[8*idx[jrow]+7];
966:       jrow++;
967:      }
968:     y[8*i]   = sum1;
969:     y[8*i+1] = sum2;
970:     y[8*i+2] = sum3;
971:     y[8*i+3] = sum4;
972:     y[8*i+4] = sum5;
973:     y[8*i+5] = sum6;
974:     y[8*i+6] = sum7;
975:     y[8*i+7] = sum8;
976:   }

978:   PetscLogFlops(16*a->nz - 8*m);
979:   VecRestoreArray(xx,&x);
980:   VecRestoreArray(yy,&y);
981:   return(0);
982: }

986: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987: {
988:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
989:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
990:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991:   int           ierr,m = b->AIJ->m,n,i,*idx;

994:   VecSet(&zero,yy);
995:   VecGetArray(xx,&x);
996:   VecGetArray(yy,&y);

998:   for (i=0; i<m; i++) {
999:     idx    = a->j + a->i[i] ;
1000:     v      = a->a + a->i[i] ;
1001:     n      = a->i[i+1] - a->i[i];
1002:     alpha1 = x[8*i];
1003:     alpha2 = x[8*i+1];
1004:     alpha3 = x[8*i+2];
1005:     alpha4 = x[8*i+3];
1006:     alpha5 = x[8*i+4];
1007:     alpha6 = x[8*i+5];
1008:     alpha7 = x[8*i+6];
1009:     alpha8 = x[8*i+7];
1010:     while (n-->0) {
1011:       y[8*(*idx)]   += alpha1*(*v);
1012:       y[8*(*idx)+1] += alpha2*(*v);
1013:       y[8*(*idx)+2] += alpha3*(*v);
1014:       y[8*(*idx)+3] += alpha4*(*v);
1015:       y[8*(*idx)+4] += alpha5*(*v);
1016:       y[8*(*idx)+5] += alpha6*(*v);
1017:       y[8*(*idx)+6] += alpha7*(*v);
1018:       y[8*(*idx)+7] += alpha8*(*v);
1019:       idx++; v++;
1020:     }
1021:   }
1022:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023:   VecRestoreArray(xx,&x);
1024:   VecRestoreArray(yy,&y);
1025:   return(0);
1026: }

1030: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031: {
1032:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1033:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1034:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035:   int           ierr,m = b->AIJ->m,*idx,*ii;
1036:   int           n,i,jrow,j;

1039:   if (yy != zz) {VecCopy(yy,zz);}
1040:   VecGetArray(xx,&x);
1041:   VecGetArray(zz,&y);
1042:   idx  = a->j;
1043:   v    = a->a;
1044:   ii   = a->i;

1046:   for (i=0; i<m; i++) {
1047:     jrow = ii[i];
1048:     n    = ii[i+1] - jrow;
1049:     sum1  = 0.0;
1050:     sum2  = 0.0;
1051:     sum3  = 0.0;
1052:     sum4  = 0.0;
1053:     sum5  = 0.0;
1054:     sum6  = 0.0;
1055:     sum7  = 0.0;
1056:     sum8  = 0.0;
1057:     for (j=0; j<n; j++) {
1058:       sum1 += v[jrow]*x[8*idx[jrow]];
1059:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1060:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1061:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1062:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1063:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1064:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1065:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1066:       jrow++;
1067:      }
1068:     y[8*i]   += sum1;
1069:     y[8*i+1] += sum2;
1070:     y[8*i+2] += sum3;
1071:     y[8*i+3] += sum4;
1072:     y[8*i+4] += sum5;
1073:     y[8*i+5] += sum6;
1074:     y[8*i+6] += sum7;
1075:     y[8*i+7] += sum8;
1076:   }

1078:   PetscLogFlops(16*a->nz);
1079:   VecRestoreArray(xx,&x);
1080:   VecRestoreArray(zz,&y);
1081:   return(0);
1082: }

1086: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087: {
1088:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1089:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1090:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091:   int           ierr,m = b->AIJ->m,n,i,*idx;

1094:   if (yy != zz) {VecCopy(yy,zz);}
1095:   VecGetArray(xx,&x);
1096:   VecGetArray(zz,&y);
1097:   for (i=0; i<m; i++) {
1098:     idx    = a->j + a->i[i] ;
1099:     v      = a->a + a->i[i] ;
1100:     n      = a->i[i+1] - a->i[i];
1101:     alpha1 = x[8*i];
1102:     alpha2 = x[8*i+1];
1103:     alpha3 = x[8*i+2];
1104:     alpha4 = x[8*i+3];
1105:     alpha5 = x[8*i+4];
1106:     alpha6 = x[8*i+5];
1107:     alpha7 = x[8*i+6];
1108:     alpha8 = x[8*i+7];
1109:     while (n-->0) {
1110:       y[8*(*idx)]   += alpha1*(*v);
1111:       y[8*(*idx)+1] += alpha2*(*v);
1112:       y[8*(*idx)+2] += alpha3*(*v);
1113:       y[8*(*idx)+3] += alpha4*(*v);
1114:       y[8*(*idx)+4] += alpha5*(*v);
1115:       y[8*(*idx)+5] += alpha6*(*v);
1116:       y[8*(*idx)+6] += alpha7*(*v);
1117:       y[8*(*idx)+7] += alpha8*(*v);
1118:       idx++; v++;
1119:     }
1120:   }
1121:   PetscLogFlops(16*a->nz);
1122:   VecRestoreArray(xx,&x);
1123:   VecRestoreArray(zz,&y);
1124:   return(0);
1125: }

1127: /* ------------------------------------------------------------------------------*/
1130: int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1131: {
1132:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1133:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1134:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1135:   int           ierr,m = b->AIJ->m,*idx,*ii;
1136:   int           n,i,jrow,j;

1139:   VecGetArray(xx,&x);
1140:   VecGetArray(yy,&y);
1141:   idx  = a->j;
1142:   v    = a->a;
1143:   ii   = a->i;

1145:   for (i=0; i<m; i++) {
1146:     jrow = ii[i];
1147:     n    = ii[i+1] - jrow;
1148:     sum1  = 0.0;
1149:     sum2  = 0.0;
1150:     sum3  = 0.0;
1151:     sum4  = 0.0;
1152:     sum5  = 0.0;
1153:     sum6  = 0.0;
1154:     sum7  = 0.0;
1155:     sum8  = 0.0;
1156:     sum9  = 0.0;
1157:     for (j=0; j<n; j++) {
1158:       sum1 += v[jrow]*x[9*idx[jrow]];
1159:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1160:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1161:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1162:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1163:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1164:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1165:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1166:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1167:       jrow++;
1168:      }
1169:     y[9*i]   = sum1;
1170:     y[9*i+1] = sum2;
1171:     y[9*i+2] = sum3;
1172:     y[9*i+3] = sum4;
1173:     y[9*i+4] = sum5;
1174:     y[9*i+5] = sum6;
1175:     y[9*i+6] = sum7;
1176:     y[9*i+7] = sum8;
1177:     y[9*i+8] = sum9;
1178:   }

1180:   PetscLogFlops(18*a->nz - 9*m);
1181:   VecRestoreArray(xx,&x);
1182:   VecRestoreArray(yy,&y);
1183:   return(0);
1184: }

1188: int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1189: {
1190:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1191:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1192:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1193:   int           ierr,m = b->AIJ->m,n,i,*idx;

1196:   VecSet(&zero,yy);
1197:   VecGetArray(xx,&x);
1198:   VecGetArray(yy,&y);

1200:   for (i=0; i<m; i++) {
1201:     idx    = a->j + a->i[i] ;
1202:     v      = a->a + a->i[i] ;
1203:     n      = a->i[i+1] - a->i[i];
1204:     alpha1 = x[9*i];
1205:     alpha2 = x[9*i+1];
1206:     alpha3 = x[9*i+2];
1207:     alpha4 = x[9*i+3];
1208:     alpha5 = x[9*i+4];
1209:     alpha6 = x[9*i+5];
1210:     alpha7 = x[9*i+6];
1211:     alpha8 = x[9*i+7];
1212:     alpha9 = x[9*i+8];
1213:     while (n-->0) {
1214:       y[9*(*idx)]   += alpha1*(*v);
1215:       y[9*(*idx)+1] += alpha2*(*v);
1216:       y[9*(*idx)+2] += alpha3*(*v);
1217:       y[9*(*idx)+3] += alpha4*(*v);
1218:       y[9*(*idx)+4] += alpha5*(*v);
1219:       y[9*(*idx)+5] += alpha6*(*v);
1220:       y[9*(*idx)+6] += alpha7*(*v);
1221:       y[9*(*idx)+7] += alpha8*(*v);
1222:       y[9*(*idx)+8] += alpha9*(*v);
1223:       idx++; v++;
1224:     }
1225:   }
1226:   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1227:   VecRestoreArray(xx,&x);
1228:   VecRestoreArray(yy,&y);
1229:   return(0);
1230: }

1234: int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1235: {
1236:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1237:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1238:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1239:   int           ierr,m = b->AIJ->m,*idx,*ii;
1240:   int           n,i,jrow,j;

1243:   if (yy != zz) {VecCopy(yy,zz);}
1244:   VecGetArray(xx,&x);
1245:   VecGetArray(zz,&y);
1246:   idx  = a->j;
1247:   v    = a->a;
1248:   ii   = a->i;

1250:   for (i=0; i<m; i++) {
1251:     jrow = ii[i];
1252:     n    = ii[i+1] - jrow;
1253:     sum1  = 0.0;
1254:     sum2  = 0.0;
1255:     sum3  = 0.0;
1256:     sum4  = 0.0;
1257:     sum5  = 0.0;
1258:     sum6  = 0.0;
1259:     sum7  = 0.0;
1260:     sum8  = 0.0;
1261:     sum9  = 0.0;
1262:     for (j=0; j<n; j++) {
1263:       sum1 += v[jrow]*x[9*idx[jrow]];
1264:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1265:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1266:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1267:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1268:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1269:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1270:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1271:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1272:       jrow++;
1273:      }
1274:     y[9*i]   += sum1;
1275:     y[9*i+1] += sum2;
1276:     y[9*i+2] += sum3;
1277:     y[9*i+3] += sum4;
1278:     y[9*i+4] += sum5;
1279:     y[9*i+5] += sum6;
1280:     y[9*i+6] += sum7;
1281:     y[9*i+7] += sum8;
1282:     y[9*i+8] += sum9;
1283:   }

1285:   PetscLogFlops(18*a->nz);
1286:   VecRestoreArray(xx,&x);
1287:   VecRestoreArray(zz,&y);
1288:   return(0);
1289: }

1293: int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1294: {
1295:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1296:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1297:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1298:   int           ierr,m = b->AIJ->m,n,i,*idx;

1301:   if (yy != zz) {VecCopy(yy,zz);}
1302:   VecGetArray(xx,&x);
1303:   VecGetArray(zz,&y);
1304:   for (i=0; i<m; i++) {
1305:     idx    = a->j + a->i[i] ;
1306:     v      = a->a + a->i[i] ;
1307:     n      = a->i[i+1] - a->i[i];
1308:     alpha1 = x[9*i];
1309:     alpha2 = x[9*i+1];
1310:     alpha3 = x[9*i+2];
1311:     alpha4 = x[9*i+3];
1312:     alpha5 = x[9*i+4];
1313:     alpha6 = x[9*i+5];
1314:     alpha7 = x[9*i+6];
1315:     alpha8 = x[9*i+7];
1316:     alpha9 = x[9*i+8];
1317:     while (n-->0) {
1318:       y[9*(*idx)]   += alpha1*(*v);
1319:       y[9*(*idx)+1] += alpha2*(*v);
1320:       y[9*(*idx)+2] += alpha3*(*v);
1321:       y[9*(*idx)+3] += alpha4*(*v);
1322:       y[9*(*idx)+4] += alpha5*(*v);
1323:       y[9*(*idx)+5] += alpha6*(*v);
1324:       y[9*(*idx)+6] += alpha7*(*v);
1325:       y[9*(*idx)+7] += alpha8*(*v);
1326:       y[9*(*idx)+8] += alpha9*(*v);
1327:       idx++; v++;
1328:     }
1329:   }
1330:   PetscLogFlops(18*a->nz);
1331:   VecRestoreArray(xx,&x);
1332:   VecRestoreArray(zz,&y);
1333:   return(0);
1334: }

1336: /*--------------------------------------------------------------------------------------------*/
1339: int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1340: {
1341:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1342:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1343:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1344:   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1345:   int           ierr,m = b->AIJ->m,*idx,*ii;
1346:   int           n,i,jrow,j;

1349:   VecGetArray(xx,&x);
1350:   VecGetArray(yy,&y);
1351:   idx  = a->j;
1352:   v    = a->a;
1353:   ii   = a->i;

1355:   for (i=0; i<m; i++) {
1356:     jrow = ii[i];
1357:     n    = ii[i+1] - jrow;
1358:     sum1  = 0.0;
1359:     sum2  = 0.0;
1360:     sum3  = 0.0;
1361:     sum4  = 0.0;
1362:     sum5  = 0.0;
1363:     sum6  = 0.0;
1364:     sum7  = 0.0;
1365:     sum8  = 0.0;
1366:     sum9  = 0.0;
1367:     sum10 = 0.0;
1368:     sum11 = 0.0;
1369:     sum12 = 0.0;
1370:     sum13 = 0.0;
1371:     sum14 = 0.0;
1372:     sum15 = 0.0;
1373:     sum16 = 0.0;
1374:     for (j=0; j<n; j++) {
1375:       sum1  += v[jrow]*x[16*idx[jrow]];
1376:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1377:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1378:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1379:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1380:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1381:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1382:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1383:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1384:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1385:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1386:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1387:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1388:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1389:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1390:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1391:       jrow++;
1392:      }
1393:     y[16*i]    = sum1;
1394:     y[16*i+1]  = sum2;
1395:     y[16*i+2]  = sum3;
1396:     y[16*i+3]  = sum4;
1397:     y[16*i+4]  = sum5;
1398:     y[16*i+5]  = sum6;
1399:     y[16*i+6]  = sum7;
1400:     y[16*i+7]  = sum8;
1401:     y[16*i+8]  = sum9;
1402:     y[16*i+9]  = sum10;
1403:     y[16*i+10] = sum11;
1404:     y[16*i+11] = sum12;
1405:     y[16*i+12] = sum13;
1406:     y[16*i+13] = sum14;
1407:     y[16*i+14] = sum15;
1408:     y[16*i+15] = sum16;
1409:   }

1411:   PetscLogFlops(32*a->nz - 16*m);
1412:   VecRestoreArray(xx,&x);
1413:   VecRestoreArray(yy,&y);
1414:   return(0);
1415: }

1419: int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1420: {
1421:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1422:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1423:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1424:   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1425:   int           ierr,m = b->AIJ->m,n,i,*idx;

1428:   VecSet(&zero,yy);
1429:   VecGetArray(xx,&x);
1430:   VecGetArray(yy,&y);

1432:   for (i=0; i<m; i++) {
1433:     idx    = a->j + a->i[i] ;
1434:     v      = a->a + a->i[i] ;
1435:     n      = a->i[i+1] - a->i[i];
1436:     alpha1  = x[16*i];
1437:     alpha2  = x[16*i+1];
1438:     alpha3  = x[16*i+2];
1439:     alpha4  = x[16*i+3];
1440:     alpha5  = x[16*i+4];
1441:     alpha6  = x[16*i+5];
1442:     alpha7  = x[16*i+6];
1443:     alpha8  = x[16*i+7];
1444:     alpha9  = x[16*i+8];
1445:     alpha10 = x[16*i+9];
1446:     alpha11 = x[16*i+10];
1447:     alpha12 = x[16*i+11];
1448:     alpha13 = x[16*i+12];
1449:     alpha14 = x[16*i+13];
1450:     alpha15 = x[16*i+14];
1451:     alpha16 = x[16*i+15];
1452:     while (n-->0) {
1453:       y[16*(*idx)]    += alpha1*(*v);
1454:       y[16*(*idx)+1]  += alpha2*(*v);
1455:       y[16*(*idx)+2]  += alpha3*(*v);
1456:       y[16*(*idx)+3]  += alpha4*(*v);
1457:       y[16*(*idx)+4]  += alpha5*(*v);
1458:       y[16*(*idx)+5]  += alpha6*(*v);
1459:       y[16*(*idx)+6]  += alpha7*(*v);
1460:       y[16*(*idx)+7]  += alpha8*(*v);
1461:       y[16*(*idx)+8]  += alpha9*(*v);
1462:       y[16*(*idx)+9]  += alpha10*(*v);
1463:       y[16*(*idx)+10] += alpha11*(*v);
1464:       y[16*(*idx)+11] += alpha12*(*v);
1465:       y[16*(*idx)+12] += alpha13*(*v);
1466:       y[16*(*idx)+13] += alpha14*(*v);
1467:       y[16*(*idx)+14] += alpha15*(*v);
1468:       y[16*(*idx)+15] += alpha16*(*v);
1469:       idx++; v++;
1470:     }
1471:   }
1472:   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1473:   VecRestoreArray(xx,&x);
1474:   VecRestoreArray(yy,&y);
1475:   return(0);
1476: }

1480: int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1481: {
1482:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1483:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1484:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1485:   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1486:   int           ierr,m = b->AIJ->m,*idx,*ii;
1487:   int           n,i,jrow,j;

1490:   if (yy != zz) {VecCopy(yy,zz);}
1491:   VecGetArray(xx,&x);
1492:   VecGetArray(zz,&y);
1493:   idx  = a->j;
1494:   v    = a->a;
1495:   ii   = a->i;

1497:   for (i=0; i<m; i++) {
1498:     jrow = ii[i];
1499:     n    = ii[i+1] - jrow;
1500:     sum1  = 0.0;
1501:     sum2  = 0.0;
1502:     sum3  = 0.0;
1503:     sum4  = 0.0;
1504:     sum5  = 0.0;
1505:     sum6  = 0.0;
1506:     sum7  = 0.0;
1507:     sum8  = 0.0;
1508:     sum9  = 0.0;
1509:     sum10 = 0.0;
1510:     sum11 = 0.0;
1511:     sum12 = 0.0;
1512:     sum13 = 0.0;
1513:     sum14 = 0.0;
1514:     sum15 = 0.0;
1515:     sum16 = 0.0;
1516:     for (j=0; j<n; j++) {
1517:       sum1  += v[jrow]*x[16*idx[jrow]];
1518:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1519:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1520:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1521:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1522:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1523:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1524:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1525:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1526:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1527:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1528:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1529:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1530:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1531:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1532:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1533:       jrow++;
1534:      }
1535:     y[16*i]    += sum1;
1536:     y[16*i+1]  += sum2;
1537:     y[16*i+2]  += sum3;
1538:     y[16*i+3]  += sum4;
1539:     y[16*i+4]  += sum5;
1540:     y[16*i+5]  += sum6;
1541:     y[16*i+6]  += sum7;
1542:     y[16*i+7]  += sum8;
1543:     y[16*i+8]  += sum9;
1544:     y[16*i+9]  += sum10;
1545:     y[16*i+10] += sum11;
1546:     y[16*i+11] += sum12;
1547:     y[16*i+12] += sum13;
1548:     y[16*i+13] += sum14;
1549:     y[16*i+14] += sum15;
1550:     y[16*i+15] += sum16;
1551:   }

1553:   PetscLogFlops(32*a->nz);
1554:   VecRestoreArray(xx,&x);
1555:   VecRestoreArray(zz,&y);
1556:   return(0);
1557: }

1561: int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1562: {
1563:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1564:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1565:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1566:   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1567:   int           ierr,m = b->AIJ->m,n,i,*idx;

1570:   if (yy != zz) {VecCopy(yy,zz);}
1571:   VecGetArray(xx,&x);
1572:   VecGetArray(zz,&y);
1573:   for (i=0; i<m; i++) {
1574:     idx    = a->j + a->i[i] ;
1575:     v      = a->a + a->i[i] ;
1576:     n      = a->i[i+1] - a->i[i];
1577:     alpha1 = x[16*i];
1578:     alpha2 = x[16*i+1];
1579:     alpha3 = x[16*i+2];
1580:     alpha4 = x[16*i+3];
1581:     alpha5 = x[16*i+4];
1582:     alpha6 = x[16*i+5];
1583:     alpha7 = x[16*i+6];
1584:     alpha8 = x[16*i+7];
1585:     alpha9  = x[16*i+8];
1586:     alpha10 = x[16*i+9];
1587:     alpha11 = x[16*i+10];
1588:     alpha12 = x[16*i+11];
1589:     alpha13 = x[16*i+12];
1590:     alpha14 = x[16*i+13];
1591:     alpha15 = x[16*i+14];
1592:     alpha16 = x[16*i+15];
1593:     while (n-->0) {
1594:       y[16*(*idx)]   += alpha1*(*v);
1595:       y[16*(*idx)+1] += alpha2*(*v);
1596:       y[16*(*idx)+2] += alpha3*(*v);
1597:       y[16*(*idx)+3] += alpha4*(*v);
1598:       y[16*(*idx)+4] += alpha5*(*v);
1599:       y[16*(*idx)+5] += alpha6*(*v);
1600:       y[16*(*idx)+6] += alpha7*(*v);
1601:       y[16*(*idx)+7] += alpha8*(*v);
1602:       y[16*(*idx)+8]  += alpha9*(*v);
1603:       y[16*(*idx)+9]  += alpha10*(*v);
1604:       y[16*(*idx)+10] += alpha11*(*v);
1605:       y[16*(*idx)+11] += alpha12*(*v);
1606:       y[16*(*idx)+12] += alpha13*(*v);
1607:       y[16*(*idx)+13] += alpha14*(*v);
1608:       y[16*(*idx)+14] += alpha15*(*v);
1609:       y[16*(*idx)+15] += alpha16*(*v);
1610:       idx++; v++;
1611:     }
1612:   }
1613:   PetscLogFlops(32*a->nz);
1614:   VecRestoreArray(xx,&x);
1615:   VecRestoreArray(zz,&y);
1616:   return(0);
1617: }

1619: /*===================================================================================*/
1622: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1623: {
1624:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1625:   int         ierr;

1628:   /* start the scatter */
1629:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1630:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1631:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1632:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1633:   return(0);
1634: }

1638: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1639: {
1640:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1641:   int         ierr;
1643:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1644:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1645:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1646:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1647:   return(0);
1648: }

1652: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1653: {
1654:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1655:   int         ierr;

1658:   /* start the scatter */
1659:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1660:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1661:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1662:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1663:   return(0);
1664: }

1668: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1671:   int         ierr;
1673:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1674:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1675:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1676:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1677:   return(0);
1678: }

1680: /* ---------------------------------------------------------------------------------- */
1681: /*MC
1682:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
1683:   operations for multicomponent problems.  It interpolates each component the same
1684:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1685:   and MATMPIAIJ for distributed matrices.

1687:   Operations provided:
1688: . MatMult
1689: . MatMultTranspose
1690: . MatMultAdd
1691: . MatMultTransposeAdd

1693:   Level: advanced

1695: M*/
1698: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1699: {
1700:   int         ierr,size,n;
1701:   Mat_MPIMAIJ *b;
1702:   Mat         B;

1705:   PetscObjectReference((PetscObject)A);

1707:   if (dof == 1) {
1708:     *maij = A;
1709:   } else {
1710:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1711:     B->assembled    = PETSC_TRUE;

1713:     MPI_Comm_size(A->comm,&size);
1714:     if (size == 1) {
1715:       MatSetType(B,MATSEQMAIJ);
1716:       B->ops->destroy = MatDestroy_SeqMAIJ;
1717:       b      = (Mat_MPIMAIJ*)B->data;
1718:       b->dof = dof;
1719:       b->AIJ = A;
1720:       if (dof == 2) {
1721:         B->ops->mult             = MatMult_SeqMAIJ_2;
1722:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1723:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1724:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1725:       } else if (dof == 3) {
1726:         B->ops->mult             = MatMult_SeqMAIJ_3;
1727:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1728:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1729:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1730:       } else if (dof == 4) {
1731:         B->ops->mult             = MatMult_SeqMAIJ_4;
1732:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1733:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1734:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1735:       } else if (dof == 5) {
1736:         B->ops->mult             = MatMult_SeqMAIJ_5;
1737:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1738:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1739:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1740:       } else if (dof == 6) {
1741:         B->ops->mult             = MatMult_SeqMAIJ_6;
1742:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1743:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1744:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1745:       } else if (dof == 8) {
1746:         B->ops->mult             = MatMult_SeqMAIJ_8;
1747:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1748:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1749:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1750:       } else if (dof == 9) {
1751:         B->ops->mult             = MatMult_SeqMAIJ_9;
1752:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1753:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1754:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1755:       } else if (dof == 16) {
1756:         B->ops->mult             = MatMult_SeqMAIJ_16;
1757:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1758:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1759:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1760:       } else {
1761:         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1762:       }
1763:     } else {
1764:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1765:       IS         from,to;
1766:       Vec        gvec;
1767:       int        *garray,i;

1769:       MatSetType(B,MATMPIMAIJ);
1770:       B->ops->destroy = MatDestroy_MPIMAIJ;
1771:       b      = (Mat_MPIMAIJ*)B->data;
1772:       b->dof = dof;
1773:       b->A   = A;
1774:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1775:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1777:       VecGetSize(mpiaij->lvec,&n);
1778:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1780:       /* create two temporary Index sets for build scatter gather */
1781:       PetscMalloc((n+1)*sizeof(int),&garray);
1782:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1783:       ISCreateBlock(A->comm,dof,n,garray,&from);
1784:       PetscFree(garray);
1785:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1787:       /* create temporary global vector to generate scatter context */
1788:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1790:       /* generate the scatter context */
1791:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1793:       ISDestroy(from);
1794:       ISDestroy(to);
1795:       VecDestroy(gvec);

1797:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1798:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1799:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1800:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1801:     }
1802:     *maij = B;
1803:   }
1804:   return(0);
1805: }