Actual source code: maij.c
1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include src/mat/impls/maij/maij.h
20: #include vecimpl.h
24: int MatMAIJGetAIJ(Mat A,Mat *B)
25: {
26: int ierr;
27: PetscTruth ismpimaij,isseqmaij;
30: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
31: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
32: if (ismpimaij) {
33: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
35: *B = b->A;
36: } else if (isseqmaij) {
37: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
39: *B = b->AIJ;
40: } else {
41: *B = A;
42: }
43: return(0);
44: }
48: int MatMAIJRedimension(Mat A,int dof,Mat *B)
49: {
51: Mat Aij;
54: MatMAIJGetAIJ(A,&Aij);
55: MatCreateMAIJ(Aij,dof,B);
56: return(0);
57: }
61: int MatDestroy_SeqMAIJ(Mat A)
62: {
63: int ierr;
64: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
67: if (b->AIJ) {
68: MatDestroy(b->AIJ);
69: }
70: PetscFree(b);
71: return(0);
72: }
76: int MatDestroy_MPIMAIJ(Mat A)
77: {
78: int ierr;
79: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
82: if (b->AIJ) {
83: MatDestroy(b->AIJ);
84: }
85: if (b->OAIJ) {
86: MatDestroy(b->OAIJ);
87: }
88: if (b->A) {
89: MatDestroy(b->A);
90: }
91: if (b->ctx) {
92: VecScatterDestroy(b->ctx);
93: }
94: if (b->w) {
95: VecDestroy(b->w);
96: }
97: PetscFree(b);
98: return(0);
99: }
101: /*MC
102: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
103: multicomponent problems, interpolating or restricting each component the same way independently.
104: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
106: Operations provided:
107: . MatMult
108: . MatMultTranspose
109: . MatMultAdd
110: . MatMultTransposeAdd
112: Level: advanced
114: .seealso: MatCreateMAIJ(), MatMAIJGetAIJ(), MatMAIJRedimension()
115: M*/
117: EXTERN_C_BEGIN
120: int MatCreate_MAIJ(Mat A)
121: {
122: int ierr;
123: Mat_MPIMAIJ *b;
126: PetscNew(Mat_MPIMAIJ,&b);
127: A->data = (void*)b;
128: PetscMemzero(b,sizeof(Mat_MPIMAIJ));
129: PetscMemzero(A->ops,sizeof(struct _MatOps));
130: A->factor = 0;
131: A->mapping = 0;
133: b->AIJ = 0;
134: b->dof = 0;
135: b->OAIJ = 0;
136: b->ctx = 0;
137: b->w = 0;
138: return(0);
139: }
140: EXTERN_C_END
142: /* --------------------------------------------------------------------------------------*/
145: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146: {
147: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
148: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
149: PetscScalar *x,*y,*v,sum1, sum2;
150: int ierr,m = b->AIJ->m,*idx,*ii;
151: int n,i,jrow,j;
154: VecGetArray(xx,&x);
155: VecGetArray(yy,&y);
156: idx = a->j;
157: v = a->a;
158: ii = a->i;
160: for (i=0; i<m; i++) {
161: jrow = ii[i];
162: n = ii[i+1] - jrow;
163: sum1 = 0.0;
164: sum2 = 0.0;
165: for (j=0; j<n; j++) {
166: sum1 += v[jrow]*x[2*idx[jrow]];
167: sum2 += v[jrow]*x[2*idx[jrow]+1];
168: jrow++;
169: }
170: y[2*i] = sum1;
171: y[2*i+1] = sum2;
172: }
174: PetscLogFlops(4*a->nz - 2*m);
175: VecRestoreArray(xx,&x);
176: VecRestoreArray(yy,&y);
177: return(0);
178: }
182: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
187: int ierr,m = b->AIJ->m,n,i,*idx;
190: VecSet(&zero,yy);
191: VecGetArray(xx,&x);
192: VecGetArray(yy,&y);
193:
194: for (i=0; i<m; i++) {
195: idx = a->j + a->i[i] ;
196: v = a->a + a->i[i] ;
197: n = a->i[i+1] - a->i[i];
198: alpha1 = x[2*i];
199: alpha2 = x[2*i+1];
200: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201: }
202: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203: VecRestoreArray(xx,&x);
204: VecRestoreArray(yy,&y);
205: return(0);
206: }
210: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211: {
212: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
214: PetscScalar *x,*y,*v,sum1, sum2;
215: int ierr,m = b->AIJ->m,*idx,*ii;
216: int n,i,jrow,j;
219: if (yy != zz) {VecCopy(yy,zz);}
220: VecGetArray(xx,&x);
221: VecGetArray(zz,&y);
222: idx = a->j;
223: v = a->a;
224: ii = a->i;
226: for (i=0; i<m; i++) {
227: jrow = ii[i];
228: n = ii[i+1] - jrow;
229: sum1 = 0.0;
230: sum2 = 0.0;
231: for (j=0; j<n; j++) {
232: sum1 += v[jrow]*x[2*idx[jrow]];
233: sum2 += v[jrow]*x[2*idx[jrow]+1];
234: jrow++;
235: }
236: y[2*i] += sum1;
237: y[2*i+1] += sum2;
238: }
240: PetscLogFlops(4*a->nz - 2*m);
241: VecRestoreArray(xx,&x);
242: VecRestoreArray(zz,&y);
243: return(0);
244: }
247: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248: {
249: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
250: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
251: PetscScalar *x,*y,*v,alpha1,alpha2;
252: int ierr,m = b->AIJ->m,n,i,*idx;
255: if (yy != zz) {VecCopy(yy,zz);}
256: VecGetArray(xx,&x);
257: VecGetArray(zz,&y);
258:
259: for (i=0; i<m; i++) {
260: idx = a->j + a->i[i] ;
261: v = a->a + a->i[i] ;
262: n = a->i[i+1] - a->i[i];
263: alpha1 = x[2*i];
264: alpha2 = x[2*i+1];
265: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266: }
267: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268: VecRestoreArray(xx,&x);
269: VecRestoreArray(zz,&y);
270: return(0);
271: }
272: /* --------------------------------------------------------------------------------------*/
275: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276: {
277: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
279: PetscScalar *x,*y,*v,sum1, sum2, sum3;
280: int ierr,m = b->AIJ->m,*idx,*ii;
281: int n,i,jrow,j;
284: VecGetArray(xx,&x);
285: VecGetArray(yy,&y);
286: idx = a->j;
287: v = a->a;
288: ii = a->i;
290: for (i=0; i<m; i++) {
291: jrow = ii[i];
292: n = ii[i+1] - jrow;
293: sum1 = 0.0;
294: sum2 = 0.0;
295: sum3 = 0.0;
296: for (j=0; j<n; j++) {
297: sum1 += v[jrow]*x[3*idx[jrow]];
298: sum2 += v[jrow]*x[3*idx[jrow]+1];
299: sum3 += v[jrow]*x[3*idx[jrow]+2];
300: jrow++;
301: }
302: y[3*i] = sum1;
303: y[3*i+1] = sum2;
304: y[3*i+2] = sum3;
305: }
307: PetscLogFlops(6*a->nz - 3*m);
308: VecRestoreArray(xx,&x);
309: VecRestoreArray(yy,&y);
310: return(0);
311: }
315: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
318: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
319: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320: int ierr,m = b->AIJ->m,n,i,*idx;
323: VecSet(&zero,yy);
324: VecGetArray(xx,&x);
325: VecGetArray(yy,&y);
326:
327: for (i=0; i<m; i++) {
328: idx = a->j + a->i[i];
329: v = a->a + a->i[i];
330: n = a->i[i+1] - a->i[i];
331: alpha1 = x[3*i];
332: alpha2 = x[3*i+1];
333: alpha3 = x[3*i+2];
334: while (n-->0) {
335: y[3*(*idx)] += alpha1*(*v);
336: y[3*(*idx)+1] += alpha2*(*v);
337: y[3*(*idx)+2] += alpha3*(*v);
338: idx++; v++;
339: }
340: }
341: PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342: VecRestoreArray(xx,&x);
343: VecRestoreArray(yy,&y);
344: return(0);
345: }
349: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350: {
351: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
352: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
353: PetscScalar *x,*y,*v,sum1, sum2, sum3;
354: int ierr,m = b->AIJ->m,*idx,*ii;
355: int n,i,jrow,j;
358: if (yy != zz) {VecCopy(yy,zz);}
359: VecGetArray(xx,&x);
360: VecGetArray(zz,&y);
361: idx = a->j;
362: v = a->a;
363: ii = a->i;
365: for (i=0; i<m; i++) {
366: jrow = ii[i];
367: n = ii[i+1] - jrow;
368: sum1 = 0.0;
369: sum2 = 0.0;
370: sum3 = 0.0;
371: for (j=0; j<n; j++) {
372: sum1 += v[jrow]*x[3*idx[jrow]];
373: sum2 += v[jrow]*x[3*idx[jrow]+1];
374: sum3 += v[jrow]*x[3*idx[jrow]+2];
375: jrow++;
376: }
377: y[3*i] += sum1;
378: y[3*i+1] += sum2;
379: y[3*i+2] += sum3;
380: }
382: PetscLogFlops(6*a->nz);
383: VecRestoreArray(xx,&x);
384: VecRestoreArray(zz,&y);
385: return(0);
386: }
389: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390: {
391: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
392: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
393: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
394: int ierr,m = b->AIJ->m,n,i,*idx;
397: if (yy != zz) {VecCopy(yy,zz);}
398: VecGetArray(xx,&x);
399: VecGetArray(zz,&y);
400: for (i=0; i<m; i++) {
401: idx = a->j + a->i[i] ;
402: v = a->a + a->i[i] ;
403: n = a->i[i+1] - a->i[i];
404: alpha1 = x[3*i];
405: alpha2 = x[3*i+1];
406: alpha3 = x[3*i+2];
407: while (n-->0) {
408: y[3*(*idx)] += alpha1*(*v);
409: y[3*(*idx)+1] += alpha2*(*v);
410: y[3*(*idx)+2] += alpha3*(*v);
411: idx++; v++;
412: }
413: }
414: PetscLogFlops(6*a->nz);
415: VecRestoreArray(xx,&x);
416: VecRestoreArray(zz,&y);
417: return(0);
418: }
420: /* ------------------------------------------------------------------------------*/
423: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424: {
425: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
426: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
427: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
428: int ierr,m = b->AIJ->m,*idx,*ii;
429: int n,i,jrow,j;
432: VecGetArray(xx,&x);
433: VecGetArray(yy,&y);
434: idx = a->j;
435: v = a->a;
436: ii = a->i;
438: for (i=0; i<m; i++) {
439: jrow = ii[i];
440: n = ii[i+1] - jrow;
441: sum1 = 0.0;
442: sum2 = 0.0;
443: sum3 = 0.0;
444: sum4 = 0.0;
445: for (j=0; j<n; j++) {
446: sum1 += v[jrow]*x[4*idx[jrow]];
447: sum2 += v[jrow]*x[4*idx[jrow]+1];
448: sum3 += v[jrow]*x[4*idx[jrow]+2];
449: sum4 += v[jrow]*x[4*idx[jrow]+3];
450: jrow++;
451: }
452: y[4*i] = sum1;
453: y[4*i+1] = sum2;
454: y[4*i+2] = sum3;
455: y[4*i+3] = sum4;
456: }
458: PetscLogFlops(8*a->nz - 4*m);
459: VecRestoreArray(xx,&x);
460: VecRestoreArray(yy,&y);
461: return(0);
462: }
466: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467: {
468: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
469: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
470: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471: int ierr,m = b->AIJ->m,n,i,*idx;
474: VecSet(&zero,yy);
475: VecGetArray(xx,&x);
476: VecGetArray(yy,&y);
477: for (i=0; i<m; i++) {
478: idx = a->j + a->i[i] ;
479: v = a->a + a->i[i] ;
480: n = a->i[i+1] - a->i[i];
481: alpha1 = x[4*i];
482: alpha2 = x[4*i+1];
483: alpha3 = x[4*i+2];
484: alpha4 = x[4*i+3];
485: while (n-->0) {
486: y[4*(*idx)] += alpha1*(*v);
487: y[4*(*idx)+1] += alpha2*(*v);
488: y[4*(*idx)+2] += alpha3*(*v);
489: y[4*(*idx)+3] += alpha4*(*v);
490: idx++; v++;
491: }
492: }
493: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494: VecRestoreArray(xx,&x);
495: VecRestoreArray(yy,&y);
496: return(0);
497: }
501: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
504: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
505: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
506: int ierr,m = b->AIJ->m,*idx,*ii;
507: int n,i,jrow,j;
510: if (yy != zz) {VecCopy(yy,zz);}
511: VecGetArray(xx,&x);
512: VecGetArray(zz,&y);
513: idx = a->j;
514: v = a->a;
515: ii = a->i;
517: for (i=0; i<m; i++) {
518: jrow = ii[i];
519: n = ii[i+1] - jrow;
520: sum1 = 0.0;
521: sum2 = 0.0;
522: sum3 = 0.0;
523: sum4 = 0.0;
524: for (j=0; j<n; j++) {
525: sum1 += v[jrow]*x[4*idx[jrow]];
526: sum2 += v[jrow]*x[4*idx[jrow]+1];
527: sum3 += v[jrow]*x[4*idx[jrow]+2];
528: sum4 += v[jrow]*x[4*idx[jrow]+3];
529: jrow++;
530: }
531: y[4*i] += sum1;
532: y[4*i+1] += sum2;
533: y[4*i+2] += sum3;
534: y[4*i+3] += sum4;
535: }
537: PetscLogFlops(8*a->nz - 4*m);
538: VecRestoreArray(xx,&x);
539: VecRestoreArray(zz,&y);
540: return(0);
541: }
544: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545: {
546: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
547: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
548: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549: int ierr,m = b->AIJ->m,n,i,*idx;
552: if (yy != zz) {VecCopy(yy,zz);}
553: VecGetArray(xx,&x);
554: VecGetArray(zz,&y);
555:
556: for (i=0; i<m; i++) {
557: idx = a->j + a->i[i] ;
558: v = a->a + a->i[i] ;
559: n = a->i[i+1] - a->i[i];
560: alpha1 = x[4*i];
561: alpha2 = x[4*i+1];
562: alpha3 = x[4*i+2];
563: alpha4 = x[4*i+3];
564: while (n-->0) {
565: y[4*(*idx)] += alpha1*(*v);
566: y[4*(*idx)+1] += alpha2*(*v);
567: y[4*(*idx)+2] += alpha3*(*v);
568: y[4*(*idx)+3] += alpha4*(*v);
569: idx++; v++;
570: }
571: }
572: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573: VecRestoreArray(xx,&x);
574: VecRestoreArray(zz,&y);
575: return(0);
576: }
577: /* ------------------------------------------------------------------------------*/
581: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582: {
583: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
584: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
585: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586: int ierr,m = b->AIJ->m,*idx,*ii;
587: int n,i,jrow,j;
590: VecGetArray(xx,&x);
591: VecGetArray(yy,&y);
592: idx = a->j;
593: v = a->a;
594: ii = a->i;
596: for (i=0; i<m; i++) {
597: jrow = ii[i];
598: n = ii[i+1] - jrow;
599: sum1 = 0.0;
600: sum2 = 0.0;
601: sum3 = 0.0;
602: sum4 = 0.0;
603: sum5 = 0.0;
604: for (j=0; j<n; j++) {
605: sum1 += v[jrow]*x[5*idx[jrow]];
606: sum2 += v[jrow]*x[5*idx[jrow]+1];
607: sum3 += v[jrow]*x[5*idx[jrow]+2];
608: sum4 += v[jrow]*x[5*idx[jrow]+3];
609: sum5 += v[jrow]*x[5*idx[jrow]+4];
610: jrow++;
611: }
612: y[5*i] = sum1;
613: y[5*i+1] = sum2;
614: y[5*i+2] = sum3;
615: y[5*i+3] = sum4;
616: y[5*i+4] = sum5;
617: }
619: PetscLogFlops(10*a->nz - 5*m);
620: VecRestoreArray(xx,&x);
621: VecRestoreArray(yy,&y);
622: return(0);
623: }
627: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628: {
629: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
630: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
631: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632: int ierr,m = b->AIJ->m,n,i,*idx;
635: VecSet(&zero,yy);
636: VecGetArray(xx,&x);
637: VecGetArray(yy,&y);
638:
639: for (i=0; i<m; i++) {
640: idx = a->j + a->i[i] ;
641: v = a->a + a->i[i] ;
642: n = a->i[i+1] - a->i[i];
643: alpha1 = x[5*i];
644: alpha2 = x[5*i+1];
645: alpha3 = x[5*i+2];
646: alpha4 = x[5*i+3];
647: alpha5 = x[5*i+4];
648: while (n-->0) {
649: y[5*(*idx)] += alpha1*(*v);
650: y[5*(*idx)+1] += alpha2*(*v);
651: y[5*(*idx)+2] += alpha3*(*v);
652: y[5*(*idx)+3] += alpha4*(*v);
653: y[5*(*idx)+4] += alpha5*(*v);
654: idx++; v++;
655: }
656: }
657: PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658: VecRestoreArray(xx,&x);
659: VecRestoreArray(yy,&y);
660: return(0);
661: }
665: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
668: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
669: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670: int ierr,m = b->AIJ->m,*idx,*ii;
671: int n,i,jrow,j;
674: if (yy != zz) {VecCopy(yy,zz);}
675: VecGetArray(xx,&x);
676: VecGetArray(zz,&y);
677: idx = a->j;
678: v = a->a;
679: ii = a->i;
681: for (i=0; i<m; i++) {
682: jrow = ii[i];
683: n = ii[i+1] - jrow;
684: sum1 = 0.0;
685: sum2 = 0.0;
686: sum3 = 0.0;
687: sum4 = 0.0;
688: sum5 = 0.0;
689: for (j=0; j<n; j++) {
690: sum1 += v[jrow]*x[5*idx[jrow]];
691: sum2 += v[jrow]*x[5*idx[jrow]+1];
692: sum3 += v[jrow]*x[5*idx[jrow]+2];
693: sum4 += v[jrow]*x[5*idx[jrow]+3];
694: sum5 += v[jrow]*x[5*idx[jrow]+4];
695: jrow++;
696: }
697: y[5*i] += sum1;
698: y[5*i+1] += sum2;
699: y[5*i+2] += sum3;
700: y[5*i+3] += sum4;
701: y[5*i+4] += sum5;
702: }
704: PetscLogFlops(10*a->nz);
705: VecRestoreArray(xx,&x);
706: VecRestoreArray(zz,&y);
707: return(0);
708: }
712: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713: {
714: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
715: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
716: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717: int ierr,m = b->AIJ->m,n,i,*idx;
720: if (yy != zz) {VecCopy(yy,zz);}
721: VecGetArray(xx,&x);
722: VecGetArray(zz,&y);
723:
724: for (i=0; i<m; i++) {
725: idx = a->j + a->i[i] ;
726: v = a->a + a->i[i] ;
727: n = a->i[i+1] - a->i[i];
728: alpha1 = x[5*i];
729: alpha2 = x[5*i+1];
730: alpha3 = x[5*i+2];
731: alpha4 = x[5*i+3];
732: alpha5 = x[5*i+4];
733: while (n-->0) {
734: y[5*(*idx)] += alpha1*(*v);
735: y[5*(*idx)+1] += alpha2*(*v);
736: y[5*(*idx)+2] += alpha3*(*v);
737: y[5*(*idx)+3] += alpha4*(*v);
738: y[5*(*idx)+4] += alpha5*(*v);
739: idx++; v++;
740: }
741: }
742: PetscLogFlops(10*a->nz);
743: VecRestoreArray(xx,&x);
744: VecRestoreArray(zz,&y);
745: return(0);
746: }
748: /* ------------------------------------------------------------------------------*/
751: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752: {
753: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
754: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
755: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756: int ierr,m = b->AIJ->m,*idx,*ii;
757: int n,i,jrow,j;
760: VecGetArray(xx,&x);
761: VecGetArray(yy,&y);
762: idx = a->j;
763: v = a->a;
764: ii = a->i;
766: for (i=0; i<m; i++) {
767: jrow = ii[i];
768: n = ii[i+1] - jrow;
769: sum1 = 0.0;
770: sum2 = 0.0;
771: sum3 = 0.0;
772: sum4 = 0.0;
773: sum5 = 0.0;
774: sum6 = 0.0;
775: for (j=0; j<n; j++) {
776: sum1 += v[jrow]*x[6*idx[jrow]];
777: sum2 += v[jrow]*x[6*idx[jrow]+1];
778: sum3 += v[jrow]*x[6*idx[jrow]+2];
779: sum4 += v[jrow]*x[6*idx[jrow]+3];
780: sum5 += v[jrow]*x[6*idx[jrow]+4];
781: sum6 += v[jrow]*x[6*idx[jrow]+5];
782: jrow++;
783: }
784: y[6*i] = sum1;
785: y[6*i+1] = sum2;
786: y[6*i+2] = sum3;
787: y[6*i+3] = sum4;
788: y[6*i+4] = sum5;
789: y[6*i+5] = sum6;
790: }
792: PetscLogFlops(12*a->nz - 6*m);
793: VecRestoreArray(xx,&x);
794: VecRestoreArray(yy,&y);
795: return(0);
796: }
800: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801: {
802: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
803: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
804: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805: int ierr,m = b->AIJ->m,n,i,*idx;
808: VecSet(&zero,yy);
809: VecGetArray(xx,&x);
810: VecGetArray(yy,&y);
812: for (i=0; i<m; i++) {
813: idx = a->j + a->i[i] ;
814: v = a->a + a->i[i] ;
815: n = a->i[i+1] - a->i[i];
816: alpha1 = x[6*i];
817: alpha2 = x[6*i+1];
818: alpha3 = x[6*i+2];
819: alpha4 = x[6*i+3];
820: alpha5 = x[6*i+4];
821: alpha6 = x[6*i+5];
822: while (n-->0) {
823: y[6*(*idx)] += alpha1*(*v);
824: y[6*(*idx)+1] += alpha2*(*v);
825: y[6*(*idx)+2] += alpha3*(*v);
826: y[6*(*idx)+3] += alpha4*(*v);
827: y[6*(*idx)+4] += alpha5*(*v);
828: y[6*(*idx)+5] += alpha6*(*v);
829: idx++; v++;
830: }
831: }
832: PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833: VecRestoreArray(xx,&x);
834: VecRestoreArray(yy,&y);
835: return(0);
836: }
840: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841: {
842: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
843: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
844: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845: int ierr,m = b->AIJ->m,*idx,*ii;
846: int n,i,jrow,j;
849: if (yy != zz) {VecCopy(yy,zz);}
850: VecGetArray(xx,&x);
851: VecGetArray(zz,&y);
852: idx = a->j;
853: v = a->a;
854: ii = a->i;
856: for (i=0; i<m; i++) {
857: jrow = ii[i];
858: n = ii[i+1] - jrow;
859: sum1 = 0.0;
860: sum2 = 0.0;
861: sum3 = 0.0;
862: sum4 = 0.0;
863: sum5 = 0.0;
864: sum6 = 0.0;
865: for (j=0; j<n; j++) {
866: sum1 += v[jrow]*x[6*idx[jrow]];
867: sum2 += v[jrow]*x[6*idx[jrow]+1];
868: sum3 += v[jrow]*x[6*idx[jrow]+2];
869: sum4 += v[jrow]*x[6*idx[jrow]+3];
870: sum5 += v[jrow]*x[6*idx[jrow]+4];
871: sum6 += v[jrow]*x[6*idx[jrow]+5];
872: jrow++;
873: }
874: y[6*i] += sum1;
875: y[6*i+1] += sum2;
876: y[6*i+2] += sum3;
877: y[6*i+3] += sum4;
878: y[6*i+4] += sum5;
879: y[6*i+5] += sum6;
880: }
882: PetscLogFlops(12*a->nz);
883: VecRestoreArray(xx,&x);
884: VecRestoreArray(zz,&y);
885: return(0);
886: }
890: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
893: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
894: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895: int ierr,m = b->AIJ->m,n,i,*idx;
898: if (yy != zz) {VecCopy(yy,zz);}
899: VecGetArray(xx,&x);
900: VecGetArray(zz,&y);
901:
902: for (i=0; i<m; i++) {
903: idx = a->j + a->i[i] ;
904: v = a->a + a->i[i] ;
905: n = a->i[i+1] - a->i[i];
906: alpha1 = x[6*i];
907: alpha2 = x[6*i+1];
908: alpha3 = x[6*i+2];
909: alpha4 = x[6*i+3];
910: alpha5 = x[6*i+4];
911: alpha6 = x[6*i+5];
912: while (n-->0) {
913: y[6*(*idx)] += alpha1*(*v);
914: y[6*(*idx)+1] += alpha2*(*v);
915: y[6*(*idx)+2] += alpha3*(*v);
916: y[6*(*idx)+3] += alpha4*(*v);
917: y[6*(*idx)+4] += alpha5*(*v);
918: y[6*(*idx)+5] += alpha6*(*v);
919: idx++; v++;
920: }
921: }
922: PetscLogFlops(12*a->nz);
923: VecRestoreArray(xx,&x);
924: VecRestoreArray(zz,&y);
925: return(0);
926: }
928: /* ------------------------------------------------------------------------------*/
931: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932: {
933: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
934: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
935: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936: int ierr,m = b->AIJ->m,*idx,*ii;
937: int n,i,jrow,j;
940: VecGetArray(xx,&x);
941: VecGetArray(yy,&y);
942: idx = a->j;
943: v = a->a;
944: ii = a->i;
946: for (i=0; i<m; i++) {
947: jrow = ii[i];
948: n = ii[i+1] - jrow;
949: sum1 = 0.0;
950: sum2 = 0.0;
951: sum3 = 0.0;
952: sum4 = 0.0;
953: sum5 = 0.0;
954: sum6 = 0.0;
955: sum7 = 0.0;
956: sum8 = 0.0;
957: for (j=0; j<n; j++) {
958: sum1 += v[jrow]*x[8*idx[jrow]];
959: sum2 += v[jrow]*x[8*idx[jrow]+1];
960: sum3 += v[jrow]*x[8*idx[jrow]+2];
961: sum4 += v[jrow]*x[8*idx[jrow]+3];
962: sum5 += v[jrow]*x[8*idx[jrow]+4];
963: sum6 += v[jrow]*x[8*idx[jrow]+5];
964: sum7 += v[jrow]*x[8*idx[jrow]+6];
965: sum8 += v[jrow]*x[8*idx[jrow]+7];
966: jrow++;
967: }
968: y[8*i] = sum1;
969: y[8*i+1] = sum2;
970: y[8*i+2] = sum3;
971: y[8*i+3] = sum4;
972: y[8*i+4] = sum5;
973: y[8*i+5] = sum6;
974: y[8*i+6] = sum7;
975: y[8*i+7] = sum8;
976: }
978: PetscLogFlops(16*a->nz - 8*m);
979: VecRestoreArray(xx,&x);
980: VecRestoreArray(yy,&y);
981: return(0);
982: }
986: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987: {
988: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
989: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
990: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991: int ierr,m = b->AIJ->m,n,i,*idx;
994: VecSet(&zero,yy);
995: VecGetArray(xx,&x);
996: VecGetArray(yy,&y);
998: for (i=0; i<m; i++) {
999: idx = a->j + a->i[i] ;
1000: v = a->a + a->i[i] ;
1001: n = a->i[i+1] - a->i[i];
1002: alpha1 = x[8*i];
1003: alpha2 = x[8*i+1];
1004: alpha3 = x[8*i+2];
1005: alpha4 = x[8*i+3];
1006: alpha5 = x[8*i+4];
1007: alpha6 = x[8*i+5];
1008: alpha7 = x[8*i+6];
1009: alpha8 = x[8*i+7];
1010: while (n-->0) {
1011: y[8*(*idx)] += alpha1*(*v);
1012: y[8*(*idx)+1] += alpha2*(*v);
1013: y[8*(*idx)+2] += alpha3*(*v);
1014: y[8*(*idx)+3] += alpha4*(*v);
1015: y[8*(*idx)+4] += alpha5*(*v);
1016: y[8*(*idx)+5] += alpha6*(*v);
1017: y[8*(*idx)+6] += alpha7*(*v);
1018: y[8*(*idx)+7] += alpha8*(*v);
1019: idx++; v++;
1020: }
1021: }
1022: PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023: VecRestoreArray(xx,&x);
1024: VecRestoreArray(yy,&y);
1025: return(0);
1026: }
1030: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031: {
1032: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1033: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1034: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035: int ierr,m = b->AIJ->m,*idx,*ii;
1036: int n,i,jrow,j;
1039: if (yy != zz) {VecCopy(yy,zz);}
1040: VecGetArray(xx,&x);
1041: VecGetArray(zz,&y);
1042: idx = a->j;
1043: v = a->a;
1044: ii = a->i;
1046: for (i=0; i<m; i++) {
1047: jrow = ii[i];
1048: n = ii[i+1] - jrow;
1049: sum1 = 0.0;
1050: sum2 = 0.0;
1051: sum3 = 0.0;
1052: sum4 = 0.0;
1053: sum5 = 0.0;
1054: sum6 = 0.0;
1055: sum7 = 0.0;
1056: sum8 = 0.0;
1057: for (j=0; j<n; j++) {
1058: sum1 += v[jrow]*x[8*idx[jrow]];
1059: sum2 += v[jrow]*x[8*idx[jrow]+1];
1060: sum3 += v[jrow]*x[8*idx[jrow]+2];
1061: sum4 += v[jrow]*x[8*idx[jrow]+3];
1062: sum5 += v[jrow]*x[8*idx[jrow]+4];
1063: sum6 += v[jrow]*x[8*idx[jrow]+5];
1064: sum7 += v[jrow]*x[8*idx[jrow]+6];
1065: sum8 += v[jrow]*x[8*idx[jrow]+7];
1066: jrow++;
1067: }
1068: y[8*i] += sum1;
1069: y[8*i+1] += sum2;
1070: y[8*i+2] += sum3;
1071: y[8*i+3] += sum4;
1072: y[8*i+4] += sum5;
1073: y[8*i+5] += sum6;
1074: y[8*i+6] += sum7;
1075: y[8*i+7] += sum8;
1076: }
1078: PetscLogFlops(16*a->nz);
1079: VecRestoreArray(xx,&x);
1080: VecRestoreArray(zz,&y);
1081: return(0);
1082: }
1086: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087: {
1088: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1089: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1090: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091: int ierr,m = b->AIJ->m,n,i,*idx;
1094: if (yy != zz) {VecCopy(yy,zz);}
1095: VecGetArray(xx,&x);
1096: VecGetArray(zz,&y);
1097: for (i=0; i<m; i++) {
1098: idx = a->j + a->i[i] ;
1099: v = a->a + a->i[i] ;
1100: n = a->i[i+1] - a->i[i];
1101: alpha1 = x[8*i];
1102: alpha2 = x[8*i+1];
1103: alpha3 = x[8*i+2];
1104: alpha4 = x[8*i+3];
1105: alpha5 = x[8*i+4];
1106: alpha6 = x[8*i+5];
1107: alpha7 = x[8*i+6];
1108: alpha8 = x[8*i+7];
1109: while (n-->0) {
1110: y[8*(*idx)] += alpha1*(*v);
1111: y[8*(*idx)+1] += alpha2*(*v);
1112: y[8*(*idx)+2] += alpha3*(*v);
1113: y[8*(*idx)+3] += alpha4*(*v);
1114: y[8*(*idx)+4] += alpha5*(*v);
1115: y[8*(*idx)+5] += alpha6*(*v);
1116: y[8*(*idx)+6] += alpha7*(*v);
1117: y[8*(*idx)+7] += alpha8*(*v);
1118: idx++; v++;
1119: }
1120: }
1121: PetscLogFlops(16*a->nz);
1122: VecRestoreArray(xx,&x);
1123: VecRestoreArray(zz,&y);
1124: return(0);
1125: }
1127: /* ------------------------------------------------------------------------------*/
1130: int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1131: {
1132: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1133: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1134: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1135: int ierr,m = b->AIJ->m,*idx,*ii;
1136: int n,i,jrow,j;
1139: VecGetArray(xx,&x);
1140: VecGetArray(yy,&y);
1141: idx = a->j;
1142: v = a->a;
1143: ii = a->i;
1145: for (i=0; i<m; i++) {
1146: jrow = ii[i];
1147: n = ii[i+1] - jrow;
1148: sum1 = 0.0;
1149: sum2 = 0.0;
1150: sum3 = 0.0;
1151: sum4 = 0.0;
1152: sum5 = 0.0;
1153: sum6 = 0.0;
1154: sum7 = 0.0;
1155: sum8 = 0.0;
1156: sum9 = 0.0;
1157: for (j=0; j<n; j++) {
1158: sum1 += v[jrow]*x[9*idx[jrow]];
1159: sum2 += v[jrow]*x[9*idx[jrow]+1];
1160: sum3 += v[jrow]*x[9*idx[jrow]+2];
1161: sum4 += v[jrow]*x[9*idx[jrow]+3];
1162: sum5 += v[jrow]*x[9*idx[jrow]+4];
1163: sum6 += v[jrow]*x[9*idx[jrow]+5];
1164: sum7 += v[jrow]*x[9*idx[jrow]+6];
1165: sum8 += v[jrow]*x[9*idx[jrow]+7];
1166: sum9 += v[jrow]*x[9*idx[jrow]+8];
1167: jrow++;
1168: }
1169: y[9*i] = sum1;
1170: y[9*i+1] = sum2;
1171: y[9*i+2] = sum3;
1172: y[9*i+3] = sum4;
1173: y[9*i+4] = sum5;
1174: y[9*i+5] = sum6;
1175: y[9*i+6] = sum7;
1176: y[9*i+7] = sum8;
1177: y[9*i+8] = sum9;
1178: }
1180: PetscLogFlops(18*a->nz - 9*m);
1181: VecRestoreArray(xx,&x);
1182: VecRestoreArray(yy,&y);
1183: return(0);
1184: }
1188: int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1189: {
1190: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1191: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1192: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1193: int ierr,m = b->AIJ->m,n,i,*idx;
1196: VecSet(&zero,yy);
1197: VecGetArray(xx,&x);
1198: VecGetArray(yy,&y);
1200: for (i=0; i<m; i++) {
1201: idx = a->j + a->i[i] ;
1202: v = a->a + a->i[i] ;
1203: n = a->i[i+1] - a->i[i];
1204: alpha1 = x[9*i];
1205: alpha2 = x[9*i+1];
1206: alpha3 = x[9*i+2];
1207: alpha4 = x[9*i+3];
1208: alpha5 = x[9*i+4];
1209: alpha6 = x[9*i+5];
1210: alpha7 = x[9*i+6];
1211: alpha8 = x[9*i+7];
1212: alpha9 = x[9*i+8];
1213: while (n-->0) {
1214: y[9*(*idx)] += alpha1*(*v);
1215: y[9*(*idx)+1] += alpha2*(*v);
1216: y[9*(*idx)+2] += alpha3*(*v);
1217: y[9*(*idx)+3] += alpha4*(*v);
1218: y[9*(*idx)+4] += alpha5*(*v);
1219: y[9*(*idx)+5] += alpha6*(*v);
1220: y[9*(*idx)+6] += alpha7*(*v);
1221: y[9*(*idx)+7] += alpha8*(*v);
1222: y[9*(*idx)+8] += alpha9*(*v);
1223: idx++; v++;
1224: }
1225: }
1226: PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1227: VecRestoreArray(xx,&x);
1228: VecRestoreArray(yy,&y);
1229: return(0);
1230: }
1234: int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1235: {
1236: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1237: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1238: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1239: int ierr,m = b->AIJ->m,*idx,*ii;
1240: int n,i,jrow,j;
1243: if (yy != zz) {VecCopy(yy,zz);}
1244: VecGetArray(xx,&x);
1245: VecGetArray(zz,&y);
1246: idx = a->j;
1247: v = a->a;
1248: ii = a->i;
1250: for (i=0; i<m; i++) {
1251: jrow = ii[i];
1252: n = ii[i+1] - jrow;
1253: sum1 = 0.0;
1254: sum2 = 0.0;
1255: sum3 = 0.0;
1256: sum4 = 0.0;
1257: sum5 = 0.0;
1258: sum6 = 0.0;
1259: sum7 = 0.0;
1260: sum8 = 0.0;
1261: sum9 = 0.0;
1262: for (j=0; j<n; j++) {
1263: sum1 += v[jrow]*x[9*idx[jrow]];
1264: sum2 += v[jrow]*x[9*idx[jrow]+1];
1265: sum3 += v[jrow]*x[9*idx[jrow]+2];
1266: sum4 += v[jrow]*x[9*idx[jrow]+3];
1267: sum5 += v[jrow]*x[9*idx[jrow]+4];
1268: sum6 += v[jrow]*x[9*idx[jrow]+5];
1269: sum7 += v[jrow]*x[9*idx[jrow]+6];
1270: sum8 += v[jrow]*x[9*idx[jrow]+7];
1271: sum9 += v[jrow]*x[9*idx[jrow]+8];
1272: jrow++;
1273: }
1274: y[9*i] += sum1;
1275: y[9*i+1] += sum2;
1276: y[9*i+2] += sum3;
1277: y[9*i+3] += sum4;
1278: y[9*i+4] += sum5;
1279: y[9*i+5] += sum6;
1280: y[9*i+6] += sum7;
1281: y[9*i+7] += sum8;
1282: y[9*i+8] += sum9;
1283: }
1285: PetscLogFlops(18*a->nz);
1286: VecRestoreArray(xx,&x);
1287: VecRestoreArray(zz,&y);
1288: return(0);
1289: }
1293: int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1294: {
1295: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1296: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1297: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1298: int ierr,m = b->AIJ->m,n,i,*idx;
1301: if (yy != zz) {VecCopy(yy,zz);}
1302: VecGetArray(xx,&x);
1303: VecGetArray(zz,&y);
1304: for (i=0; i<m; i++) {
1305: idx = a->j + a->i[i] ;
1306: v = a->a + a->i[i] ;
1307: n = a->i[i+1] - a->i[i];
1308: alpha1 = x[9*i];
1309: alpha2 = x[9*i+1];
1310: alpha3 = x[9*i+2];
1311: alpha4 = x[9*i+3];
1312: alpha5 = x[9*i+4];
1313: alpha6 = x[9*i+5];
1314: alpha7 = x[9*i+6];
1315: alpha8 = x[9*i+7];
1316: alpha9 = x[9*i+8];
1317: while (n-->0) {
1318: y[9*(*idx)] += alpha1*(*v);
1319: y[9*(*idx)+1] += alpha2*(*v);
1320: y[9*(*idx)+2] += alpha3*(*v);
1321: y[9*(*idx)+3] += alpha4*(*v);
1322: y[9*(*idx)+4] += alpha5*(*v);
1323: y[9*(*idx)+5] += alpha6*(*v);
1324: y[9*(*idx)+6] += alpha7*(*v);
1325: y[9*(*idx)+7] += alpha8*(*v);
1326: y[9*(*idx)+8] += alpha9*(*v);
1327: idx++; v++;
1328: }
1329: }
1330: PetscLogFlops(18*a->nz);
1331: VecRestoreArray(xx,&x);
1332: VecRestoreArray(zz,&y);
1333: return(0);
1334: }
1336: /*--------------------------------------------------------------------------------------------*/
1339: int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1340: {
1341: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1342: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1343: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1344: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1345: int ierr,m = b->AIJ->m,*idx,*ii;
1346: int n,i,jrow,j;
1349: VecGetArray(xx,&x);
1350: VecGetArray(yy,&y);
1351: idx = a->j;
1352: v = a->a;
1353: ii = a->i;
1355: for (i=0; i<m; i++) {
1356: jrow = ii[i];
1357: n = ii[i+1] - jrow;
1358: sum1 = 0.0;
1359: sum2 = 0.0;
1360: sum3 = 0.0;
1361: sum4 = 0.0;
1362: sum5 = 0.0;
1363: sum6 = 0.0;
1364: sum7 = 0.0;
1365: sum8 = 0.0;
1366: sum9 = 0.0;
1367: sum10 = 0.0;
1368: sum11 = 0.0;
1369: sum12 = 0.0;
1370: sum13 = 0.0;
1371: sum14 = 0.0;
1372: sum15 = 0.0;
1373: sum16 = 0.0;
1374: for (j=0; j<n; j++) {
1375: sum1 += v[jrow]*x[16*idx[jrow]];
1376: sum2 += v[jrow]*x[16*idx[jrow]+1];
1377: sum3 += v[jrow]*x[16*idx[jrow]+2];
1378: sum4 += v[jrow]*x[16*idx[jrow]+3];
1379: sum5 += v[jrow]*x[16*idx[jrow]+4];
1380: sum6 += v[jrow]*x[16*idx[jrow]+5];
1381: sum7 += v[jrow]*x[16*idx[jrow]+6];
1382: sum8 += v[jrow]*x[16*idx[jrow]+7];
1383: sum9 += v[jrow]*x[16*idx[jrow]+8];
1384: sum10 += v[jrow]*x[16*idx[jrow]+9];
1385: sum11 += v[jrow]*x[16*idx[jrow]+10];
1386: sum12 += v[jrow]*x[16*idx[jrow]+11];
1387: sum13 += v[jrow]*x[16*idx[jrow]+12];
1388: sum14 += v[jrow]*x[16*idx[jrow]+13];
1389: sum15 += v[jrow]*x[16*idx[jrow]+14];
1390: sum16 += v[jrow]*x[16*idx[jrow]+15];
1391: jrow++;
1392: }
1393: y[16*i] = sum1;
1394: y[16*i+1] = sum2;
1395: y[16*i+2] = sum3;
1396: y[16*i+3] = sum4;
1397: y[16*i+4] = sum5;
1398: y[16*i+5] = sum6;
1399: y[16*i+6] = sum7;
1400: y[16*i+7] = sum8;
1401: y[16*i+8] = sum9;
1402: y[16*i+9] = sum10;
1403: y[16*i+10] = sum11;
1404: y[16*i+11] = sum12;
1405: y[16*i+12] = sum13;
1406: y[16*i+13] = sum14;
1407: y[16*i+14] = sum15;
1408: y[16*i+15] = sum16;
1409: }
1411: PetscLogFlops(32*a->nz - 16*m);
1412: VecRestoreArray(xx,&x);
1413: VecRestoreArray(yy,&y);
1414: return(0);
1415: }
1419: int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1420: {
1421: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1422: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1423: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1424: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1425: int ierr,m = b->AIJ->m,n,i,*idx;
1428: VecSet(&zero,yy);
1429: VecGetArray(xx,&x);
1430: VecGetArray(yy,&y);
1432: for (i=0; i<m; i++) {
1433: idx = a->j + a->i[i] ;
1434: v = a->a + a->i[i] ;
1435: n = a->i[i+1] - a->i[i];
1436: alpha1 = x[16*i];
1437: alpha2 = x[16*i+1];
1438: alpha3 = x[16*i+2];
1439: alpha4 = x[16*i+3];
1440: alpha5 = x[16*i+4];
1441: alpha6 = x[16*i+5];
1442: alpha7 = x[16*i+6];
1443: alpha8 = x[16*i+7];
1444: alpha9 = x[16*i+8];
1445: alpha10 = x[16*i+9];
1446: alpha11 = x[16*i+10];
1447: alpha12 = x[16*i+11];
1448: alpha13 = x[16*i+12];
1449: alpha14 = x[16*i+13];
1450: alpha15 = x[16*i+14];
1451: alpha16 = x[16*i+15];
1452: while (n-->0) {
1453: y[16*(*idx)] += alpha1*(*v);
1454: y[16*(*idx)+1] += alpha2*(*v);
1455: y[16*(*idx)+2] += alpha3*(*v);
1456: y[16*(*idx)+3] += alpha4*(*v);
1457: y[16*(*idx)+4] += alpha5*(*v);
1458: y[16*(*idx)+5] += alpha6*(*v);
1459: y[16*(*idx)+6] += alpha7*(*v);
1460: y[16*(*idx)+7] += alpha8*(*v);
1461: y[16*(*idx)+8] += alpha9*(*v);
1462: y[16*(*idx)+9] += alpha10*(*v);
1463: y[16*(*idx)+10] += alpha11*(*v);
1464: y[16*(*idx)+11] += alpha12*(*v);
1465: y[16*(*idx)+12] += alpha13*(*v);
1466: y[16*(*idx)+13] += alpha14*(*v);
1467: y[16*(*idx)+14] += alpha15*(*v);
1468: y[16*(*idx)+15] += alpha16*(*v);
1469: idx++; v++;
1470: }
1471: }
1472: PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1473: VecRestoreArray(xx,&x);
1474: VecRestoreArray(yy,&y);
1475: return(0);
1476: }
1480: int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1481: {
1482: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1483: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1484: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1485: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1486: int ierr,m = b->AIJ->m,*idx,*ii;
1487: int n,i,jrow,j;
1490: if (yy != zz) {VecCopy(yy,zz);}
1491: VecGetArray(xx,&x);
1492: VecGetArray(zz,&y);
1493: idx = a->j;
1494: v = a->a;
1495: ii = a->i;
1497: for (i=0; i<m; i++) {
1498: jrow = ii[i];
1499: n = ii[i+1] - jrow;
1500: sum1 = 0.0;
1501: sum2 = 0.0;
1502: sum3 = 0.0;
1503: sum4 = 0.0;
1504: sum5 = 0.0;
1505: sum6 = 0.0;
1506: sum7 = 0.0;
1507: sum8 = 0.0;
1508: sum9 = 0.0;
1509: sum10 = 0.0;
1510: sum11 = 0.0;
1511: sum12 = 0.0;
1512: sum13 = 0.0;
1513: sum14 = 0.0;
1514: sum15 = 0.0;
1515: sum16 = 0.0;
1516: for (j=0; j<n; j++) {
1517: sum1 += v[jrow]*x[16*idx[jrow]];
1518: sum2 += v[jrow]*x[16*idx[jrow]+1];
1519: sum3 += v[jrow]*x[16*idx[jrow]+2];
1520: sum4 += v[jrow]*x[16*idx[jrow]+3];
1521: sum5 += v[jrow]*x[16*idx[jrow]+4];
1522: sum6 += v[jrow]*x[16*idx[jrow]+5];
1523: sum7 += v[jrow]*x[16*idx[jrow]+6];
1524: sum8 += v[jrow]*x[16*idx[jrow]+7];
1525: sum9 += v[jrow]*x[16*idx[jrow]+8];
1526: sum10 += v[jrow]*x[16*idx[jrow]+9];
1527: sum11 += v[jrow]*x[16*idx[jrow]+10];
1528: sum12 += v[jrow]*x[16*idx[jrow]+11];
1529: sum13 += v[jrow]*x[16*idx[jrow]+12];
1530: sum14 += v[jrow]*x[16*idx[jrow]+13];
1531: sum15 += v[jrow]*x[16*idx[jrow]+14];
1532: sum16 += v[jrow]*x[16*idx[jrow]+15];
1533: jrow++;
1534: }
1535: y[16*i] += sum1;
1536: y[16*i+1] += sum2;
1537: y[16*i+2] += sum3;
1538: y[16*i+3] += sum4;
1539: y[16*i+4] += sum5;
1540: y[16*i+5] += sum6;
1541: y[16*i+6] += sum7;
1542: y[16*i+7] += sum8;
1543: y[16*i+8] += sum9;
1544: y[16*i+9] += sum10;
1545: y[16*i+10] += sum11;
1546: y[16*i+11] += sum12;
1547: y[16*i+12] += sum13;
1548: y[16*i+13] += sum14;
1549: y[16*i+14] += sum15;
1550: y[16*i+15] += sum16;
1551: }
1553: PetscLogFlops(32*a->nz);
1554: VecRestoreArray(xx,&x);
1555: VecRestoreArray(zz,&y);
1556: return(0);
1557: }
1561: int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1562: {
1563: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1564: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1565: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1566: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1567: int ierr,m = b->AIJ->m,n,i,*idx;
1570: if (yy != zz) {VecCopy(yy,zz);}
1571: VecGetArray(xx,&x);
1572: VecGetArray(zz,&y);
1573: for (i=0; i<m; i++) {
1574: idx = a->j + a->i[i] ;
1575: v = a->a + a->i[i] ;
1576: n = a->i[i+1] - a->i[i];
1577: alpha1 = x[16*i];
1578: alpha2 = x[16*i+1];
1579: alpha3 = x[16*i+2];
1580: alpha4 = x[16*i+3];
1581: alpha5 = x[16*i+4];
1582: alpha6 = x[16*i+5];
1583: alpha7 = x[16*i+6];
1584: alpha8 = x[16*i+7];
1585: alpha9 = x[16*i+8];
1586: alpha10 = x[16*i+9];
1587: alpha11 = x[16*i+10];
1588: alpha12 = x[16*i+11];
1589: alpha13 = x[16*i+12];
1590: alpha14 = x[16*i+13];
1591: alpha15 = x[16*i+14];
1592: alpha16 = x[16*i+15];
1593: while (n-->0) {
1594: y[16*(*idx)] += alpha1*(*v);
1595: y[16*(*idx)+1] += alpha2*(*v);
1596: y[16*(*idx)+2] += alpha3*(*v);
1597: y[16*(*idx)+3] += alpha4*(*v);
1598: y[16*(*idx)+4] += alpha5*(*v);
1599: y[16*(*idx)+5] += alpha6*(*v);
1600: y[16*(*idx)+6] += alpha7*(*v);
1601: y[16*(*idx)+7] += alpha8*(*v);
1602: y[16*(*idx)+8] += alpha9*(*v);
1603: y[16*(*idx)+9] += alpha10*(*v);
1604: y[16*(*idx)+10] += alpha11*(*v);
1605: y[16*(*idx)+11] += alpha12*(*v);
1606: y[16*(*idx)+12] += alpha13*(*v);
1607: y[16*(*idx)+13] += alpha14*(*v);
1608: y[16*(*idx)+14] += alpha15*(*v);
1609: y[16*(*idx)+15] += alpha16*(*v);
1610: idx++; v++;
1611: }
1612: }
1613: PetscLogFlops(32*a->nz);
1614: VecRestoreArray(xx,&x);
1615: VecRestoreArray(zz,&y);
1616: return(0);
1617: }
1619: /*===================================================================================*/
1622: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1623: {
1624: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1625: int ierr;
1628: /* start the scatter */
1629: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1630: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1631: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1632: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1633: return(0);
1634: }
1638: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1639: {
1640: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1641: int ierr;
1643: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1644: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1645: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1646: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1647: return(0);
1648: }
1652: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1653: {
1654: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1655: int ierr;
1658: /* start the scatter */
1659: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1660: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1661: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1662: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1663: return(0);
1664: }
1668: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1671: int ierr;
1673: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1674: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1675: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1676: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1677: return(0);
1678: }
1680: /* ---------------------------------------------------------------------------------- */
1681: /*MC
1682: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1683: operations for multicomponent problems. It interpolates each component the same
1684: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
1685: and MATMPIAIJ for distributed matrices.
1687: Operations provided:
1688: . MatMult
1689: . MatMultTranspose
1690: . MatMultAdd
1691: . MatMultTransposeAdd
1693: Level: advanced
1695: M*/
1698: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1699: {
1700: int ierr,size,n;
1701: Mat_MPIMAIJ *b;
1702: Mat B;
1705: PetscObjectReference((PetscObject)A);
1707: if (dof == 1) {
1708: *maij = A;
1709: } else {
1710: MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1711: B->assembled = PETSC_TRUE;
1713: MPI_Comm_size(A->comm,&size);
1714: if (size == 1) {
1715: MatSetType(B,MATSEQMAIJ);
1716: B->ops->destroy = MatDestroy_SeqMAIJ;
1717: b = (Mat_MPIMAIJ*)B->data;
1718: b->dof = dof;
1719: b->AIJ = A;
1720: if (dof == 2) {
1721: B->ops->mult = MatMult_SeqMAIJ_2;
1722: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
1723: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
1724: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1725: } else if (dof == 3) {
1726: B->ops->mult = MatMult_SeqMAIJ_3;
1727: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
1728: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
1729: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1730: } else if (dof == 4) {
1731: B->ops->mult = MatMult_SeqMAIJ_4;
1732: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
1733: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
1734: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1735: } else if (dof == 5) {
1736: B->ops->mult = MatMult_SeqMAIJ_5;
1737: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1738: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1739: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1740: } else if (dof == 6) {
1741: B->ops->mult = MatMult_SeqMAIJ_6;
1742: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
1743: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
1744: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1745: } else if (dof == 8) {
1746: B->ops->mult = MatMult_SeqMAIJ_8;
1747: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
1748: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
1749: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1750: } else if (dof == 9) {
1751: B->ops->mult = MatMult_SeqMAIJ_9;
1752: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
1753: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
1754: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1755: } else if (dof == 16) {
1756: B->ops->mult = MatMult_SeqMAIJ_16;
1757: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
1758: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
1759: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1760: } else {
1761: SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1762: }
1763: } else {
1764: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1765: IS from,to;
1766: Vec gvec;
1767: int *garray,i;
1769: MatSetType(B,MATMPIMAIJ);
1770: B->ops->destroy = MatDestroy_MPIMAIJ;
1771: b = (Mat_MPIMAIJ*)B->data;
1772: b->dof = dof;
1773: b->A = A;
1774: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1775: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
1777: VecGetSize(mpiaij->lvec,&n);
1778: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
1780: /* create two temporary Index sets for build scatter gather */
1781: PetscMalloc((n+1)*sizeof(int),&garray);
1782: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1783: ISCreateBlock(A->comm,dof,n,garray,&from);
1784: PetscFree(garray);
1785: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
1787: /* create temporary global vector to generate scatter context */
1788: VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);
1790: /* generate the scatter context */
1791: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
1793: ISDestroy(from);
1794: ISDestroy(to);
1795: VecDestroy(gvec);
1797: B->ops->mult = MatMult_MPIMAIJ_dof;
1798: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
1799: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
1800: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1801: }
1802: *maij = B;
1803: }
1804: return(0);
1805: }