Actual source code: dvec2.c
2: /*$Id: dvec2.c,v 1.91 2001/09/11 18:14:31 bsmith Exp $*/
4: /*
5: Defines some vector operation functions that are shared by
6: sequential and parallel vectors.
7: */
8: #include src/vec/impls/dvecimpl.h
9: #include src/inline/dot.h
10: #include src/inline/setval.h
11: #include src/inline/axpy.h
13: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
16: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
17: {
18: Vec_Seq *xv = (Vec_Seq *)xin->data;
19: int i,nv_rem,n = xin->n,ierr;
20: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
21: Vec *yy;
24: sum0 = 0;
25: sum1 = 0;
26: sum2 = 0;
28: i = nv;
29: nv_rem = nv&0x3;
30: yy = (Vec*)yin;
31: x = xv->array;
33: switch (nv_rem) {
34: case 3:
35: VecGetArray(yy[0],&yy0);
36: VecGetArray(yy[1],&yy1);
37: VecGetArray(yy[2],&yy2);
38: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
39: VecRestoreArray(yy[0],&yy0);
40: VecRestoreArray(yy[1],&yy1);
41: VecRestoreArray(yy[2],&yy2);
42: z[0] = sum0;
43: z[1] = sum1;
44: z[2] = sum2;
45: break;
46: case 2:
47: VecGetArray(yy[0],&yy0);
48: VecGetArray(yy[1],&yy1);
49: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
50: VecRestoreArray(yy[0],&yy0);
51: VecRestoreArray(yy[1],&yy1);
52: z[0] = sum0;
53: z[1] = sum1;
54: break;
55: case 1:
56: VecGetArray(yy[0],&yy0);
57: fortranmdot1_(x,yy0,&n,&sum0);
58: VecRestoreArray(yy[0],&yy0);
59: z[0] = sum0;
60: break;
61: case 0:
62: break;
63: }
64: z += nv_rem;
65: i -= nv_rem;
66: yy += nv_rem;
68: while (i >0) {
69: sum0 = 0;
70: sum1 = 0;
71: sum2 = 0;
72: sum3 = 0;
73: VecGetArray(yy[0],&yy0);
74: VecGetArray(yy[1],&yy1);
75: VecGetArray(yy[2],&yy2);
76: VecGetArray(yy[3],&yy3);
77: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
78: VecRestoreArray(yy[0],&yy0);
79: VecRestoreArray(yy[1],&yy1);
80: VecRestoreArray(yy[2],&yy2);
81: VecRestoreArray(yy[3],&yy3);
82: yy += 4;
83: z[0] = sum0;
84: z[1] = sum1;
85: z[2] = sum2;
86: z[3] = sum3;
87: z += 4;
88: i -= 4;
89: }
90: PetscLogFlops(nv*(2*xin->n-1));
91: return(0);
92: }
94: #else
97: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
98: {
99: Vec_Seq *xv = (Vec_Seq *)xin->data;
100: int n = xin->n,i,j,nv_rem,j_rem,ierr;
101: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
102: PetscScalar * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
103: Vec *yy;
106: sum0 = 0;
107: sum1 = 0;
108: sum2 = 0;
110: i = nv;
111: nv_rem = nv&0x3;
112: yy = (Vec *)yin;
113: j = n;
114: x = xv->array;
116: switch (nv_rem) {
117: case 3:
118: VecGetArray(yy[0],&yy0);
119: VecGetArray(yy[1],&yy1);
120: VecGetArray(yy[2],&yy2);
121: switch (j_rem=j&0x3) {
122: case 3:
123: x2 = x[2];
124: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125: sum2 += x2*PetscConj(yy2[2]);
126: case 2:
127: x1 = x[1];
128: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129: sum2 += x1*PetscConj(yy2[1]);
130: case 1:
131: x0 = x[0];
132: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133: sum2 += x0*PetscConj(yy2[0]);
134: case 0:
135: x += j_rem;
136: yy0 += j_rem;
137: yy1 += j_rem;
138: yy2 += j_rem;
139: j -= j_rem;
140: break;
141: }
142: while (j>0) {
143: x0 = x[0];
144: x1 = x[1];
145: x2 = x[2];
146: x3 = x[3];
147: x += 4;
148:
149: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152: j -= 4;
153: }
154: z[0] = sum0;
155: z[1] = sum1;
156: z[2] = sum2;
157: VecRestoreArray(yy[0],&yy0);
158: VecRestoreArray(yy[1],&yy1);
159: VecRestoreArray(yy[2],&yy2);
160: break;
161: case 2:
162: VecGetArray(yy[0],&yy0);
163: VecGetArray(yy[1],&yy1);
164: switch (j_rem=j&0x3) {
165: case 3:
166: x2 = x[2];
167: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168: case 2:
169: x1 = x[1];
170: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171: case 1:
172: x0 = x[0];
173: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174: case 0:
175: x += j_rem;
176: yy0 += j_rem;
177: yy1 += j_rem;
178: j -= j_rem;
179: break;
180: }
181: while (j>0) {
182: x0 = x[0];
183: x1 = x[1];
184: x2 = x[2];
185: x3 = x[3];
186: x += 4;
187:
188: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190: j -= 4;
191: }
192: z[0] = sum0;
193: z[1] = sum1;
194:
195: VecRestoreArray(yy[0],&yy0);
196: VecRestoreArray(yy[1],&yy1);
197: break;
198: case 1:
199: VecGetArray(yy[0],&yy0);
200: switch (j_rem=j&0x3) {
201: case 3:
202: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203: case 2:
204: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205: case 1:
206: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207: case 0:
208: x += j_rem;
209: yy0 += j_rem;
210: j -= j_rem;
211: break;
212: }
213: while (j>0) {
214: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216: yy0+=4;
217: j -= 4; x+=4;
218: }
219: z[0] = sum0;
221: VecRestoreArray(yy[0],&yy0);
222: break;
223: case 0:
224: break;
225: }
226: z += nv_rem;
227: i -= nv_rem;
228: yy += nv_rem;
230: while (i >0) {
231: sum0 = 0;
232: sum1 = 0;
233: sum2 = 0;
234: sum3 = 0;
235: VecGetArray(yy[0],&yy0);
236: VecGetArray(yy[1],&yy1);
237: VecGetArray(yy[2],&yy2);
238: VecGetArray(yy[3],&yy3);
240: j = n;
241: x = xv->array;
242: switch (j_rem=j&0x3) {
243: case 3:
244: x2 = x[2];
245: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247: case 2:
248: x1 = x[1];
249: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251: case 1:
252: x0 = x[0];
253: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255: case 0:
256: x += j_rem;
257: yy0 += j_rem;
258: yy1 += j_rem;
259: yy2 += j_rem;
260: yy3 += j_rem;
261: j -= j_rem;
262: break;
263: }
264: while (j>0) {
265: x0 = x[0];
266: x1 = x[1];
267: x2 = x[2];
268: x3 = x[3];
269: x += 4;
270:
271: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275: j -= 4;
276: }
277: z[0] = sum0;
278: z[1] = sum1;
279: z[2] = sum2;
280: z[3] = sum3;
281: z += 4;
282: i -= 4;
283: VecRestoreArray(yy[0],&yy0);
284: VecRestoreArray(yy[1],&yy1);
285: VecRestoreArray(yy[2],&yy2);
286: VecRestoreArray(yy[3],&yy3);
287: yy += 4;
288: }
289: PetscLogFlops(nv*(2*xin->n-1));
290: return(0);
291: }
292: #endif
294: /* ----------------------------------------------------------------------------*/
297: int VecMTDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
298: {
299: Vec_Seq *xv = (Vec_Seq *)xin->data;
300: int n = xin->n,i,j,nv_rem,j_rem,ierr;
301: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
302: Vec *yy;
303:
306: sum0 = 0;
307: sum1 = 0;
308: sum2 = 0;
310: i = nv;
311: nv_rem = nv&0x3;
312: yy = (Vec*)yin;
313: j = n;
314: x = xv->array;
316: switch (nv_rem) {
317: case 3:
318: VecGetArray(yy[0],&yy0);
319: VecGetArray(yy[1],&yy1);
320: VecGetArray(yy[2],&yy2);
321: switch (j_rem=j&0x3) {
322: case 3:
323: x2 = x[2];
324: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
325: sum2 += x2*yy2[2];
326: case 2:
327: x1 = x[1];
328: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
329: sum2 += x1*yy2[1];
330: case 1:
331: x0 = x[0];
332: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
333: sum2 += x0*yy2[0];
334: case 0:
335: x += j_rem;
336: yy0 += j_rem;
337: yy1 += j_rem;
338: yy2 += j_rem;
339: j -= j_rem;
340: break;
341: }
342: while (j>0) {
343: x0 = x[0];
344: x1 = x[1];
345: x2 = x[2];
346: x3 = x[3];
347: x += 4;
348:
349: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
350: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
351: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
352: j -= 4;
353: }
354: z[0] = sum0;
355: z[1] = sum1;
356: z[2] = sum2;
357: VecRestoreArray(yy[0],&yy0);
358: VecRestoreArray(yy[1],&yy1);
359: VecRestoreArray(yy[2],&yy2);
360: break;
361: case 2:
362: VecGetArray(yy[0],&yy0);
363: VecGetArray(yy[1],&yy1);
364: switch (j_rem=j&0x3) {
365: case 3:
366: x2 = x[2];
367: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
368: case 2:
369: x1 = x[1];
370: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
371: case 1:
372: x0 = x[0];
373: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
374: case 0:
375: x += j_rem;
376: yy0 += j_rem;
377: yy1 += j_rem;
378: j -= j_rem;
379: break;
380: }
381: while (j>0) {
382: x0 = x[0];
383: x1 = x[1];
384: x2 = x[2];
385: x3 = x[3];
386: x += 4;
387:
388: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
389: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
390: j -= 4;
391: }
392: z[0] = sum0;
393: z[1] = sum1;
394:
395: VecRestoreArray(yy[0],&yy0);
396: VecRestoreArray(yy[1],&yy1);
397: break;
398: case 1:
399: VecGetArray(yy[0],&yy0);
400: switch (j_rem=j&0x3) {
401: case 3:
402: x2 = x[2]; sum0 += x2*yy0[2];
403: case 2:
404: x1 = x[1]; sum0 += x1*yy0[1];
405: case 1:
406: x0 = x[0]; sum0 += x0*yy0[0];
407: case 0:
408: x += j_rem;
409: yy0 += j_rem;
410: j -= j_rem;
411: break;
412: }
413: while (j>0) {
414: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
415: j -= 4; x+=4;
416: }
417: z[0] = sum0;
419: VecRestoreArray(yy[0],&yy0);
420: break;
421: case 0:
422: break;
423: }
424: z += nv_rem;
425: i -= nv_rem;
426: yy += nv_rem;
428: while (i >0) {
429: sum0 = 0;
430: sum1 = 0;
431: sum2 = 0;
432: sum3 = 0;
433: VecGetArray(yy[0],&yy0);
434: VecGetArray(yy[1],&yy1);
435: VecGetArray(yy[2],&yy2);
436: VecGetArray(yy[3],&yy3);
438: j = n;
439: x = xv->array;
440: switch (j_rem=j&0x3) {
441: case 3:
442: x2 = x[2];
443: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
444: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
445: case 2:
446: x1 = x[1];
447: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
448: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
449: case 1:
450: x0 = x[0];
451: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
452: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
453: case 0:
454: x += j_rem;
455: yy0 += j_rem;
456: yy1 += j_rem;
457: yy2 += j_rem;
458: yy3 += j_rem;
459: j -= j_rem;
460: break;
461: }
462: while (j>0) {
463: x0 = x[0];
464: x1 = x[1];
465: x2 = x[2];
466: x3 = x[3];
467: x += 4;
468:
469: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
470: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
471: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
472: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
473: j -= 4;
474: }
475: z[0] = sum0;
476: z[1] = sum1;
477: z[2] = sum2;
478: z[3] = sum3;
479: z += 4;
480: i -= 4;
481: VecRestoreArray(yy[0],&yy0);
482: VecRestoreArray(yy[1],&yy1);
483: VecRestoreArray(yy[2],&yy2);
484: VecRestoreArray(yy[3],&yy3);
485: yy += 4;
486: }
487: PetscLogFlops(nv*(2*xin->n-1));
488: return(0);
489: }
490:
494: int VecMax_Seq(Vec xin,int* idx,PetscReal * z)
495: {
496: Vec_Seq *x = (Vec_Seq*)xin->data;
497: int i,j=0,n = xin->n;
498: PetscReal max,tmp;
499: PetscScalar *xx = x->array;
502: if (!n) {
503: max = PETSC_MIN;
504: j = -1;
505: } else {
506: #if defined(PETSC_USE_COMPLEX)
507: max = PetscRealPart(*xx++); j = 0;
508: #else
509: max = *xx++; j = 0;
510: #endif
511: for (i=1; i<n; i++) {
512: #if defined(PETSC_USE_COMPLEX)
513: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
514: #else
515: if ((tmp = *xx++) > max) { j = i; max = tmp; }
516: #endif
517: }
518: }
519: *z = max;
520: if (idx) *idx = j;
521: return(0);
522: }
526: int VecMin_Seq(Vec xin,int* idx,PetscReal * z)
527: {
528: Vec_Seq *x = (Vec_Seq*)xin->data;
529: int i,j=0,n = xin->n;
530: PetscReal min,tmp;
531: PetscScalar *xx = x->array;
534: if (!n) {
535: min = PETSC_MAX;
536: j = -1;
537: } else {
538: #if defined(PETSC_USE_COMPLEX)
539: min = PetscRealPart(*xx++); j = 0;
540: #else
541: min = *xx++; j = 0;
542: #endif
543: for (i=1; i<n; i++) {
544: #if defined(PETSC_USE_COMPLEX)
545: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
546: #else
547: if ((tmp = *xx++) < min) { j = i; min = tmp; }
548: #endif
549: }
550: }
551: *z = min;
552: if (idx) *idx = j;
553: return(0);
554: }
558: int VecSet_Seq(const PetscScalar* alpha,Vec xin)
559: {
560: Vec_Seq *x = (Vec_Seq *)xin->data;
561: int n = xin->n,ierr;
562: PetscScalar *xx = x->array,oalpha = *alpha;
565: if (oalpha == 0.0) {
566: PetscMemzero(xx,n*sizeof(PetscScalar));
567: }
568: else {
569: SET(xx,n,oalpha);
570: }
571: return(0);
572: }
576: int VecSetRandom_Seq(PetscRandom r,Vec xin)
577: {
578: int n = xin->n,i,ierr;
579: PetscScalar *xx;
582: VecGetArray(xin,&xx);
583: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
584: VecRestoreArray(xin,&xx);
585: return(0);
586: }
590: int VecMAXPY_Seq(int nv,const PetscScalar *alpha,Vec xin,Vec *y)
591: {
592: Vec_Seq *xdata = (Vec_Seq*)xin->data;
593: int n = xin->n,ierr,j,j_rem;
594: PetscScalar *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;
596: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
597: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
598: #endif
601: PetscLogFlops(nv*2*n);
603: xx = xdata->array;
604: switch (j_rem=nv&0x3) {
605: case 3:
606: VecGetArray(y[0],&yy0);
607: VecGetArray(y[1],&yy1);
608: VecGetArray(y[2],&yy2);
609: alpha0 = alpha[0];
610: alpha1 = alpha[1];
611: alpha2 = alpha[2];
612: alpha += 3;
613: APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
614: VecRestoreArray(y[0],&yy0);
615: VecRestoreArray(y[1],&yy1);
616: VecRestoreArray(y[2],&yy2);
617: y += 3;
618: break;
619: case 2:
620: VecGetArray(y[0],&yy0);
621: VecGetArray(y[1],&yy1);
622: alpha0 = alpha[0];
623: alpha1 = alpha[1];
624: alpha +=2;
625: APXY2(xx,alpha0,alpha1,yy0,yy1,n);
626: VecRestoreArray(y[0],&yy0);
627: VecRestoreArray(y[1],&yy1);
628: y +=2;
629: break;
630: case 1:
631: VecGetArray(y[0],&yy0);
632: alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
633: VecRestoreArray(y[0],&yy0);
634: y +=1;
635: break;
636: }
637: for (j=j_rem; j<nv; j+=4) {
638: VecGetArray(y[0],&yy0);
639: VecGetArray(y[1],&yy1);
640: VecGetArray(y[2],&yy2);
641: VecGetArray(y[3],&yy3);
642: alpha0 = alpha[0];
643: alpha1 = alpha[1];
644: alpha2 = alpha[2];
645: alpha3 = alpha[3];
646: alpha += 4;
648: APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
649: VecRestoreArray(y[0],&yy0);
650: VecRestoreArray(y[1],&yy1);
651: VecRestoreArray(y[2],&yy2);
652: VecRestoreArray(y[3],&yy3);
653: y += 4;
654: }
655: return(0);
656: }
660: int VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
661: {
662: Vec_Seq *x = (Vec_Seq *)xin->data;
663: int n = xin->n,ierr;
664: PetscScalar *xx = x->array,*yy;
667: VecGetArray(yin,&yy);
668: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
669: fortranaypx_(&n,alpha,xx,yy);
670: #else
671: {
672: int i;
673: PetscScalar oalpha = *alpha;
674: for (i=0; i<n; i++) {
675: yy[i] = xx[i] + oalpha*yy[i];
676: }
677: }
678: #endif
679: VecRestoreArray(yin,&yy);
680: PetscLogFlops(2*n);
681: return(0);
682: }
684: /*
685: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
686: to be slower than a regular C loop. Hence,we do not include it.
687: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
688: */
692: int VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
693: {
694: Vec_Seq *x = (Vec_Seq *)xin->data;
695: int i,n = xin->n,ierr;
696: PetscScalar *xx = x->array,*yy,*ww,oalpha = *alpha;
699: VecGetArray(yin,&yy);
700: VecGetArray(win,&ww);
701: if (oalpha == 1.0) {
702: PetscLogFlops(n);
703: /* could call BLAS axpy after call to memcopy, but may be slower */
704: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
705: } else if (oalpha == -1.0) {
706: PetscLogFlops(n);
707: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
708: } else if (oalpha == 0.0) {
709: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
710: } else {
711: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
712: fortranwaxpy_(&n,alpha,xx,yy,ww);
713: #else
714: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
715: #endif
716: PetscLogFlops(2*n);
717: }
718: VecRestoreArray(yin,&yy);
719: VecRestoreArray(win,&ww);
720: return(0);
721: }
725: int VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
726: {
727: Vec_Seq *x = (Vec_Seq *)xin->data;
728: int n = xin->n,i,ierr;
729: PetscScalar *xx = x->array,*yy,*ww;
732: VecGetArray(yin,&yy);
733: if (yin != win) {VecGetArray(win,&ww);}
734: else ww = yy;
736: if (ww == xx) {
737: for (i=0; i<n; i++) ww[i] *= yy[i];
738: } else if (ww == yy) {
739: for (i=0; i<n; i++) ww[i] *= xx[i];
740: } else {
741: /* This was suppose to help on SGI but didn't really seem to
742: PetscReal * __restrict www = ww;
743: PetscReal * __restrict yyy = yy;
744: PetscReal * __restrict xxx = xx;
745: for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
746: */
747: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
748: fortranxtimesy_(xx,yy,ww,&n);
749: #else
750: for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
751: #endif
752: }
753: VecRestoreArray(yin,&yy);
754: if (yin != win) {VecRestoreArray(win,&ww);}
755: PetscLogFlops(n);
756: return(0);
757: }
761: int VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
762: {
763: Vec_Seq *x = (Vec_Seq *)xin->data;
764: int n = xin->n,i,ierr;
765: PetscScalar *xx = x->array,*yy,*ww;
768: VecGetArray(yin,&yy);
769: if (yin != win) {VecGetArray(win,&ww);}
770: else {ww = yy;}
771: for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
772: VecRestoreArray(yin,&yy);
773: if (yin != win) {VecRestoreArray(win,&ww);}
774: PetscLogFlops(n);
775: return(0);
776: }
780: int VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
781: {
782: Vec_Seq *x = (Vec_Seq *)xin->data;
783: int n = xin->n,i,ierr;
784: PetscScalar *xx = x->array,*yy;
785: PetscReal m = 0.0;
788: VecGetArray(yin,&yy);
789: for(i = 0; i < n; i++) {
790: if (yy[i] != 0.0) {
791: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
792: } else {
793: m = PetscMax(PetscAbsScalar(xx[i]), m);
794: }
795: }
796: MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,xin->comm);
797: VecRestoreArray(yin,&yy);
798: PetscLogFlops(n);
799: return(0);
800: }
804: int VecGetArray_Seq(Vec vin,PetscScalar *a[])
805: {
806: Vec_Seq *v = (Vec_Seq *)vin->data;
807: int ierr;
810: if (vin->array_gotten) {
811: SETERRQ(1,"Array has already been gotten for this vector,you may\n\
812: have forgotten a call to VecRestoreArray()");
813: }
814: vin->array_gotten = PETSC_TRUE;
816: *a = v->array;
817: PetscObjectTakeAccess(vin);
818: return(0);
819: }
823: int VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
824: {
829: if (!vin->array_gotten) {
830: SETERRQ(1,"Array has not been gotten for this vector, you may\n\
831: have forgotten a call to VecGetArray()");
832: }
833: vin->array_gotten = PETSC_FALSE;
834: if (a) *a = 0; /* now user cannot accidently use it again */
836: PetscObjectGrantAccess(vin);
837: return(0);
838: }
842: int VecResetArray_Seq(Vec vin)
843: {
844: Vec_Seq *v = (Vec_Seq *)vin->data;
847: v->array = v->array_allocated;
848: return(0);
849: }
853: int VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
854: {
855: Vec_Seq *v = (Vec_Seq *)vin->data;
858: v->array = (PetscScalar *)a;
859: return(0);
860: }
864: int VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
865: {
866: Vec_Seq *v = (Vec_Seq *)vin->data;
867: int ierr;
870: if (v->array_allocated) {PetscFree(v->array_allocated);}
871: v->array_allocated = v->array = (PetscScalar *)a;
872: return(0);
873: }
877: int VecGetSize_Seq(Vec vin,int *size)
878: {
880: *size = vin->n;
881: return(0);
882: }
886: int VecConjugate_Seq(Vec xin)
887: {
888: PetscScalar *x = ((Vec_Seq *)xin->data)->array;
889: int n = xin->n;
892: while (n-->0) {
893: *x = PetscConj(*x);
894: x++;
895: }
896: return(0);
897: }
898: