Actual source code: dvec2.c

  2: /*$Id: dvec2.c,v 1.91 2001/09/11 18:14:31 bsmith Exp $*/

  4: /* 
  5:    Defines some vector operation functions that are shared by 
  6:   sequential and parallel vectors.
  7: */
 8:  #include src/vec/impls/dvecimpl.h
 9:  #include src/inline/dot.h
 10:  #include src/inline/setval.h
 11:  #include src/inline/axpy.h

 13: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 16: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
 17: {
 18:   Vec_Seq     *xv = (Vec_Seq *)xin->data;
 19:   int         i,nv_rem,n = xin->n,ierr;
 20:   PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
 21:   Vec         *yy;

 24:   sum0 = 0;
 25:   sum1 = 0;
 26:   sum2 = 0;

 28:   i      = nv;
 29:   nv_rem = nv&0x3;
 30:   yy     = (Vec*)yin;
 31:   x      = xv->array;

 33:   switch (nv_rem) {
 34:   case 3:
 35:     VecGetArray(yy[0],&yy0);
 36:     VecGetArray(yy[1],&yy1);
 37:     VecGetArray(yy[2],&yy2);
 38:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 39:     VecRestoreArray(yy[0],&yy0);
 40:     VecRestoreArray(yy[1],&yy1);
 41:     VecRestoreArray(yy[2],&yy2);
 42:     z[0] = sum0;
 43:     z[1] = sum1;
 44:     z[2] = sum2;
 45:     break;
 46:   case 2:
 47:     VecGetArray(yy[0],&yy0);
 48:     VecGetArray(yy[1],&yy1);
 49:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 50:     VecRestoreArray(yy[0],&yy0);
 51:     VecRestoreArray(yy[1],&yy1);
 52:     z[0] = sum0;
 53:     z[1] = sum1;
 54:     break;
 55:   case 1:
 56:     VecGetArray(yy[0],&yy0);
 57:     fortranmdot1_(x,yy0,&n,&sum0);
 58:     VecRestoreArray(yy[0],&yy0);
 59:     z[0] = sum0;
 60:     break;
 61:   case 0:
 62:     break;
 63:   }
 64:   z  += nv_rem;
 65:   i  -= nv_rem;
 66:   yy += nv_rem;

 68:   while (i >0) {
 69:     sum0 = 0;
 70:     sum1 = 0;
 71:     sum2 = 0;
 72:     sum3 = 0;
 73:     VecGetArray(yy[0],&yy0);
 74:     VecGetArray(yy[1],&yy1);
 75:     VecGetArray(yy[2],&yy2);
 76:     VecGetArray(yy[3],&yy3);
 77:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 78:     VecRestoreArray(yy[0],&yy0);
 79:     VecRestoreArray(yy[1],&yy1);
 80:     VecRestoreArray(yy[2],&yy2);
 81:     VecRestoreArray(yy[3],&yy3);
 82:     yy  += 4;
 83:     z[0] = sum0;
 84:     z[1] = sum1;
 85:     z[2] = sum2;
 86:     z[3] = sum3;
 87:     z   += 4;
 88:     i   -= 4;
 89:   }
 90:   PetscLogFlops(nv*(2*xin->n-1));
 91:   return(0);
 92: }

 94: #else
 97: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
 98: {
 99:   Vec_Seq     *xv = (Vec_Seq *)xin->data;
100:   int          n = xin->n,i,j,nv_rem,j_rem,ierr;
101:   PetscScalar  sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
102:   PetscScalar  * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
103:   Vec          *yy;

106:   sum0 = 0;
107:   sum1 = 0;
108:   sum2 = 0;

110:   i      = nv;
111:   nv_rem = nv&0x3;
112:   yy     = (Vec *)yin;
113:   j      = n;
114:   x      = xv->array;

116:   switch (nv_rem) {
117:   case 3:
118:     VecGetArray(yy[0],&yy0);
119:     VecGetArray(yy[1],&yy1);
120:     VecGetArray(yy[2],&yy2);
121:     switch (j_rem=j&0x3) {
122:     case 3:
123:       x2 = x[2];
124:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125:       sum2 += x2*PetscConj(yy2[2]);
126:     case 2:
127:       x1 = x[1];
128:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129:       sum2 += x1*PetscConj(yy2[1]);
130:     case 1:
131:       x0 = x[0];
132:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133:       sum2 += x0*PetscConj(yy2[0]);
134:     case 0:
135:       x   += j_rem;
136:       yy0 += j_rem;
137:       yy1 += j_rem;
138:       yy2 += j_rem;
139:       j   -= j_rem;
140:       break;
141:     }
142:     while (j>0) {
143:       x0 = x[0];
144:       x1 = x[1];
145:       x2 = x[2];
146:       x3 = x[3];
147:       x += 4;
148: 
149:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152:       j -= 4;
153:     }
154:     z[0] = sum0;
155:     z[1] = sum1;
156:     z[2] = sum2;
157:     VecRestoreArray(yy[0],&yy0);
158:     VecRestoreArray(yy[1],&yy1);
159:     VecRestoreArray(yy[2],&yy2);
160:     break;
161:   case 2:
162:     VecGetArray(yy[0],&yy0);
163:     VecGetArray(yy[1],&yy1);
164:     switch (j_rem=j&0x3) {
165:     case 3:
166:       x2 = x[2];
167:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168:     case 2:
169:       x1 = x[1];
170:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171:     case 1:
172:       x0 = x[0];
173:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174:     case 0:
175:       x   += j_rem;
176:       yy0 += j_rem;
177:       yy1 += j_rem;
178:       j   -= j_rem;
179:       break;
180:     }
181:     while (j>0) {
182:       x0 = x[0];
183:       x1 = x[1];
184:       x2 = x[2];
185:       x3 = x[3];
186:       x += 4;
187: 
188:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190:       j -= 4;
191:     }
192:     z[0] = sum0;
193:     z[1] = sum1;
194: 
195:     VecRestoreArray(yy[0],&yy0);
196:     VecRestoreArray(yy[1],&yy1);
197:     break;
198:   case 1:
199:     VecGetArray(yy[0],&yy0);
200:     switch (j_rem=j&0x3) {
201:     case 3:
202:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203:     case 2:
204:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205:     case 1:
206:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207:     case 0:
208:       x   += j_rem;
209:       yy0 += j_rem;
210:       j   -= j_rem;
211:       break;
212:     }
213:     while (j>0) {
214:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216:       yy0+=4;
217:       j -= 4; x+=4;
218:     }
219:     z[0] = sum0;

221:     VecRestoreArray(yy[0],&yy0);
222:     break;
223:   case 0:
224:     break;
225:   }
226:   z  += nv_rem;
227:   i  -= nv_rem;
228:   yy += nv_rem;

230:   while (i >0) {
231:     sum0 = 0;
232:     sum1 = 0;
233:     sum2 = 0;
234:     sum3 = 0;
235:     VecGetArray(yy[0],&yy0);
236:     VecGetArray(yy[1],&yy1);
237:     VecGetArray(yy[2],&yy2);
238:     VecGetArray(yy[3],&yy3);

240:     j = n;
241:     x = xv->array;
242:     switch (j_rem=j&0x3) {
243:     case 3:
244:       x2 = x[2];
245:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247:     case 2:
248:       x1 = x[1];
249:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255:     case 0:
256:       x   += j_rem;
257:       yy0 += j_rem;
258:       yy1 += j_rem;
259:       yy2 += j_rem;
260:       yy3 += j_rem;
261:       j   -= j_rem;
262:       break;
263:     }
264:     while (j>0) {
265:       x0 = x[0];
266:       x1 = x[1];
267:       x2 = x[2];
268:       x3 = x[3];
269:       x += 4;
270: 
271:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275:       j -= 4;
276:     }
277:     z[0] = sum0;
278:     z[1] = sum1;
279:     z[2] = sum2;
280:     z[3] = sum3;
281:     z   += 4;
282:     i   -= 4;
283:     VecRestoreArray(yy[0],&yy0);
284:     VecRestoreArray(yy[1],&yy1);
285:     VecRestoreArray(yy[2],&yy2);
286:     VecRestoreArray(yy[3],&yy3);
287:     yy  += 4;
288:   }
289:   PetscLogFlops(nv*(2*xin->n-1));
290:   return(0);
291: }
292: #endif

294: /* ----------------------------------------------------------------------------*/
297: int VecMTDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
298: {
299:   Vec_Seq      *xv = (Vec_Seq *)xin->data;
300:   int          n = xin->n,i,j,nv_rem,j_rem,ierr;
301:   PetscScalar  sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
302:   Vec          *yy;
303: 

306:   sum0 = 0;
307:   sum1 = 0;
308:   sum2 = 0;

310:   i      = nv;
311:   nv_rem = nv&0x3;
312:   yy     = (Vec*)yin;
313:   j    = n;
314:   x    = xv->array;

316:   switch (nv_rem) {
317:   case 3:
318:     VecGetArray(yy[0],&yy0);
319:     VecGetArray(yy[1],&yy1);
320:     VecGetArray(yy[2],&yy2);
321:     switch (j_rem=j&0x3) {
322:     case 3:
323:       x2 = x[2];
324:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
325:       sum2 += x2*yy2[2];
326:     case 2:
327:       x1 = x[1];
328:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
329:       sum2 += x1*yy2[1];
330:     case 1:
331:       x0 = x[0];
332:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
333:       sum2 += x0*yy2[0];
334:     case 0:
335:       x  += j_rem;
336:       yy0 += j_rem;
337:       yy1 += j_rem;
338:       yy2 += j_rem;
339:       j  -= j_rem;
340:       break;
341:     }
342:     while (j>0) {
343:       x0 = x[0];
344:       x1 = x[1];
345:       x2 = x[2];
346:       x3 = x[3];
347:       x += 4;
348: 
349:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
350:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
351:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
352:       j -= 4;
353:     }
354:     z[0] = sum0;
355:     z[1] = sum1;
356:     z[2] = sum2;
357:     VecRestoreArray(yy[0],&yy0);
358:     VecRestoreArray(yy[1],&yy1);
359:     VecRestoreArray(yy[2],&yy2);
360:     break;
361:   case 2:
362:     VecGetArray(yy[0],&yy0);
363:     VecGetArray(yy[1],&yy1);
364:     switch (j_rem=j&0x3) {
365:     case 3:
366:       x2 = x[2];
367:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
368:     case 2:
369:       x1 = x[1];
370:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
371:     case 1:
372:       x0 = x[0];
373:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
374:     case 0:
375:       x  += j_rem;
376:       yy0 += j_rem;
377:       yy1 += j_rem;
378:       j  -= j_rem;
379:       break;
380:     }
381:     while (j>0) {
382:       x0 = x[0];
383:       x1 = x[1];
384:       x2 = x[2];
385:       x3 = x[3];
386:       x += 4;
387: 
388:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
389:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
390:       j -= 4;
391:     }
392:     z[0] = sum0;
393:     z[1] = sum1;
394: 
395:     VecRestoreArray(yy[0],&yy0);
396:     VecRestoreArray(yy[1],&yy1);
397:     break;
398:   case 1:
399:     VecGetArray(yy[0],&yy0);
400:     switch (j_rem=j&0x3) {
401:     case 3:
402:       x2 = x[2]; sum0 += x2*yy0[2];
403:     case 2:
404:       x1 = x[1]; sum0 += x1*yy0[1];
405:     case 1:
406:       x0 = x[0]; sum0 += x0*yy0[0];
407:     case 0:
408:       x  += j_rem;
409:       yy0 += j_rem;
410:       j  -= j_rem;
411:       break;
412:     }
413:     while (j>0) {
414:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
415:       j -= 4; x+=4;
416:     }
417:     z[0] = sum0;

419:     VecRestoreArray(yy[0],&yy0);
420:     break;
421:   case 0:
422:     break;
423:   }
424:   z  += nv_rem;
425:   i  -= nv_rem;
426:   yy += nv_rem;

428:   while (i >0) {
429:     sum0 = 0;
430:     sum1 = 0;
431:     sum2 = 0;
432:     sum3 = 0;
433:     VecGetArray(yy[0],&yy0);
434:     VecGetArray(yy[1],&yy1);
435:     VecGetArray(yy[2],&yy2);
436:     VecGetArray(yy[3],&yy3);

438:     j = n;
439:     x = xv->array;
440:     switch (j_rem=j&0x3) {
441:     case 3:
442:       x2 = x[2];
443:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
444:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
445:     case 2:
446:       x1 = x[1];
447:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
448:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
449:     case 1:
450:       x0 = x[0];
451:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
452:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
453:     case 0:
454:       x  += j_rem;
455:       yy0 += j_rem;
456:       yy1 += j_rem;
457:       yy2 += j_rem;
458:       yy3 += j_rem;
459:       j  -= j_rem;
460:       break;
461:     }
462:     while (j>0) {
463:       x0 = x[0];
464:       x1 = x[1];
465:       x2 = x[2];
466:       x3 = x[3];
467:       x += 4;
468: 
469:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
470:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
471:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
472:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
473:       j -= 4;
474:     }
475:     z[0] = sum0;
476:     z[1] = sum1;
477:     z[2] = sum2;
478:     z[3] = sum3;
479:     z   += 4;
480:     i   -= 4;
481:     VecRestoreArray(yy[0],&yy0);
482:     VecRestoreArray(yy[1],&yy1);
483:     VecRestoreArray(yy[2],&yy2);
484:     VecRestoreArray(yy[3],&yy3);
485:     yy  += 4;
486:   }
487:   PetscLogFlops(nv*(2*xin->n-1));
488:   return(0);
489: }
490: 

494: int VecMax_Seq(Vec xin,int* idx,PetscReal * z)
495: {
496:   Vec_Seq      *x = (Vec_Seq*)xin->data;
497:   int          i,j=0,n = xin->n;
498:   PetscReal    max,tmp;
499:   PetscScalar  *xx = x->array;

502:   if (!n) {
503:     max = PETSC_MIN;
504:     j   = -1;
505:   } else {
506: #if defined(PETSC_USE_COMPLEX)
507:       max = PetscRealPart(*xx++); j = 0;
508: #else
509:       max = *xx++; j = 0;
510: #endif
511:     for (i=1; i<n; i++) {
512: #if defined(PETSC_USE_COMPLEX)
513:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
514: #else
515:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
516: #endif
517:     }
518:   }
519:   *z   = max;
520:   if (idx) *idx = j;
521:   return(0);
522: }

526: int VecMin_Seq(Vec xin,int* idx,PetscReal * z)
527: {
528:   Vec_Seq      *x = (Vec_Seq*)xin->data;
529:   int          i,j=0,n = xin->n;
530:   PetscReal    min,tmp;
531:   PetscScalar  *xx = x->array;

534:   if (!n) {
535:     min = PETSC_MAX;
536:     j   = -1;
537:   } else {
538: #if defined(PETSC_USE_COMPLEX)
539:     min = PetscRealPart(*xx++); j = 0;
540: #else
541:     min = *xx++; j = 0;
542: #endif
543:     for (i=1; i<n; i++) {
544: #if defined(PETSC_USE_COMPLEX)
545:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
546: #else
547:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
548: #endif
549:     }
550:   }
551:   *z   = min;
552:   if (idx) *idx = j;
553:   return(0);
554: }

558: int VecSet_Seq(const PetscScalar* alpha,Vec xin)
559: {
560:   Vec_Seq      *x = (Vec_Seq *)xin->data;
561:   int          n = xin->n,ierr;
562:   PetscScalar  *xx = x->array,oalpha = *alpha;

565:   if (oalpha == 0.0) {
566:     PetscMemzero(xx,n*sizeof(PetscScalar));
567:   }
568:   else {
569:     SET(xx,n,oalpha);
570:   }
571:   return(0);
572: }

576: int VecSetRandom_Seq(PetscRandom r,Vec xin)
577: {
578:   int          n = xin->n,i,ierr;
579:   PetscScalar  *xx;

582:   VecGetArray(xin,&xx);
583:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
584:   VecRestoreArray(xin,&xx);
585:   return(0);
586: }

590: int VecMAXPY_Seq(int nv,const PetscScalar *alpha,Vec xin,Vec *y)
591: {
592:   Vec_Seq      *xdata = (Vec_Seq*)xin->data;
593:   int          n = xin->n,ierr,j,j_rem;
594:   PetscScalar  *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;

596: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
597: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
598: #endif

601:   PetscLogFlops(nv*2*n);

603:   xx = xdata->array;
604:   switch (j_rem=nv&0x3) {
605:   case 3:
606:     VecGetArray(y[0],&yy0);
607:     VecGetArray(y[1],&yy1);
608:     VecGetArray(y[2],&yy2);
609:     alpha0 = alpha[0];
610:     alpha1 = alpha[1];
611:     alpha2 = alpha[2];
612:     alpha += 3;
613:     APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
614:     VecRestoreArray(y[0],&yy0);
615:     VecRestoreArray(y[1],&yy1);
616:     VecRestoreArray(y[2],&yy2);
617:     y     += 3;
618:     break;
619:   case 2:
620:     VecGetArray(y[0],&yy0);
621:     VecGetArray(y[1],&yy1);
622:     alpha0 = alpha[0];
623:     alpha1 = alpha[1];
624:     alpha +=2;
625:     APXY2(xx,alpha0,alpha1,yy0,yy1,n);
626:     VecRestoreArray(y[0],&yy0);
627:     VecRestoreArray(y[1],&yy1);
628:     y     +=2;
629:     break;
630:   case 1:
631:     VecGetArray(y[0],&yy0);
632:     alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
633:     VecRestoreArray(y[0],&yy0);
634:     y     +=1;
635:     break;
636:   }
637:   for (j=j_rem; j<nv; j+=4) {
638:     VecGetArray(y[0],&yy0);
639:     VecGetArray(y[1],&yy1);
640:     VecGetArray(y[2],&yy2);
641:     VecGetArray(y[3],&yy3);
642:     alpha0 = alpha[0];
643:     alpha1 = alpha[1];
644:     alpha2 = alpha[2];
645:     alpha3 = alpha[3];
646:     alpha  += 4;

648:     APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
649:     VecRestoreArray(y[0],&yy0);
650:     VecRestoreArray(y[1],&yy1);
651:     VecRestoreArray(y[2],&yy2);
652:     VecRestoreArray(y[3],&yy3);
653:     y      += 4;
654:   }
655:   return(0);
656: }

660: int VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
661: {
662:   Vec_Seq      *x = (Vec_Seq *)xin->data;
663:   int          n = xin->n,ierr;
664:   PetscScalar  *xx = x->array,*yy;

667:   VecGetArray(yin,&yy);
668: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
669:   fortranaypx_(&n,alpha,xx,yy);
670: #else
671:   {
672:     int i;
673:     PetscScalar oalpha = *alpha;
674:     for (i=0; i<n; i++) {
675:       yy[i] = xx[i] + oalpha*yy[i];
676:     }
677:   }
678: #endif
679:   VecRestoreArray(yin,&yy);
680:   PetscLogFlops(2*n);
681:   return(0);
682: }

684: /*
685:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
686:   to be slower than a regular C loop.  Hence,we do not include it.
687:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
688: */

692: int VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
693: {
694:   Vec_Seq      *x = (Vec_Seq *)xin->data;
695:   int          i,n = xin->n,ierr;
696:   PetscScalar  *xx = x->array,*yy,*ww,oalpha = *alpha;

699:   VecGetArray(yin,&yy);
700:   VecGetArray(win,&ww);
701:   if (oalpha == 1.0) {
702:     PetscLogFlops(n);
703:     /* could call BLAS axpy after call to memcopy, but may be slower */
704:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
705:   } else if (oalpha == -1.0) {
706:     PetscLogFlops(n);
707:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
708:   } else if (oalpha == 0.0) {
709:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
710:   } else {
711: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
712:     fortranwaxpy_(&n,alpha,xx,yy,ww);
713: #else
714:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
715: #endif
716:     PetscLogFlops(2*n);
717:   }
718:   VecRestoreArray(yin,&yy);
719:   VecRestoreArray(win,&ww);
720:   return(0);
721: }

725: int VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
726: {
727:   Vec_Seq      *x = (Vec_Seq *)xin->data;
728:   int          n = xin->n,i,ierr;
729:   PetscScalar  *xx = x->array,*yy,*ww;

732:   VecGetArray(yin,&yy);
733:   if (yin != win) {VecGetArray(win,&ww);}
734:   else ww = yy;

736:   if (ww == xx) {
737:     for (i=0; i<n; i++) ww[i] *= yy[i];
738:   } else if (ww == yy) {
739:     for (i=0; i<n; i++) ww[i] *= xx[i];
740:   } else {
741:     /*  This was suppose to help on SGI but didn't really seem to
742:           PetscReal * __restrict www = ww;
743:           PetscReal * __restrict yyy = yy;
744:           PetscReal * __restrict xxx = xx;
745:           for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
746:     */
747: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
748:     fortranxtimesy_(xx,yy,ww,&n);
749: #else
750:     for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
751: #endif
752:   }
753:   VecRestoreArray(yin,&yy);
754:   if (yin != win) {VecRestoreArray(win,&ww);}
755:   PetscLogFlops(n);
756:   return(0);
757: }

761: int VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
762: {
763:   Vec_Seq      *x = (Vec_Seq *)xin->data;
764:   int          n = xin->n,i,ierr;
765:   PetscScalar  *xx = x->array,*yy,*ww;

768:   VecGetArray(yin,&yy);
769:   if (yin != win) {VecGetArray(win,&ww);}
770:   else {ww = yy;}
771:   for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
772:   VecRestoreArray(yin,&yy);
773:   if (yin != win) {VecRestoreArray(win,&ww);}
774:   PetscLogFlops(n);
775:   return(0);
776: }

780: int VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
781: {
782:   Vec_Seq      *x = (Vec_Seq *)xin->data;
783:   int          n = xin->n,i,ierr;
784:   PetscScalar  *xx = x->array,*yy;
785:   PetscReal    m = 0.0;

788:   VecGetArray(yin,&yy);
789:   for(i = 0; i < n; i++) {
790:     if (yy[i] != 0.0) {
791:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
792:     } else {
793:       m = PetscMax(PetscAbsScalar(xx[i]), m);
794:     }
795:   }
796:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,xin->comm);
797:   VecRestoreArray(yin,&yy);
798:   PetscLogFlops(n);
799:   return(0);
800: }

804: int VecGetArray_Seq(Vec vin,PetscScalar *a[])
805: {
806:   Vec_Seq *v = (Vec_Seq *)vin->data;
807:   int     ierr;

810:   if (vin->array_gotten) {
811:     SETERRQ(1,"Array has already been gotten for this vector,you may\n\
812:     have forgotten a call to VecRestoreArray()");
813:   }
814:   vin->array_gotten = PETSC_TRUE;

816:   *a =  v->array;
817:   PetscObjectTakeAccess(vin);
818:   return(0);
819: }

823: int VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
824: {


829:   if (!vin->array_gotten) {
830:     SETERRQ(1,"Array has not been gotten for this vector, you may\n\
831:     have forgotten a call to VecGetArray()");
832:   }
833:   vin->array_gotten = PETSC_FALSE;
834:   if (a) *a         = 0; /* now user cannot accidently use it again */

836:   PetscObjectGrantAccess(vin);
837:   return(0);
838: }

842: int VecResetArray_Seq(Vec vin)
843: {
844:   Vec_Seq *v = (Vec_Seq *)vin->data;

847:   v->array = v->array_allocated;
848:   return(0);
849: }

853: int VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
854: {
855:   Vec_Seq *v = (Vec_Seq *)vin->data;

858:   v->array = (PetscScalar *)a;
859:   return(0);
860: }

864: int VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
865: {
866:   Vec_Seq *v = (Vec_Seq *)vin->data;
867:   int     ierr;

870:   if (v->array_allocated) {PetscFree(v->array_allocated);}
871:   v->array_allocated = v->array = (PetscScalar *)a;
872:   return(0);
873: }

877: int VecGetSize_Seq(Vec vin,int *size)
878: {
880:   *size = vin->n;
881:   return(0);
882: }

886: int VecConjugate_Seq(Vec xin)
887: {
888:   PetscScalar *x = ((Vec_Seq *)xin->data)->array;
889:   int         n = xin->n;

892:   while (n-->0) {
893:     *x = PetscConj(*x);
894:     x++;
895:   }
896:   return(0);
897: }
898: