Actual source code: pvec2.c
/*$Id: pvec2.c,v 1.57 2001/09/11 16:32:01 bsmith Exp $*/

/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
int Ethernet_Allreduce(PetscReal *in,PetscReal *out,int n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  int        i,rank,size;
  MPI_Status status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  /* Reduction phase: each process adds its contribution to the partial sums
     received from the previous rank and passes the result up the chain.
     Note that the type and op arguments are ignored; this routine always
     sums MPIU_REAL values. */
  if (rank) {
    MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size - 1) {
    MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);
  }
  /* Broadcast phase: the last rank now holds the complete sums and sends
     them back down the chain to every process. */
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);
  }
  return 0;
}
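
/*
   Usage sketch (not part of the library): a hypothetical driver that reduces one value
   per process with the chained Ethernet_Allreduce() above and compares the result with
   MPI_Allreduce().  It assumes a real (non-complex) build in which PetscReal is double
   and MPIU_REAL is MPI_DOUBLE; the guard macro PVEC2_ETHERNET_DEMO is made up for this
   example and is never defined by PETSc.
*/
#if defined(PVEC2_ETHERNET_DEMO)
int main(int argc,char **argv)
{
  PetscReal in,out,check;
  int       rank;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  in = (PetscReal)(rank + 1);                     /* each process contributes rank+1            */
  Ethernet_Allreduce(&in,&out,1,MPIU_REAL,MPI_SUM,MPI_COMM_WORLD);
  in = (PetscReal)(rank + 1);                     /* Ethernet_Allreduce() modified the in buffer */
  MPI_Allreduce(&in,&check,1,MPIU_REAL,MPI_SUM,MPI_COMM_WORLD);
  if (!rank) printf("chained sum %g, MPI_Allreduce sum %g\n",(double)out,(double)check);
  MPI_Finalize();
  return 0;
}
#endif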
int VecMDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  PetscFunctionBegin;
  /* compute all local dot products, then sum the contributions from every process */
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
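
/*
   Illustration (not part of the library): VecMDot_MPI() computes all nv local dot
   products first and then combines them with a single MPI_Allreduce(), rather than
   issuing one reduction per vector.  A minimal standalone sketch of the same pattern
   in plain MPI, assuming real double-precision data and nv <= 64; the guard macro
   PVEC2_MDOT_SKETCH is made up for this example.
*/
#if defined(PVEC2_MDOT_SKETCH)
static void mdot_sketch(const double *x,double **y,int nlocal,int nv,MPI_Comm comm,double *z)
{
  double local[64];                                        /* assumes nv <= 64            */
  int    i,j;

  for (j=0; j<nv; j++) {
    local[j] = 0.0;
    for (i=0; i<nlocal; i++) local[j] += x[i]*y[j][i];     /* local part of x . y[j]      */
  }
  MPI_Allreduce(local,z,nv,MPI_DOUBLE,MPI_SUM,comm);       /* one reduction for all nv    */
}
#endif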
int VecMTDot_MPI(int nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;
  int         ierr;

  PetscFunctionBegin;
  /* same pattern as VecMDot_MPI(), but with the non-conjugated (transpose) local dot products */
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMTDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI     *x = (Vec_MPI*)xin->data;
  PetscReal   sum,work = 0.0;
  PetscScalar *xx = x->array;
  int         n = xin->n,ierr;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    /* sum the squares of the local entries, then reduce and take the square root */
#if defined(PETSC_HAVE_SLOW_NRM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the n mod 4 leftover entries, then process four entries per iteration */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {int i; for (i=0; i<n; i++) work += PetscRealPart(xx[i]*PetscConj(xx[i]));}
#endif
#else
    {int one = 1;
      work  = BLnrm2_(&n,xx,&one);
      work *= work;
    }
#endif
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    *z   = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_Seq(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    /* compute both norms with one reduction: sum the local 1-norm and the local
       squared 2-norm together, then take the square root of the second entry */
    PetscReal temp[2];
    ierr    = VecNorm_Seq(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr    = VecNorm_Seq(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr    = MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    z[1]    = sqrt(z[1]);
  }
  PetscFunctionReturn(0);
}
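
/*
   Illustration (not part of the library): the NORM_1_AND_2 branch above folds two global
   reductions into one by reducing the pair {sum |x_i|, sum x_i^2} and taking the square
   root afterwards.  A minimal standalone sketch of the same trick in plain MPI, assuming
   double-precision reals; the guard macro PVEC2_NORM_SKETCH is made up for this example.
*/
#if defined(PVEC2_NORM_SKETCH)
static void norms_1_and_2(const double *x,int nlocal,MPI_Comm comm,double *norm1,double *norm2)
{
  double local[2] = {0.0,0.0},global[2];
  int    i;

  for (i=0; i<nlocal; i++) {
    local[0] += fabs(x[i]);                               /* local contribution to the 1-norm         */
    local[1] += x[i]*x[i];                                /* local contribution to the squared 2-norm */
  }
  MPI_Allreduce(local,global,2,MPI_DOUBLE,MPI_SUM,comm);  /* one reduction for both norms             */
  *norm1 = global[0];
  *norm2 = sqrt(global[1]);
}
#endif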
/*
    These two functions are the MPI reduction operations used for max and min with index.
    The calls below to MPI_Op_create() convert the functions Vec[Max,Min]_Local() into the
    MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
EXTERN_C_BEGIN
void VecMax_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  /* xin/xout hold {value,index} pairs; keep the pair with the larger value */
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END
EXTERN_C_BEGIN
void VecMin_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  /* xin/xout hold {value,index} pairs; keep the pair with the smaller value */
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END
int VecMax_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  PetscFunctionBegin;
  /* Find the local max */
  ierr = VecMax_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global max */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else {
    /* reduce {value,global index} pairs with the user-defined operator */
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMax_Local_Op) {
      ierr = MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);CHKERRQ(ierr);
    }
    ierr     = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr     = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);CHKERRQ(ierr);
    *z       = z2[0];
    *idx     = (int)z2[1];
  }
  PetscFunctionReturn(0);
}
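
/*
   For comparison (not part of the library): standard MPI already provides a max-with-index
   reduction via MPI_MAXLOC on MPI_DOUBLE_INT pairs.  A minimal standalone sketch of the same
   global-max-plus-owner-index computation, assuming double-precision values and an index
   already shifted to the global numbering; the guard macro PVEC2_MAXLOC_SKETCH is made up
   for this example.
*/
#if defined(PVEC2_MAXLOC_SKETCH)
static void global_max_with_index(double localmax,int localidx,MPI_Comm comm,double *gmax,int *gidx)
{
  struct { double value; int index; } in,out;   /* layout required by MPI_DOUBLE_INT */

  in.value = localmax;
  in.index = localidx;
  MPI_Allreduce(&in,&out,1,MPI_DOUBLE_INT,MPI_MAXLOC,comm);
  *gmax = out.value;
  *gidx = out.index;
}
#endif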
int VecMin_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  PetscFunctionBegin;
  /* Find the local min */
  ierr = VecMin_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global min */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);CHKERRQ(ierr);
  } else {
    /* reduce {value,global index} pairs with the user-defined operator */
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMin_Local_Op) {
      ierr = MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);CHKERRQ(ierr);
    }
    ierr     = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr     = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);CHKERRQ(ierr);
    *z       = z2[0];
    *idx     = (int)z2[1];
  }
  PetscFunctionReturn(0);
}