Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/inline/spops.h"
6: /*
7: Local utility routine that creates a mapping from the global column
8: number to the local number in the off-diagonal part of the local
9: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
10: a slightly higher hash table cost; without it, it is not scalable (each processor
11: has an order-N integer array) but is fast to access.
12: */
15: int CreateColmap_MPIAIJ_Private(Mat mat)
16: {
17: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18: int n = aij->B->n,i,ierr;
21: #if defined (PETSC_USE_CTABLE)
22: PetscTableCreate(n,&aij->colmap);
23: for (i=0; i<n; i++){
24: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
25: }
26: #else
27: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
28: PetscLogObjectMemory(mat,mat->N*sizeof(int));
29: PetscMemzero(aij->colmap,mat->N*sizeof(int));
30: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31: #endif
32: return(0);
33: }
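/*
   Illustrative sketch (not part of the original source): once the colmap built
   above exists, a global column number gcol is translated to its local position
   lcol in the off-diagonal block exactly as the lookups later in this file do
   (indices are stored shifted by one, so a result below zero means "not present
   on this process").  The names gcol and lcol are hypothetical.

      int lcol;
   #if defined (PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/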
35: #define CHUNKSIZE 15
36: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37: { \
38: \
39: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40: rmax = aimax[row]; nrow = ailen[row]; \
41: col1 = col - shift; \
42: \
43: low = 0; high = nrow; \
44: while (high-low > 5) { \
45: t = (low+high)/2; \
46: if (rp[t] > col) high = t; \
47: else low = t; \
48: } \
49: for (_i=low; _i<high; _i++) { \
50: if (rp[_i] > col1) break; \
51: if (rp[_i] == col1) { \
52: if (addv == ADD_VALUES) ap[_i] += value; \
53: else ap[_i] = value; \
54: goto a_noinsert; \
55: } \
56: } \
57: if (nonew == 1) goto a_noinsert; \
58: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59: if (nrow >= rmax) { \
60: /* there is no extra room in row, therefore enlarge */ \
61: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62: PetscScalar *new_a; \
63: \
64: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65: \
66: /* malloc new storage space */ \
67: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68: PetscMalloc(len,&new_a); \
69: new_j = (int*)(new_a + new_nz); \
70: new_i = new_j + new_nz; \
71: \
72: /* copy over old data into new slots */ \
73: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
76: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78: len*sizeof(int)); \
79: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
80: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81: len*sizeof(PetscScalar)); \
82: /* free up old matrix storage */ \
83: \
84: PetscFree(a->a); \
85: if (!a->singlemalloc) { \
86: PetscFree(a->i); \
87: PetscFree(a->j); \
88: } \
89: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
90: a->singlemalloc = PETSC_TRUE; \
91: \
92: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93: rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95: a->maxnz += CHUNKSIZE; \
96: a->reallocs++; \
97: } \
98: N = nrow++ - 1; a->nz++; \
99: /* shift up all the later entries in this row */ \
100: for (ii=N; ii>=_i; ii--) { \
101: rp[ii+1] = rp[ii]; \
102: ap[ii+1] = ap[ii]; \
103: } \
104: rp[_i] = col1; \
105: ap[_i] = value; \
106: a_noinsert: ; \
107: ailen[row] = nrow; \
108: }
110: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111: { \
112: \
113: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114: rmax = bimax[row]; nrow = bilen[row]; \
115: col1 = col - shift; \
116: \
117: low = 0; high = nrow; \
118: while (high-low > 5) { \
119: t = (low+high)/2; \
120: if (rp[t] > col) high = t; \
121: else low = t; \
122: } \
123: for (_i=low; _i<high; _i++) { \
124: if (rp[_i] > col1) break; \
125: if (rp[_i] == col1) { \
126: if (addv == ADD_VALUES) ap[_i] += value; \
127: else ap[_i] = value; \
128: goto b_noinsert; \
129: } \
130: } \
131: if (nonew == 1) goto b_noinsert; \
132: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133: if (nrow >= rmax) { \
134: /* there is no extra room in row, therefore enlarge */ \
135: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136: PetscScalar *new_a; \
137: \
138: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139: \
140: /* malloc new storage space */ \
141: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142: PetscMalloc(len,&new_a); \
143: new_j = (int*)(new_a + new_nz); \
144: new_i = new_j + new_nz; \
145: \
146: /* copy over old data into new slots */ \
147: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
150: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152: len*sizeof(int)); \
153: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
154: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155: len*sizeof(PetscScalar)); \
156: /* free up old matrix storage */ \
157: \
158: PetscFree(b->a); \
159: if (!b->singlemalloc) { \
160: PetscFree(b->i); \
161: PetscFree(b->j); \
162: } \
163: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
164: b->singlemalloc = PETSC_TRUE; \
165: \
166: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167: rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169: b->maxnz += CHUNKSIZE; \
170: b->reallocs++; \
171: } \
172: N = nrow++ - 1; b->nz++; \
173: /* shift up all the later entries in this row */ \
174: for (ii=N; ii>=_i; ii--) { \
175: rp[ii+1] = rp[ii]; \
176: ap[ii+1] = ap[ii]; \
177: } \
178: rp[_i] = col1; \
179: ap[_i] = value; \
180: b_noinsert: ; \
181: bilen[row] = nrow; \
182: }
186: int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187: {
188: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
189: PetscScalar value;
190: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
191: int cstart = aij->cstart,cend = aij->cend,row,col;
192: PetscTruth roworiented = aij->roworiented;
194: /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
195: Mat A = aij->A;
196: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
197: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
198: PetscScalar *aa = a->a;
199: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
200: Mat B = aij->B;
201: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
202: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
203: PetscScalar *ba = b->a;
205: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
206: int nonew = a->nonew,shift=0;
207: PetscScalar *ap;
210: for (i=0; i<m; i++) {
211: if (im[i] < 0) continue;
212: #if defined(PETSC_USE_BOPT_g)
213: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
214: #endif
215: if (im[i] >= rstart && im[i] < rend) {
216: row = im[i] - rstart;
217: for (j=0; j<n; j++) {
218: if (in[j] >= cstart && in[j] < cend){
219: col = in[j] - cstart;
220: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
221: if (ignorezeroentries && value == 0.0) continue;
222: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
223: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
224: } else if (in[j] < 0) continue;
225: #if defined(PETSC_USE_BOPT_g)
226: else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
227: #endif
228: else {
229: if (mat->was_assembled) {
230: if (!aij->colmap) {
231: CreateColmap_MPIAIJ_Private(mat);
232: }
233: #if defined (PETSC_USE_CTABLE)
234: PetscTableFind(aij->colmap,in[j]+1,&col);
235: col--;
236: #else
237: col = aij->colmap[in[j]] - 1;
238: #endif
239: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240: DisAssemble_MPIAIJ(mat);
241: col = in[j];
242: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243: B = aij->B;
244: b = (Mat_SeqAIJ*)B->data;
245: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246: ba = b->a;
247: }
248: } else col = in[j];
249: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250: if (ignorezeroentries && value == 0.0) continue;
251: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
253: }
254: }
255: } else {
256: if (!aij->donotstash) {
257: if (roworiented) {
258: if (ignorezeroentries && v[i*n] == 0.0) continue;
259: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
260: } else {
261: if (ignorezeroentries && v[i] == 0.0) continue;
262: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
263: }
264: }
265: }
266: }
267: return(0);
268: }
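/*
   Illustrative caller-side sketch (not part of the original source): entries whose
   row lies in [rstart,rend) are placed directly into the local A or B block above,
   while rows owned by another process are stashed and shipped during assembly, so
   the global indices passed in need not be locally owned.  The names A, grow, gcol
   and v are hypothetical.

      PetscScalar v    = 1.0;
      int         grow = 0,gcol = 0;

      MatSetValues(A,1,&grow,1,&gcol,&v,ADD_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/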
272: int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
273: {
274: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
275: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
276: int cstart = aij->cstart,cend = aij->cend,row,col;
279: for (i=0; i<m; i++) {
280: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
281: if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
282: if (idxm[i] >= rstart && idxm[i] < rend) {
283: row = idxm[i] - rstart;
284: for (j=0; j<n; j++) {
285: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
286: if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
287: if (idxn[j] >= cstart && idxn[j] < cend){
288: col = idxn[j] - cstart;
289: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
290: } else {
291: if (!aij->colmap) {
292: CreateColmap_MPIAIJ_Private(mat);
293: }
294: #if defined (PETSC_USE_CTABLE)
295: PetscTableFind(aij->colmap,idxn[j]+1,&col);
296: col --;
297: #else
298: col = aij->colmap[idxn[j]] - 1;
299: #endif
300: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301: else {
302: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
303: }
304: }
305: }
306: } else {
307: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
308: }
309: }
310: return(0);
311: }
315: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316: {
317: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
318: int ierr,nstash,reallocs;
319: InsertMode addv;
322: if (aij->donotstash) {
323: return(0);
324: }
326: /* make sure all processors are either in INSERTMODE or ADDMODE */
327: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
328: if (addv == (ADD_VALUES|INSERT_VALUES)) {
329: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330: }
331: mat->insertmode = addv; /* in case this processor had no cache */
333: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
334: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
335: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336: return(0);
337: }
342: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343: {
344: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
345: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
346: int i,j,rstart,ncols,n,ierr,flg;
347: int *row,*col,other_disassembled;
348: PetscScalar *val;
349: InsertMode addv = mat->insertmode;
352: if (!aij->donotstash) {
353: while (1) {
354: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
355: if (!flg) break;
357: for (i=0; i<n;) {
358: /* Now identify the consecutive vals belonging to the same row */
359: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
360: if (j < n) ncols = j-i;
361: else ncols = n-i;
362: /* Now assemble all these values with a single function call */
363: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
364: i = j;
365: }
366: }
367: MatStashScatterEnd_Private(&mat->stash);
368: }
369:
370: MatAssemblyBegin(aij->A,mode);
371: MatAssemblyEnd(aij->A,mode);
373: /* determine if any processor has disassembled, if so we must
374: also disassemble ourselves, in order that we may reassemble. */
375: /*
376: if nonzero structure of submatrix B cannot change then we know that
377: no processor disassembled thus we can skip this stuff
378: */
379: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
380: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
381: if (mat->was_assembled && !other_disassembled) {
382: DisAssemble_MPIAIJ(mat);
383: }
384: }
386: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
387: MatSetUpMultiply_MPIAIJ(mat);
388: }
389: MatAssemblyBegin(aij->B,mode);
390: MatAssemblyEnd(aij->B,mode);
392: if (aij->rowvalues) {
393: PetscFree(aij->rowvalues);
394: aij->rowvalues = 0;
395: }
397: /* used by MatAXPY() */
398: a->xtoy = 0; b->xtoy = 0;
399: a->XtoY = 0; b->XtoY = 0;
401: return(0);
402: }
406: int MatZeroEntries_MPIAIJ(Mat A)
407: {
408: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
409: int ierr;
412: MatZeroEntries(l->A);
413: MatZeroEntries(l->B);
414: return(0);
415: }
419: int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
420: {
421: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
422: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
423: int *nprocs,j,idx,nsends,row;
424: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425: int *rvalues,tag = A->tag,count,base,slen,n,*source;
426: int *lens,imdex,*lrows,*values,rstart=l->rstart;
427: MPI_Comm comm = A->comm;
428: MPI_Request *send_waits,*recv_waits;
429: MPI_Status recv_status,*send_status;
430: IS istmp;
431: PetscTruth found;
434: ISGetLocalSize(is,&N);
435: ISGetIndices(is,&rows);
437: /* first count number of contributors to each processor */
438: PetscMalloc(2*size*sizeof(int),&nprocs);
439: PetscMemzero(nprocs,2*size*sizeof(int));
440: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
441: for (i=0; i<N; i++) {
442: idx = rows[i];
443: found = PETSC_FALSE;
444: for (j=0; j<size; j++) {
445: if (idx >= owners[j] && idx < owners[j+1]) {
446: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
447: }
448: }
449: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
450: }
451: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453: /* inform other processors of number of messages and max length*/
454: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
456: /* post receives: */
457: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
458: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
459: for (i=0; i<nrecvs; i++) {
460: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
461: }
463: /* do sends:
464: 1) starts[i] gives the starting index in svalues for stuff going to
465: the ith processor
466: */
467: PetscMalloc((N+1)*sizeof(int),&svalues);
468: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
469: PetscMalloc((size+1)*sizeof(int),&starts);
470: starts[0] = 0;
471: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
472: for (i=0; i<N; i++) {
473: svalues[starts[owner[i]]++] = rows[i];
474: }
475: ISRestoreIndices(is,&rows);
477: starts[0] = 0;
478: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479: count = 0;
480: for (i=0; i<size; i++) {
481: if (nprocs[2*i+1]) {
482: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
483: }
484: }
485: PetscFree(starts);
487: base = owners[rank];
489: /* wait on receives */
490: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
491: source = lens + nrecvs;
492: count = nrecvs; slen = 0;
493: while (count) {
494: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
495: /* unpack receives into our local space */
496: MPI_Get_count(&recv_status,MPI_INT,&n);
497: source[imdex] = recv_status.MPI_SOURCE;
498: lens[imdex] = n;
499: slen += n;
500: count--;
501: }
502: PetscFree(recv_waits);
503:
504: /* move the data into the send scatter */
505: PetscMalloc((slen+1)*sizeof(int),&lrows);
506: count = 0;
507: for (i=0; i<nrecvs; i++) {
508: values = rvalues + i*nmax;
509: for (j=0; j<lens[i]; j++) {
510: lrows[count++] = values[j] - base;
511: }
512: }
513: PetscFree(rvalues);
514: PetscFree(lens);
515: PetscFree(owner);
516: PetscFree(nprocs);
517:
518: /* actually zap the local rows */
519: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
520: PetscLogObjectParent(A,istmp);
522: /*
523: Zero the required rows. If the "diagonal block" of the matrix
524: is square and the user wishes to set the diagonal we use separate
525: code so that MatSetValues() is not called for each diagonal entry, allocating
526: new memory and thus triggering many mallocs and slowing things down.
528: Contributed by: Matthew Knepley
529: */
530: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
531: MatZeroRows(l->B,istmp,0);
532: if (diag && (l->A->M == l->A->N)) {
533: MatZeroRows(l->A,istmp,diag);
534: } else if (diag) {
535: MatZeroRows(l->A,istmp,0);
536: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
537: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
538: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
539: }
540: for (i = 0; i < slen; i++) {
541: row = lrows[i] + rstart;
542: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
543: }
544: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
545: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
546: } else {
547: MatZeroRows(l->A,istmp,0);
548: }
549: ISDestroy(istmp);
550: PetscFree(lrows);
552: /* wait on sends */
553: if (nsends) {
554: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
555: MPI_Waitall(nsends,send_waits,send_status);
556: PetscFree(send_status);
557: }
558: PetscFree(send_waits);
559: PetscFree(svalues);
561: return(0);
562: }
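/*
   Illustrative caller-side sketch (not part of the original source): each process
   lists the global rows it wants zeroed in an index set; the routine above then
   routes every index to its owning process and optionally places the given value
   on the diagonal.  The names A, rows, is and one are hypothetical.

      PetscScalar one     = 1.0;
      int         rows[2] = {0,5};
      IS          is;

      ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);
      MatZeroRows(A,is,&one);
      ISDestroy(is);
*/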
566: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
567: {
568: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
569: int ierr,nt;
572: VecGetLocalSize(xx,&nt);
573: if (nt != A->n) {
574: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
575: }
576: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
577: (*a->A->ops->mult)(a->A,xx,yy);
578: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
579: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
580: return(0);
581: }
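/*
   Illustrative note (not part of the original source): writing x_loc for the
   locally owned piece of xx and x_ghost for the off-process entries gathered into
   a->lvec by the scatter, the routine above computes

      yy = A_diag*x_loc + A_off*x_ghost

   where A_diag (a->A) holds the locally owned columns and A_off (a->B) the rest,
   so the communication of x_ghost is overlapped with the local A_diag*x_loc
   product.
*/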
585: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586: {
587: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588: int ierr;
591: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
592: (*a->A->ops->multadd)(a->A,xx,yy,zz);
593: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
594: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
595: return(0);
596: }
600: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
601: {
602: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603: int ierr;
606: /* do nondiagonal part */
607: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
608: /* send it on its way */
609: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
610: /* do local part */
611: (*a->A->ops->multtranspose)(a->A,xx,yy);
612: /* receive remote parts: note this assumes the values are not actually */
613: /* inserted in yy until the next line, which is true for my implementation*/
614: /* but is not perhaps always true. */
615: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
616: return(0);
617: }
619: EXTERN_C_BEGIN
622: int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
623: {
624: MPI_Comm comm;
625: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627: IS Me,Notme;
628: int M,N,first,last,*notme,ntids,i, ierr;
632: /* Easy test: symmetric diagonal block */
633: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634: MatIsTranspose(Adia,Bdia,f);
635: if (!*f) return(0);
636: PetscObjectGetComm((PetscObject)Amat,&comm);
637: MPI_Comm_size(comm,&ntids);
638: if (ntids==1) return(0);
640: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
641: MatGetSize(Amat,&M,&N);
642: MatGetOwnershipRange(Amat,&first,&last);
643: PetscMalloc((N-last+first)*sizeof(int),&notme);
644: for (i=0; i<first; i++) notme[i] = i;
645: for (i=last; i<M; i++) notme[i-last+first] = i;
646: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
647: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
648: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
649: Aoff = Aoffs[0];
650: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
651: Boff = Boffs[0];
652: MatIsTranspose(Aoff,Boff,f);
653: MatDestroyMatrices(1,&Aoffs);
654: MatDestroyMatrices(1,&Boffs);
655: ISDestroy(Me);
656: ISDestroy(Notme);
658: return(0);
659: }
660: EXTERN_C_END
664: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
665: {
666: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
667: int ierr;
670: /* do nondiagonal part */
671: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
672: /* send it on its way */
673: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
674: /* do local part */
675: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
676: /* receive remote parts: note this assumes the values are not actually */
677: /* inserted in zz until the next line, which is true for my implementation*/
678: /* but is not perhaps always true. */
679: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
680: return(0);
681: }
683: /*
684: This only works correctly for square matrices where the subblock A->A is the
685: diagonal block
686: */
689: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
690: {
691: int ierr;
692: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
695: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
696: if (a->rstart != a->cstart || a->rend != a->cend) {
697: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
698: }
699: MatGetDiagonal(a->A,v);
700: return(0);
701: }
705: int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
706: {
707: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
708: int ierr;
711: MatScale(aa,a->A);
712: MatScale(aa,a->B);
713: return(0);
714: }
718: int MatDestroy_MPIAIJ(Mat mat)
719: {
720: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721: int ierr;
724: #if defined(PETSC_USE_LOG)
725: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
726: #endif
727: MatStashDestroy_Private(&mat->stash);
728: PetscFree(aij->rowners);
729: MatDestroy(aij->A);
730: MatDestroy(aij->B);
731: #if defined (PETSC_USE_CTABLE)
732: if (aij->colmap) {PetscTableDelete(aij->colmap);}
733: #else
734: if (aij->colmap) {PetscFree(aij->colmap);}
735: #endif
736: if (aij->garray) {PetscFree(aij->garray);}
737: if (aij->lvec) {VecDestroy(aij->lvec);}
738: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
739: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
740: PetscFree(aij);
741: return(0);
742: }
746: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747: {
748: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
749: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
750: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
751: int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
752: int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
753: PetscScalar *column_values;
756: MPI_Comm_rank(mat->comm,&rank);
757: MPI_Comm_size(mat->comm,&size);
758: nz = A->nz + B->nz;
759: if (rank == 0) {
760: header[0] = MAT_FILE_COOKIE;
761: header[1] = mat->M;
762: header[2] = mat->N;
763: MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
764: PetscViewerBinaryGetDescriptor(viewer,&fd);
765: PetscBinaryWrite(fd,header,4,PETSC_INT,1);
766: /* get largest number of rows any processor has */
767: rlen = mat->m;
768: PetscMapGetGlobalRange(mat->rmap,&range);
769: for (i=1; i<size; i++) {
770: rlen = PetscMax(rlen,range[i+1] - range[i]);
771: }
772: } else {
773: MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
774: rlen = mat->m;
775: }
777: /* load up the local row counts */
778: PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
779: for (i=0; i<mat->m; i++) {
780: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781: }
783: /* store the row lengths to the file */
784: if (rank == 0) {
785: MPI_Status status;
786: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
787: for (i=1; i<size; i++) {
788: rlen = range[i+1] - range[i];
789: MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
790: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
791: }
792: } else {
793: MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
794: }
795: PetscFree(row_lengths);
797: /* load up the local column indices */
798: nzmax = nz; /* process 0 needs as much space as the largest any processor needs */
799: MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
800: PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
801: cnt = 0;
802: for (i=0; i<mat->m; i++) {
803: for (j=B->i[i]; j<B->i[i+1]; j++) {
804: if ( (col = garray[B->j[j]]) > cstart) break;
805: column_indices[cnt++] = col;
806: }
807: for (k=A->i[i]; k<A->i[i+1]; k++) {
808: column_indices[cnt++] = A->j[k] + cstart;
809: }
810: for (; j<B->i[i+1]; j++) {
811: column_indices[cnt++] = garray[B->j[j]];
812: }
813: }
814: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
816: /* store the column indices to the file */
817: if (rank == 0) {
818: MPI_Status status;
819: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
820: for (i=1; i<size; i++) {
821: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
822: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
823: MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
824: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
825: }
826: } else {
827: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
828: MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
829: }
830: PetscFree(column_indices);
832: /* load up the local column values */
833: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
834: cnt = 0;
835: for (i=0; i<mat->m; i++) {
836: for (j=B->i[i]; j<B->i[i+1]; j++) {
837: if ( garray[B->j[j]] > cstart) break;
838: column_values[cnt++] = B->a[j];
839: }
840: for (k=A->i[i]; k<A->i[i+1]; k++) {
841: column_values[cnt++] = A->a[k];
842: }
843: for (; j<B->i[i+1]; j++) {
844: column_values[cnt++] = B->a[j];
845: }
846: }
847: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
849: /* store the column values to the file */
850: if (rank == 0) {
851: MPI_Status status;
852: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
853: for (i=1; i<size; i++) {
854: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
855: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
856: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
857: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
858: }
859: } else {
860: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
861: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
862: }
863: PetscFree(column_values);
864: return(0);
865: }
869: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870: {
871: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
872: int ierr,rank = aij->rank,size = aij->size;
873: PetscTruth isdraw,isascii,flg,isbinary;
874: PetscViewer sviewer;
875: PetscViewerFormat format;
878: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
880: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881: if (isascii) {
882: PetscViewerGetFormat(viewer,&format);
883: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884: MatInfo info;
885: MPI_Comm_rank(mat->comm,&rank);
886: MatGetInfo(mat,MAT_LOCAL,&info);
887: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
888: if (flg) {
889: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
890: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
891: } else {
892: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
893: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
894: }
895: MatGetInfo(aij->A,MAT_LOCAL,&info);
896: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
897: MatGetInfo(aij->B,MAT_LOCAL,&info);
898: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
899: PetscViewerFlush(viewer);
900: VecScatterView(aij->Mvctx,viewer);
901: return(0);
902: } else if (format == PETSC_VIEWER_ASCII_INFO) {
903: return(0);
904: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
905: return(0);
906: }
907: } else if (isbinary) {
908: if (size == 1) {
909: PetscObjectSetName((PetscObject)aij->A,mat->name);
910: MatView(aij->A,viewer);
911: } else {
912: MatView_MPIAIJ_Binary(mat,viewer);
913: }
914: return(0);
915: } else if (isdraw) {
916: PetscDraw draw;
917: PetscTruth isnull;
918: PetscViewerDrawGetDraw(viewer,0,&draw);
919: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
920: }
922: if (size == 1) {
923: PetscObjectSetName((PetscObject)aij->A,mat->name);
924: MatView(aij->A,viewer);
925: } else {
926: /* assemble the entire matrix onto first processor. */
927: Mat A;
928: Mat_SeqAIJ *Aloc;
929: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
930: PetscScalar *a;
932: if (!rank) {
933: MatCreate(mat->comm,M,N,M,N,&A);
934: } else {
935: MatCreate(mat->comm,0,0,M,N,&A);
936: }
937: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
938: MatSetType(A,MATMPIAIJ);
939: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
940: PetscLogObjectParent(mat,A);
942: /* copy over the A part */
943: Aloc = (Mat_SeqAIJ*)aij->A->data;
944: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
945: row = aij->rstart;
946: for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
947: for (i=0; i<m; i++) {
948: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
949: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
950: }
951: aj = Aloc->j;
952: for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
954: /* copy over the B part */
955: Aloc = (Mat_SeqAIJ*)aij->B->data;
956: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
957: row = aij->rstart;
958: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
959: ct = cols;
960: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
961: for (i=0; i<m; i++) {
962: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
963: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
964: }
965: PetscFree(ct);
966: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
967: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
968: /*
969: Everyone has to call to draw the matrix since the graphics waits are
970: synchronized across all processors that share the PetscDraw object
971: */
972: PetscViewerGetSingleton(viewer,&sviewer);
973: if (!rank) {
974: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
975: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
976: }
977: PetscViewerRestoreSingleton(viewer,&sviewer);
978: MatDestroy(A);
979: }
980: return(0);
981: }
985: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
986: {
987: int ierr;
988: PetscTruth isascii,isdraw,issocket,isbinary;
989:
991: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
992: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
993: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
994: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
995: if (isascii || isdraw || isbinary || issocket) {
996: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
997: } else {
998: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
999: }
1000: return(0);
1001: }
1007: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1008: {
1009: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1010: int ierr;
1011: Vec bb1;
1012: PetscScalar mone=-1.0;
1015: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1017: VecDuplicate(bb,&bb1);
1019: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1020: if (flag & SOR_ZERO_INITIAL_GUESS) {
1021: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1022: its--;
1023: }
1024:
1025: while (its--) {
1026: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1027: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1029: /* update rhs: bb1 = bb - B*x */
1030: VecScale(&mone,mat->lvec);
1031: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1033: /* local sweep */
1034: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1035:
1036: }
1037: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1038: if (flag & SOR_ZERO_INITIAL_GUESS) {
1039: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1040: its--;
1041: }
1042: while (its--) {
1043: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1044: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1046: /* update rhs: bb1 = bb - B*x */
1047: VecScale(&mone,mat->lvec);
1048: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1050: /* local sweep */
1051: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1052:
1053: }
1054: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1055: if (flag & SOR_ZERO_INITIAL_GUESS) {
1056: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1057: its--;
1058: }
1059: while (its--) {
1060: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1061: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1063: /* update rhs: bb1 = bb - B*x */
1064: VecScale(&mone,mat->lvec);
1065: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1067: /* local sweep */
1068: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1069:
1070: }
1071: } else {
1072: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1073: }
1075: VecDestroy(bb1);
1076: return(0);
1077: }
1081: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082: {
1083: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1084: Mat A = mat->A,B = mat->B;
1085: int ierr;
1086: PetscReal isend[5],irecv[5];
1089: info->block_size = 1.0;
1090: MatGetInfo(A,MAT_LOCAL,info);
1091: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092: isend[3] = info->memory; isend[4] = info->mallocs;
1093: MatGetInfo(B,MAT_LOCAL,info);
1094: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095: isend[3] += info->memory; isend[4] += info->mallocs;
1096: if (flag == MAT_LOCAL) {
1097: info->nz_used = isend[0];
1098: info->nz_allocated = isend[1];
1099: info->nz_unneeded = isend[2];
1100: info->memory = isend[3];
1101: info->mallocs = isend[4];
1102: } else if (flag == MAT_GLOBAL_MAX) {
1103: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1104: info->nz_used = irecv[0];
1105: info->nz_allocated = irecv[1];
1106: info->nz_unneeded = irecv[2];
1107: info->memory = irecv[3];
1108: info->mallocs = irecv[4];
1109: } else if (flag == MAT_GLOBAL_SUM) {
1110: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1111: info->nz_used = irecv[0];
1112: info->nz_allocated = irecv[1];
1113: info->nz_unneeded = irecv[2];
1114: info->memory = irecv[3];
1115: info->mallocs = irecv[4];
1116: }
1117: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1118: info->fill_ratio_needed = 0;
1119: info->factor_mallocs = 0;
1120: info->rows_global = (double)matin->M;
1121: info->columns_global = (double)matin->N;
1122: info->rows_local = (double)matin->m;
1123: info->columns_local = (double)matin->N;
1125: return(0);
1126: }
1130: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131: {
1132: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1133: int ierr;
1136: switch (op) {
1137: case MAT_NO_NEW_NONZERO_LOCATIONS:
1138: case MAT_YES_NEW_NONZERO_LOCATIONS:
1139: case MAT_COLUMNS_UNSORTED:
1140: case MAT_COLUMNS_SORTED:
1141: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1142: case MAT_KEEP_ZEROED_ROWS:
1143: case MAT_NEW_NONZERO_LOCATION_ERR:
1144: case MAT_USE_INODES:
1145: case MAT_DO_NOT_USE_INODES:
1146: case MAT_IGNORE_ZERO_ENTRIES:
1147: MatSetOption(a->A,op);
1148: MatSetOption(a->B,op);
1149: break;
1150: case MAT_ROW_ORIENTED:
1151: a->roworiented = PETSC_TRUE;
1152: MatSetOption(a->A,op);
1153: MatSetOption(a->B,op);
1154: break;
1155: case MAT_ROWS_SORTED:
1156: case MAT_ROWS_UNSORTED:
1157: case MAT_YES_NEW_DIAGONALS:
1158: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1159: break;
1160: case MAT_COLUMN_ORIENTED:
1161: a->roworiented = PETSC_FALSE;
1162: MatSetOption(a->A,op);
1163: MatSetOption(a->B,op);
1164: break;
1165: case MAT_IGNORE_OFF_PROC_ENTRIES:
1166: a->donotstash = PETSC_TRUE;
1167: break;
1168: case MAT_NO_NEW_DIAGONALS:
1169: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1170: case MAT_SYMMETRIC:
1171: case MAT_STRUCTURALLY_SYMMETRIC:
1172: case MAT_NOT_SYMMETRIC:
1173: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1174: case MAT_HERMITIAN:
1175: case MAT_NOT_HERMITIAN:
1176: case MAT_SYMMETRY_ETERNAL:
1177: case MAT_NOT_SYMMETRY_ETERNAL:
1178: break;
1179: default:
1180: SETERRQ(PETSC_ERR_SUP,"unknown option");
1181: }
1182: return(0);
1183: }
1187: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1188: {
1189: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1190: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1191: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1192: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1193: int *cmap,*idx_p;
1196: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1197: mat->getrowactive = PETSC_TRUE;
1199: if (!mat->rowvalues && (idx || v)) {
1200: /*
1201: allocate enough space to hold information from the longest row.
1202: */
1203: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1204: int max = 1,tmp;
1205: for (i=0; i<matin->m; i++) {
1206: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1207: if (max < tmp) { max = tmp; }
1208: }
1209: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1210: mat->rowindices = (int*)(mat->rowvalues + max);
1211: }
1213: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1214: lrow = row - rstart;
1216: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1217: if (!v) {pvA = 0; pvB = 0;}
1218: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1219: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1220: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1221: nztot = nzA + nzB;
1223: cmap = mat->garray;
1224: if (v || idx) {
1225: if (nztot) {
1226: /* Sort by increasing column numbers, assuming A and B already sorted */
1227: int imark = -1;
1228: if (v) {
1229: *v = v_p = mat->rowvalues;
1230: for (i=0; i<nzB; i++) {
1231: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1232: else break;
1233: }
1234: imark = i;
1235: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1236: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1237: }
1238: if (idx) {
1239: *idx = idx_p = mat->rowindices;
1240: if (imark > -1) {
1241: for (i=0; i<imark; i++) {
1242: idx_p[i] = cmap[cworkB[i]];
1243: }
1244: } else {
1245: for (i=0; i<nzB; i++) {
1246: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1247: else break;
1248: }
1249: imark = i;
1250: }
1251: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1252: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1253: }
1254: } else {
1255: if (idx) *idx = 0;
1256: if (v) *v = 0;
1257: }
1258: }
1259: *nz = nztot;
1260: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1261: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1262: return(0);
1263: }
1267: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1268: {
1269: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1272: if (aij->getrowactive == PETSC_FALSE) {
1273: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1274: }
1275: aij->getrowactive = PETSC_FALSE;
1276: return(0);
1277: }
1281: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1282: {
1283: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1284: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1285: int ierr,i,j,cstart = aij->cstart;
1286: PetscReal sum = 0.0;
1287: PetscScalar *v;
1290: if (aij->size == 1) {
1291: MatNorm(aij->A,type,norm);
1292: } else {
1293: if (type == NORM_FROBENIUS) {
1294: v = amat->a;
1295: for (i=0; i<amat->nz; i++) {
1296: #if defined(PETSC_USE_COMPLEX)
1297: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298: #else
1299: sum += (*v)*(*v); v++;
1300: #endif
1301: }
1302: v = bmat->a;
1303: for (i=0; i<bmat->nz; i++) {
1304: #if defined(PETSC_USE_COMPLEX)
1305: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1306: #else
1307: sum += (*v)*(*v); v++;
1308: #endif
1309: }
1310: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1311: *norm = sqrt(*norm);
1312: } else if (type == NORM_1) { /* max column norm */
1313: PetscReal *tmp,*tmp2;
1314: int *jj,*garray = aij->garray;
1315: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1316: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1317: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1318: *norm = 0.0;
1319: v = amat->a; jj = amat->j;
1320: for (j=0; j<amat->nz; j++) {
1321: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1322: }
1323: v = bmat->a; jj = bmat->j;
1324: for (j=0; j<bmat->nz; j++) {
1325: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1326: }
1327: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1328: for (j=0; j<mat->N; j++) {
1329: if (tmp2[j] > *norm) *norm = tmp2[j];
1330: }
1331: PetscFree(tmp);
1332: PetscFree(tmp2);
1333: } else if (type == NORM_INFINITY) { /* max row norm */
1334: PetscReal ntemp = 0.0;
1335: for (j=0; j<aij->A->m; j++) {
1336: v = amat->a + amat->i[j];
1337: sum = 0.0;
1338: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1339: sum += PetscAbsScalar(*v); v++;
1340: }
1341: v = bmat->a + bmat->i[j];
1342: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1343: sum += PetscAbsScalar(*v); v++;
1344: }
1345: if (sum > ntemp) ntemp = sum;
1346: }
1347: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1348: } else {
1349: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1350: }
1351: }
1352: return(0);
1353: }
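/*
   Illustrative note (not part of the original source): each norm above is built
   from per-process partial results,

      NORM_FROBENIUS:  sqrt of the MPI_SUM of the local sums of |a_ij|^2
      NORM_1:          column sums accumulated locally, MPI_SUM-reduced, then maximized
      NORM_INFINITY:   local maximum row sum, MPI_MAX-reduced
*/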
1357: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1358: {
1359: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1360: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1361: int ierr;
1362: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1363: Mat B;
1364: PetscScalar *array;
1367: if (!matout && M != N) {
1368: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1369: }
1371: MatCreate(A->comm,A->n,A->m,N,M,&B);
1372: MatSetType(B,A->type_name);
1373: MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
1375: /* copy over the A part */
1376: Aloc = (Mat_SeqAIJ*)a->A->data;
1377: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1378: row = a->rstart;
1379: for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1380: for (i=0; i<m; i++) {
1381: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1382: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383: }
1384: aj = Aloc->j;
1385: for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1387: /* copy over the B part */
1388: Aloc = (Mat_SeqAIJ*)a->B->data;
1389: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1390: row = a->rstart;
1391: PetscMalloc((1+ai[m])*sizeof(int),&cols);
1392: ct = cols;
1393: for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1394: for (i=0; i<m; i++) {
1395: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1396: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1397: }
1398: PetscFree(ct);
1399: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1400: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1401: if (matout) {
1402: *matout = B;
1403: } else {
1404: MatHeaderCopy(A,B);
1405: }
1406: return(0);
1407: }
1411: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1412: {
1413: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1414: Mat a = aij->A,b = aij->B;
1415: int ierr,s1,s2,s3;
1418: MatGetLocalSize(mat,&s2,&s3);
1419: if (rr) {
1420: VecGetLocalSize(rr,&s1);
1421: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1422: /* Overlap communication with computation. */
1423: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1424: }
1425: if (ll) {
1426: VecGetLocalSize(ll,&s1);
1427: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1428: (*b->ops->diagonalscale)(b,ll,0);
1429: }
1430: /* scale the diagonal block */
1431: (*a->ops->diagonalscale)(a,ll,rr);
1433: if (rr) {
1434: /* Do a scatter end and then right scale the off-diagonal block */
1435: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1436: (*b->ops->diagonalscale)(b,0,aij->lvec);
1437: }
1438:
1439: return(0);
1440: }
1445: int MatPrintHelp_MPIAIJ(Mat A)
1446: {
1447: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1448: int ierr;
1451: if (!a->rank) {
1452: MatPrintHelp_SeqAIJ(a->A);
1453: }
1454: return(0);
1455: }
1459: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1460: {
1462: *bs = 1;
1463: return(0);
1464: }
1467: int MatSetUnfactored_MPIAIJ(Mat A)
1468: {
1469: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1470: int ierr;
1473: MatSetUnfactored(a->A);
1474: return(0);
1475: }
1479: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1480: {
1481: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1482: Mat a,b,c,d;
1483: PetscTruth flg;
1484: int ierr;
1487: a = matA->A; b = matA->B;
1488: c = matB->A; d = matB->B;
1490: MatEqual(a,c,&flg);
1491: if (flg == PETSC_TRUE) {
1492: MatEqual(b,d,&flg);
1493: }
1494: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1495: return(0);
1496: }
1500: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1501: {
1502: int ierr;
1503: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1504: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1507: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1508: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1509: /* because of the column compression in the off-processor part of the matrix a->B,
1510: the number of columns in a->B and b->B may be different, hence we cannot call
1511: the MatCopy() directly on the two parts. If need be, we can provide a more
1512: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1513: then copying the submatrices */
1514: MatCopy_Basic(A,B,str);
1515: } else {
1516: MatCopy(a->A,b->A,str);
1517: MatCopy(a->B,b->B,str);
1518: }
1519: return(0);
1520: }
1524: int MatSetUpPreallocation_MPIAIJ(Mat A)
1525: {
1526: int ierr;
1529: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1530: return(0);
1531: }
1533: #include "petscblaslapack.h"
1536: int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1537: {
1538: int ierr,one=1,i;
1539: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1540: Mat_SeqAIJ *x,*y;
1543: if (str == SAME_NONZERO_PATTERN) {
1544: x = (Mat_SeqAIJ *)xx->A->data;
1545: y = (Mat_SeqAIJ *)yy->A->data;
1546: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1547: x = (Mat_SeqAIJ *)xx->B->data;
1548: y = (Mat_SeqAIJ *)yy->B->data;
1549: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1550: } else if (str == SUBSET_NONZERO_PATTERN) {
1551: MatAXPY_SeqAIJ(a,xx->A,yy->A,str);
1553: x = (Mat_SeqAIJ *)xx->B->data;
1554: y = (Mat_SeqAIJ *)yy->B->data;
1555: if (y->xtoy && y->XtoY != xx->B) {
1556: PetscFree(y->xtoy);
1557: MatDestroy(y->XtoY);
1558: }
1559: if (!y->xtoy) { /* get xtoy */
1560: MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1561: y->XtoY = xx->B;
1562: }
1563: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1564: } else {
1565: MatAXPY_Basic(a,X,Y,str);
1566: }
1567: return(0);
1568: }
1570: /* -------------------------------------------------------------------*/
1571: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1572: MatGetRow_MPIAIJ,
1573: MatRestoreRow_MPIAIJ,
1574: MatMult_MPIAIJ,
1575: /* 4*/ MatMultAdd_MPIAIJ,
1576: MatMultTranspose_MPIAIJ,
1577: MatMultTransposeAdd_MPIAIJ,
1578: 0,
1579: 0,
1580: 0,
1581: /*10*/ 0,
1582: 0,
1583: 0,
1584: MatRelax_MPIAIJ,
1585: MatTranspose_MPIAIJ,
1586: /*15*/ MatGetInfo_MPIAIJ,
1587: MatEqual_MPIAIJ,
1588: MatGetDiagonal_MPIAIJ,
1589: MatDiagonalScale_MPIAIJ,
1590: MatNorm_MPIAIJ,
1591: /*20*/ MatAssemblyBegin_MPIAIJ,
1592: MatAssemblyEnd_MPIAIJ,
1593: 0,
1594: MatSetOption_MPIAIJ,
1595: MatZeroEntries_MPIAIJ,
1596: /*25*/ MatZeroRows_MPIAIJ,
1597: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1598: MatLUFactorSymbolic_MPIAIJ_TFS,
1599: #else
1600: 0,
1601: #endif
1602: 0,
1603: 0,
1604: 0,
1605: /*30*/ MatSetUpPreallocation_MPIAIJ,
1606: 0,
1607: 0,
1608: 0,
1609: 0,
1610: /*35*/ MatDuplicate_MPIAIJ,
1611: 0,
1612: 0,
1613: 0,
1614: 0,
1615: /*40*/ MatAXPY_MPIAIJ,
1616: MatGetSubMatrices_MPIAIJ,
1617: MatIncreaseOverlap_MPIAIJ,
1618: MatGetValues_MPIAIJ,
1619: MatCopy_MPIAIJ,
1620: /*45*/ MatPrintHelp_MPIAIJ,
1621: MatScale_MPIAIJ,
1622: 0,
1623: 0,
1624: 0,
1625: /*50*/ MatGetBlockSize_MPIAIJ,
1626: 0,
1627: 0,
1628: 0,
1629: 0,
1630: /*55*/ MatFDColoringCreate_MPIAIJ,
1631: 0,
1632: MatSetUnfactored_MPIAIJ,
1633: 0,
1634: 0,
1635: /*60*/ MatGetSubMatrix_MPIAIJ,
1636: MatDestroy_MPIAIJ,
1637: MatView_MPIAIJ,
1638: MatGetPetscMaps_Petsc,
1639: 0,
1640: /*65*/ 0,
1641: 0,
1642: 0,
1643: 0,
1644: 0,
1645: /*70*/ 0,
1646: 0,
1647: MatSetColoring_MPIAIJ,
1648: MatSetValuesAdic_MPIAIJ,
1649: MatSetValuesAdifor_MPIAIJ,
1650: /*75*/ 0,
1651: 0,
1652: 0,
1653: 0,
1654: 0,
1655: /*80*/ 0,
1656: 0,
1657: 0,
1658: 0,
1659: /*85*/ MatLoad_MPIAIJ};
1661: /* ----------------------------------------------------------------------------------------*/
1663: EXTERN_C_BEGIN
1666: int MatStoreValues_MPIAIJ(Mat mat)
1667: {
1668: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1669: int ierr;
1672: MatStoreValues(aij->A);
1673: MatStoreValues(aij->B);
1674: return(0);
1675: }
1676: EXTERN_C_END
1678: EXTERN_C_BEGIN
1681: int MatRetrieveValues_MPIAIJ(Mat mat)
1682: {
1683: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1684: int ierr;
1687: MatRetrieveValues(aij->A);
1688: MatRetrieveValues(aij->B);
1689: return(0);
1690: }
1691: EXTERN_C_END
1693: #include "petscpc.h"
1694: EXTERN_C_BEGIN
1697: int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1698: {
1699: Mat_MPIAIJ *b;
1700: int ierr,i;
1703: B->preallocated = PETSC_TRUE;
1704: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1705: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1706: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1707: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1708: if (d_nnz) {
1709: for (i=0; i<B->m; i++) {
1710: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1711: }
1712: }
1713: if (o_nnz) {
1714: for (i=0; i<B->m; i++) {
1715: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1716: }
1717: }
1718: b = (Mat_MPIAIJ*)B->data;
1719: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
1720: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
1722: return(0);
1723: }
1724: EXTERN_C_END
1726: /*MC
1727: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1729: Options Database Keys:
1730: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1732: Level: beginner
1734: .seealso: MatCreateMPIAIJ
1735: M*/
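/*
   Illustrative usage sketch (not part of the original source): the creation
   sequence mirrors what this file itself does for temporary matrices; comm, m, n,
   M, N are hypothetical placeholders for the communicator and the local/global
   sizes, and the preallocation counts simply repeat the defaults used by
   MatMPIAIJSetPreallocation_MPIAIJ() above.

      Mat A;
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/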
1737: EXTERN_C_BEGIN
1740: int MatCreate_MPIAIJ(Mat B)
1741: {
1742: Mat_MPIAIJ *b;
1743: int ierr,i,size;
1746: MPI_Comm_size(B->comm,&size);
1748: PetscNew(Mat_MPIAIJ,&b);
1749: B->data = (void*)b;
1750: PetscMemzero(b,sizeof(Mat_MPIAIJ));
1751: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1752: B->factor = 0;
1753: B->assembled = PETSC_FALSE;
1754: B->mapping = 0;
1756: B->insertmode = NOT_SET_VALUES;
1757: b->size = size;
1758: MPI_Comm_rank(B->comm,&b->rank);
1760: PetscSplitOwnership(B->comm,&B->m,&B->M);
1761: PetscSplitOwnership(B->comm,&B->n,&B->N);
1763: /* the information in the maps duplicates the information computed below, eventually
1764: we should remove the duplicate information that is not contained in the maps */
1765: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1766: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1768: /* build local table of row and column ownerships */
1769: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1770: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771: b->cowners = b->rowners + b->size + 2;
1772: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1773: b->rowners[0] = 0;
1774: for (i=2; i<=b->size; i++) {
1775: b->rowners[i] += b->rowners[i-1];
1776: }
1777: b->rstart = b->rowners[b->rank];
1778: b->rend = b->rowners[b->rank+1];
1779: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1780: b->cowners[0] = 0;
1781: for (i=2; i<=b->size; i++) {
1782: b->cowners[i] += b->cowners[i-1];
1783: }
1784: b->cstart = b->cowners[b->rank];
1785: b->cend = b->cowners[b->rank+1];
1787: /* build cache for off array entries formed */
1788: MatStashCreate_Private(B->comm,1,&B->stash);
1789: b->donotstash = PETSC_FALSE;
1790: b->colmap = 0;
1791: b->garray = 0;
1792: b->roworiented = PETSC_TRUE;
1794: /* stuff used for matrix vector multiply */
1795: b->lvec = PETSC_NULL;
1796: b->Mvctx = PETSC_NULL;
1798: /* stuff for MatGetRow() */
1799: b->rowindices = 0;
1800: b->rowvalues = 0;
1801: b->getrowactive = PETSC_FALSE;
1803: /* Explicitly create 2 MATSEQAIJ matrices. */
1804: MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
1805: MatSetType(b->A,MATSEQAIJ);
1806: PetscLogObjectParent(B,b->A);
1807: MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);
1808: MatSetType(b->B,MATSEQAIJ);
1809: PetscLogObjectParent(B,b->B);
1811: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1812: "MatStoreValues_MPIAIJ",
1813: MatStoreValues_MPIAIJ);
1814: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1815: "MatRetrieveValues_MPIAIJ",
1816: MatRetrieveValues_MPIAIJ);
1817: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1818: "MatGetDiagonalBlock_MPIAIJ",
1819: MatGetDiagonalBlock_MPIAIJ);
1820: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1821: "MatIsTranspose_MPIAIJ",
1822: MatIsTranspose_MPIAIJ);
1823: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1824: "MatMPIAIJSetPreallocation_MPIAIJ",
1825: MatMPIAIJSetPreallocation_MPIAIJ);
1826: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1827: "MatDiagonalScaleLocal_MPIAIJ",
1828: MatDiagonalScaleLocal_MPIAIJ);
1829: return(0);
1830: }
1831: EXTERN_C_END
1833: /*MC
1834: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1836: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1837: and MATMPIAIJ otherwise.
1839: Options Database Keys:
1840: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1842: Level: beginner
1844: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1845: M*/
1847: EXTERN_C_BEGIN
1850: int MatCreate_AIJ(Mat A) {
1851: int ierr,size;
1854: PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1855: MPI_Comm_size(A->comm,&size);
1856: if (size == 1) {
1857: MatSetType(A,MATSEQAIJ);
1858: } else {
1859: MatSetType(A,MATMPIAIJ);
1860: }
1861: return(0);
1862: }
1863: EXTERN_C_END
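/*
   A minimal usage sketch for the MATAIJ dispatch type documented above (added
   illustration, not part of the original source; "comm", M and N are placeholders
   and error checking is omitted). MatCreate_AIJ() simply forwards to MATSEQAIJ on
   a one-process communicator and to MATMPIAIJ otherwise, so the same calling
   sequence works for any communicator size:

       Mat A;
       MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
       MatSetType(A,MATAIJ);    (or MatSetFromOptions(A) together with -mat_type aij)
       ... preallocate, MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/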
1867: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1868: {
1869: Mat mat;
1870: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1871: int ierr;
1874: *newmat = 0;
1875: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1876: MatSetType(mat,matin->type_name);
1877: a = (Mat_MPIAIJ*)mat->data;
1878: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1879: mat->factor = matin->factor;
1880: mat->assembled = PETSC_TRUE;
1881: mat->insertmode = NOT_SET_VALUES;
1882: mat->preallocated = PETSC_TRUE;
1884: a->rstart = oldmat->rstart;
1885: a->rend = oldmat->rend;
1886: a->cstart = oldmat->cstart;
1887: a->cend = oldmat->cend;
1888: a->size = oldmat->size;
1889: a->rank = oldmat->rank;
1890: a->donotstash = oldmat->donotstash;
1891: a->roworiented = oldmat->roworiented;
1892: a->rowindices = 0;
1893: a->rowvalues = 0;
1894: a->getrowactive = PETSC_FALSE;
1896: PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1897: MatStashCreate_Private(matin->comm,1,&mat->stash);
1898: if (oldmat->colmap) {
1899: #if defined (PETSC_USE_CTABLE)
1900: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1901: #else
1902: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1903: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1904: PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1905: #endif
1906: } else a->colmap = 0;
1907: if (oldmat->garray) {
1908: int len;
1909: len = oldmat->B->n;
1910: PetscMalloc((len+1)*sizeof(int),&a->garray);
1911: PetscLogObjectMemory(mat,len*sizeof(int));
1912: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1913: } else a->garray = 0;
1914:
1915: VecDuplicate(oldmat->lvec,&a->lvec);
1916: PetscLogObjectParent(mat,a->lvec);
1917: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1918: PetscLogObjectParent(mat,a->Mvctx);
1919: MatDestroy(a->A);
1920: MatDuplicate(oldmat->A,cpvalues,&a->A);
1921: PetscLogObjectParent(mat,a->A);
1922: MatDestroy(a->B);
1923: MatDuplicate(oldmat->B,cpvalues,&a->B);
1924: PetscLogObjectParent(mat,a->B);
1925: PetscFListDuplicate(matin->qlist,&mat->qlist);
1926: *newmat = mat;
1927: return(0);
1928: }
1930: #include petscsys.h
1934: int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1935: {
1936: Mat A;
1937: PetscScalar *vals,*svals;
1938: MPI_Comm comm = ((PetscObject)viewer)->comm;
1939: MPI_Status status;
1940: int i,nz,ierr,j,rstart,rend,fd;
1941: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1942: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1943: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1946: MPI_Comm_size(comm,&size);
1947: MPI_Comm_rank(comm,&rank);
1948: if (!rank) {
1949: PetscViewerBinaryGetDescriptor(viewer,&fd);
1950: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1951: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1952: if (header[3] < 0) {
1953: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1954: }
1955: }
1957: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1958: M = header[1]; N = header[2];
1959: /* determine ownership of all rows */
1960: m = M/size + ((M % size) > rank);
1961: PetscMalloc((size+2)*sizeof(int),&rowners);
1962: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1963: rowners[0] = 0;
1964: for (i=2; i<=size; i++) {
1965: rowners[i] += rowners[i-1];
1966: }
1967: rstart = rowners[rank];
1968: rend = rowners[rank+1];
1970: /* distribute row lengths to all processors */
1971: PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1972: offlens = ourlens + (rend-rstart);
1973: if (!rank) {
1974: PetscMalloc(M*sizeof(int),&rowlengths);
1975: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1976: PetscMalloc(size*sizeof(int),&sndcounts);
1977: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1978: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1979: PetscFree(sndcounts);
1980: } else {
1981: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1982: }
1984: if (!rank) {
1985: /* calculate the number of nonzeros on each processor */
1986: PetscMalloc(size*sizeof(int),&procsnz);
1987: PetscMemzero(procsnz,size*sizeof(int));
1988: for (i=0; i<size; i++) {
1989: for (j=rowners[i]; j< rowners[i+1]; j++) {
1990: procsnz[i] += rowlengths[j];
1991: }
1992: }
1993: PetscFree(rowlengths);
1995: /* determine max buffer needed and allocate it */
1996: maxnz = 0;
1997: for (i=0; i<size; i++) {
1998: maxnz = PetscMax(maxnz,procsnz[i]);
1999: }
2000: PetscMalloc(maxnz*sizeof(int),&cols);
2002: /* read in my part of the matrix column indices */
2003: nz = procsnz[0];
2004: PetscMalloc(nz*sizeof(int),&mycols);
2005: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2007: /* read in everyone else's part of the matrix and ship it off */
2008: for (i=1; i<size; i++) {
2009: nz = procsnz[i];
2010: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2011: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
2012: }
2013: PetscFree(cols);
2014: } else {
2015: /* determine buffer space needed for message */
2016: nz = 0;
2017: for (i=0; i<m; i++) {
2018: nz += ourlens[i];
2019: }
2020: PetscMalloc((nz+1)*sizeof(int),&mycols);
2022: /* receive message of column indices */
2023: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2024: MPI_Get_count(&status,MPI_INT,&maxnz);
2025: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2026: }
2028: /* determine column ownership if matrix is not square */
2029: if (N != M) {
2030: n = N/size + ((N % size) > rank);
2031: MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
2032: cstart = cend - n;
2033: } else {
2034: cstart = rstart;
2035: cend = rend;
2036: n = cend - cstart;
2037: }
2039: /* loop over local rows, determining number of off diagonal entries */
2040: PetscMemzero(offlens,m*sizeof(int));
2041: jj = 0;
2042: for (i=0; i<m; i++) {
2043: for (j=0; j<ourlens[i]; j++) {
2044: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2045: jj++;
2046: }
2047: }
2049: /* create our matrix */
2050: for (i=0; i<m; i++) {
2051: ourlens[i] -= offlens[i];
2052: }
2053: MatCreate(comm,m,n,M,N,&A);
2054: MatSetType(A,type);
2055: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2057: MatSetOption(A,MAT_COLUMNS_SORTED);
2058: for (i=0; i<m; i++) {
2059: ourlens[i] += offlens[i];
2060: }
2062: if (!rank) {
2063: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2065: /* read in my part of the matrix numerical values */
2066: nz = procsnz[0];
2067: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2068:
2069: /* insert into matrix */
2070: jj = rstart;
2071: smycols = mycols;
2072: svals = vals;
2073: for (i=0; i<m; i++) {
2074: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2075: smycols += ourlens[i];
2076: svals += ourlens[i];
2077: jj++;
2078: }
2080: /* read in the other processors' parts and ship them out */
2081: for (i=1; i<size; i++) {
2082: nz = procsnz[i];
2083: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2084: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2085: }
2086: PetscFree(procsnz);
2087: } else {
2088: /* receive numeric values */
2089: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2091: /* receive message of values */
2092: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2093: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2094: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2096: /* insert into matrix */
2097: jj = rstart;
2098: smycols = mycols;
2099: svals = vals;
2100: for (i=0; i<m; i++) {
2101: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2102: smycols += ourlens[i];
2103: svals += ourlens[i];
2104: jj++;
2105: }
2106: }
2107: PetscFree(ourlens);
2108: PetscFree(vals);
2109: PetscFree(mycols);
2110: PetscFree(rowners);
2112: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2113: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2114: *newmat = A;
2115: return(0);
2116: }
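/*
   A minimal sketch of how MatLoad_MPIAIJ() above is reached through the generic
   MatLoad() interface (added illustration, not part of the original source;
   "matrix.dat" is a placeholder, the read-only mode name PETSC_FILE_RDONLY is
   assumed for this release, and error checking is omitted):

       PetscViewer viewer;
       Mat         A;
       PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);
       MatLoad(viewer,MATMPIAIJ,&A);
       PetscViewerDestroy(viewer);
*/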
2120: /*
2121: Not ideal since it makes two copies of the submatrix: first each process builds
2122: a local SeqAIJ matrix, and then the local matrices are concatenated into the final
2123: result. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2124: */
2125: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2126: {
2127: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2128: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2129: Mat *local,M,Mreuse;
2130: PetscScalar *vwork,*aa;
2131: MPI_Comm comm = mat->comm;
2132: Mat_SeqAIJ *aij;
2136: MPI_Comm_rank(comm,&rank);
2137: MPI_Comm_size(comm,&size);
2139: if (call == MAT_REUSE_MATRIX) {
2140: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2141: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2142: local = &Mreuse;
2143: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2144: } else {
2145: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2146: Mreuse = *local;
2147: PetscFree(local);
2148: }
2150: /*
2151: m - number of local rows
2152: n - number of columns (same on all processors)
2153: rstart - first row in new global matrix generated
2154: */
2155: MatGetSize(Mreuse,&m,&n);
2156: if (call == MAT_INITIAL_MATRIX) {
2157: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2158: ii = aij->i;
2159: jj = aij->j;
2161: /*
2162: Determine the number of non-zeros in the diagonal and off-diagonal
2163: portions of the matrix in order to do correct preallocation
2164: */
2166: /* first get start and end of "diagonal" columns */
2167: if (csize == PETSC_DECIDE) {
2168: ISGetSize(isrow,&mglobal);
2169: if (mglobal == n) { /* square matrix */
2170: nlocal = m;
2171: } else {
2172: nlocal = n/size + ((n % size) > rank);
2173: }
2174: } else {
2175: nlocal = csize;
2176: }
2177: MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2178: rstart = rend - nlocal;
2179: if (rank == size - 1 && rend != n) {
2180: SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2181: }
2183: /* next, compute all the lengths */
2184: PetscMalloc((2*m+1)*sizeof(int),&dlens);
2185: olens = dlens + m;
2186: for (i=0; i<m; i++) {
2187: jend = ii[i+1] - ii[i];
2188: olen = 0;
2189: dlen = 0;
2190: for (j=0; j<jend; j++) {
2191: if (*jj < rstart || *jj >= rend) olen++;
2192: else dlen++;
2193: jj++;
2194: }
2195: olens[i] = olen;
2196: dlens[i] = dlen;
2197: }
2198: MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);
2199: MatSetType(M,mat->type_name);
2200: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2201: PetscFree(dlens);
2202: } else {
2203: int ml,nl;
2205: M = *newmat;
2206: MatGetLocalSize(M,&ml,&nl);
2207: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2208: MatZeroEntries(M);
2209: /*
2210: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2211: rather than the slower MatSetValues().
2212: */
2213: M->was_assembled = PETSC_TRUE;
2214: M->assembled = PETSC_FALSE;
2215: }
2216: MatGetOwnershipRange(M,&rstart,&rend);
2217: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2218: ii = aij->i;
2219: jj = aij->j;
2220: aa = aij->a;
2221: for (i=0; i<m; i++) {
2222: row = rstart + i;
2223: nz = ii[i+1] - ii[i];
2224: cwork = jj; jj += nz;
2225: vwork = aa; aa += nz;
2226: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2227: }
2229: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2230: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2231: *newmat = M;
2233: /* save the submatrix used on this processor for the next request */
2234: if (call == MAT_INITIAL_MATRIX) {
2235: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2236: PetscObjectDereference((PetscObject)Mreuse);
2237: }
2239: return(0);
2240: }
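/*
   A usage sketch for the routine above through the public MatGetSubMatrix()
   interface (added illustration, not part of the original source; nlocal and
   first are placeholders for the locally owned piece of the index sets, error
   checking is omitted). The first call builds the submatrix and composes it as
   "SubMatrix"; a later call with MAT_REUSE_MATRIX refills that same matrix:

       IS  isrow,iscol;
       Mat sub;
       ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);
       ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);
       MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
       ... change the numerical values of mat ...
       MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);
*/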
2244: /*@C
2245: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2246: (the default parallel PETSc format). For good matrix assembly performance
2247: the user should preallocate the matrix storage by setting the parameters
2248: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2249: performance can be increased by more than a factor of 50.
2251: Collective on MPI_Comm
2253: Input Parameters:
2254: + A - the matrix
2255: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2256: (same value is used for all local rows)
2257: . d_nnz - array containing the number of nonzeros in the various rows of the
2258: DIAGONAL portion of the local submatrix (possibly different for each row)
2259: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2260: The size of this array is equal to the number of local rows, i.e. 'm'.
2261: You must leave room for the diagonal entry even if it is zero.
2262: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2263: submatrix (same value is used for all local rows).
2264: - o_nnz - array containing the number of nonzeros in the various rows of the
2265: OFF-DIAGONAL portion of the local submatrix (possibly different for
2266: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2267: structure. The size of this array is equal to the number
2268: of local rows, i.e. 'm'.
2270: The AIJ format (also called the Yale sparse matrix format or
2271: compressed row storage), is fully compatible with standard Fortran 77
2272: storage. That is, the stored row and column indices can begin at
2273: either one (as in Fortran) or zero. See the users manual for details.
2275: The user MUST specify either the local or global matrix dimensions
2276: (possibly both).
2278: The parallel matrix is partitioned such that the first m0 rows belong to
2279: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2280: to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2282: The DIAGONAL portion of the local submatrix of a processor can be defined
2283: as the submatrix which is obtained by extracting the part corresponding
2284: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2285: first row that belongs to the processor, and r2 is the last row belonging
2286: to this processor. This is a square mxm matrix. The remaining portion
2287: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2289: If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2291: By default, this format uses inodes (identical nodes) when possible.
2292: We search for consecutive rows with the same nonzero structure, thereby
2293: reusing matrix information to achieve increased efficiency.
2295: Options Database Keys:
2296: + -mat_aij_no_inode - Do not use inodes
2297: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2298: - -mat_aij_oneindex - Internally use indexing starting at 1
2299: rather than 0. Note that when calling MatSetValues(),
2300: the user still MUST index entries starting at 0!
2302: Example usage:
2303:
2304: Consider the following 8x8 matrix with 34 non-zero values, which is
2305: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2306: proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2307: as follows:
2309: .vb
2310: 1 2 0 | 0 3 0 | 0 4
2311: Proc0 0 5 6 | 7 0 0 | 8 0
2312: 9 0 10 | 11 0 0 | 12 0
2313: -------------------------------------
2314: 13 0 14 | 15 16 17 | 0 0
2315: Proc1 0 18 0 | 19 20 21 | 0 0
2316: 0 0 0 | 22 23 0 | 24 0
2317: -------------------------------------
2318: Proc2 25 26 27 | 0 0 28 | 29 0
2319: 30 0 0 | 31 32 33 | 0 34
2320: .ve
2322: This can be represented as a collection of submatrices as:
2324: .vb
2325: A B C
2326: D E F
2327: G H I
2328: .ve
2330: Where the submatrices A,B,C are owned by proc0, D,E,F are
2331: owned by proc1, G,H,I are owned by proc2.
2333: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2334: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335: The 'M','N' parameters are 8,8, and have the same values on all procs.
2337: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2338: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2339: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2340: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2341: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2342: matrix, and [DF] as another SeqAIJ matrix.
2344: When d_nz, o_nz parameters are specified, d_nz storage elements are
2345: allocated for every row of the local diagonal submatrix, and o_nz
2346: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2347: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2348: local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2349: In this case, the values of d_nz,o_nz are:
2350: .vb
2351: proc0 : d_nz = 2, o_nz = 2
2352: proc1 : d_nz = 3, o_nz = 2
2353: proc2 : d_nz = 1, o_nz = 4
2354: .ve
2355: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2356: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2357: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2358: 34 values.
2360: When the d_nnz, o_nnz parameters are specified, the storage is specified
2361: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2362: In the above case the values for d_nnz,o_nnz are:
2363: .vb
2364: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2365: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2366: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2367: .ve
2368: Here the space allocated is the sum of all the above values, i.e. 34, and
2369: hence preallocation is perfect.
2371: Level: intermediate
2373: .keywords: matrix, aij, compressed row, sparse, parallel
2375: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2376: @*/
2377: int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2378: {
2379: int ierr,(*f)(Mat,int,const int[],int,const int[]);
2382: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2383: if (f) {
2384: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2385: }
2386: return(0);
2387: }
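/*
   A minimal usage sketch for MatMPIAIJSetPreallocation() (added illustration,
   not part of the original source; comm, m, n, M, N, d_nz and o_nz are
   placeholders, chosen as discussed in the manual page above, and error checking
   is omitted). The matrix must already have type MATMPIAIJ; otherwise the
   composed "MatMPIAIJSetPreallocation_C" function is absent and the call does
   nothing:

       Mat A;
       MatCreate(comm,m,n,M,N,&A);
       MatSetType(A,MATMPIAIJ);
       MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
       ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/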
2391: /*@C
2392: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2393: (the default parallel PETSc format). For good matrix assembly performance
2394: the user should preallocate the matrix storage by setting the parameters
2395: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2396: performance can be increased by more than a factor of 50.
2398: Collective on MPI_Comm
2400: Input Parameters:
2401: + comm - MPI communicator
2402: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2403: This value should be the same as the local size used in creating the
2404: y vector for the matrix-vector product y = Ax.
2405: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2406: This value should be the same as the local size used in creating the
2407: x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2408: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2409: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2410: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2411: (same value is used for all local rows)
2412: . d_nnz - array containing the number of nonzeros in the various rows of the
2413: DIAGONAL portion of the local submatrix (possibly different for each row)
2414: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2415: The size of this array is equal to the number of local rows, i.e. 'm'.
2416: You must leave room for the diagonal entry even if it is zero.
2417: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2418: submatrix (same value is used for all local rows).
2419: - o_nnz - array containing the number of nonzeros in the various rows of the
2420: OFF-DIAGONAL portion of the local submatrix (possibly different for
2421: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2422: structure. The size of this array is equal to the number
2423: of local rows, i.e. 'm'.
2425: Output Parameter:
2426: . A - the matrix
2428: Notes:
2429: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2430: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2431: storage requirements for this matrix.
2433: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2434: processor then it must be used on all processors that share the object for
2435: that argument.
2437: The AIJ format (also called the Yale sparse matrix format or
2438: compressed row storage), is fully compatible with standard Fortran 77
2439: storage. That is, the stored row and column indices can begin at
2440: either one (as in Fortran) or zero. See the users manual for details.
2442: The user MUST specify either the local or global matrix dimensions
2443: (possibly both).
2445: The parallel matrix is partitioned such that the first m0 rows belong to
2446: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2447: to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2449: The DIAGONAL portion of the local submatrix of a processor can be defined
2450: as the submatrix which is obtained by extracting the part corresponding
2451: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2452: first row that belongs to the processor, and r2 is the last row belonging
2453: to this processor. This is a square mxm matrix. The remaining portion
2454: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2456: If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2458: When calling this routine with a single process communicator, a matrix of
2459: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
2460: type of communicator, use the construction mechanism:
2461: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2463: By default, this format uses inodes (identical nodes) when possible.
2464: We search for consecutive rows with the same nonzero structure, thereby
2465: reusing matrix information to achieve increased efficiency.
2467: Options Database Keys:
2468: + -mat_aij_no_inode - Do not use inodes
2469: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2470: - -mat_aij_oneindex - Internally use indexing starting at 1
2471: rather than 0. Note that when calling MatSetValues(),
2472: the user still MUST index entries starting at 0!
2475: Example usage:
2476:
2477: Consider the following 8x8 matrix with 34 non-zero values, which is
2478: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2479: proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2480: as follows:
2482: .vb
2483: 1 2 0 | 0 3 0 | 0 4
2484: Proc0 0 5 6 | 7 0 0 | 8 0
2485: 9 0 10 | 11 0 0 | 12 0
2486: -------------------------------------
2487: 13 0 14 | 15 16 17 | 0 0
2488: Proc1 0 18 0 | 19 20 21 | 0 0
2489: 0 0 0 | 22 23 0 | 24 0
2490: -------------------------------------
2491: Proc2 25 26 27 | 0 0 28 | 29 0
2492: 30 0 0 | 31 32 33 | 0 34
2493: .ve
2495: This can be represented as a collection of submatrices as:
2497: .vb
2498: A B C
2499: D E F
2500: G H I
2501: .ve
2503: Where the submatrices A,B,C are owned by proc0, D,E,F are
2504: owned by proc1, G,H,I are owned by proc2.
2506: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2508: The 'M','N' parameters are 8,8, and have the same values on all procs.
2510: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2511: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2512: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2513: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2514: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2515: matrix, and [DF] as another SeqAIJ matrix.
2517: When d_nz, o_nz parameters are specified, d_nz storage elements are
2518: allocated for every row of the local diagonal submatrix, and o_nz
2519: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2520: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2521: local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2522: In this case, the values of d_nz,o_nz are:
2523: .vb
2524: proc0 : d_nz = 2, o_nz = 2
2525: proc1 : d_nz = 3, o_nz = 2
2526: proc2 : d_nz = 1, o_nz = 4
2527: .ve
2528: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2529: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2530: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2531: 34 values.
2533: When the d_nnz, o_nnz parameters are specified, the storage is specified
2534: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2535: In the above case the values for d_nnz,o_nnz are:
2536: .vb
2537: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2538: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2539: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2540: .ve
2541: Here the space allocated is the sum of all the above values, i.e. 34, and
2542: hence preallocation is perfect.
2544: Level: intermediate
2546: .keywords: matrix, aij, compressed row, sparse, parallel
2548: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2549: @*/
2550: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2551: {
2552: int ierr,size;
2555: MatCreate(comm,m,n,M,N,A);
2556: MPI_Comm_size(comm,&size);
2557: if (size > 1) {
2558: MatSetType(*A,MATMPIAIJ);
2559: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2560: } else {
2561: MatSetType(*A,MATSEQAIJ);
2562: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2563: }
2564: return(0);
2565: }
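/*
   A minimal usage sketch for MatCreateMPIAIJ() (added illustration, not part of
   the original source; it assembles an 8x8 identity matrix, so one diagonal
   nonzero per row is preallocated; error checking is omitted):

       Mat         A;
       int         i,rstart,rend;
       PetscScalar one = 1.0;
       MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,1,PETSC_NULL,0,PETSC_NULL,&A);
       MatGetOwnershipRange(A,&rstart,&rend);
       for (i=rstart; i<rend; i++) {
         MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
       }
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/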
2569: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2570: {
2571: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2573: *Ad = a->A;
2574: *Ao = a->B;
2575: *colmap = a->garray;
2576: return(0);
2577: }
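/*
   A usage sketch for MatMPIAIJGetSeqAIJ() above (added illustration, not part of
   the original source; error checking is omitted). Ad is the local diagonal
   block a->A, Ao is the local off-diagonal block a->B, and colmap is the garray
   that maps Ao's local column numbers back to global column numbers:

       Mat Ad,Ao;
       int *colmap;
       MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/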
2581: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2582: {
2583: int ierr,i;
2584: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2587: if (coloring->ctype == IS_COLORING_LOCAL) {
2588: ISColoringValue *allcolors,*colors;
2589: ISColoring ocoloring;
2591: /* set coloring for diagonal portion */
2592: MatSetColoring_SeqAIJ(a->A,coloring);
2594: /* set coloring for off-diagonal portion */
2595: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2596: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2597: for (i=0; i<a->B->n; i++) {
2598: colors[i] = allcolors[a->garray[i]];
2599: }
2600: PetscFree(allcolors);
2601: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2602: MatSetColoring_SeqAIJ(a->B,ocoloring);
2603: ISColoringDestroy(ocoloring);
2604: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2605: ISColoringValue *colors;
2606: int *larray;
2607: ISColoring ocoloring;
2609: /* set coloring for diagonal portion */
2610: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2611: for (i=0; i<a->A->n; i++) {
2612: larray[i] = i + a->cstart;
2613: }
2614: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2615: PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2616: for (i=0; i<a->A->n; i++) {
2617: colors[i] = coloring->colors[larray[i]];
2618: }
2619: PetscFree(larray);
2620: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2621: MatSetColoring_SeqAIJ(a->A,ocoloring);
2622: ISColoringDestroy(ocoloring);
2624: /* set coloring for off-diagonal portion */
2625: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2626: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2627: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2628: for (i=0; i<a->B->n; i++) {
2629: colors[i] = coloring->colors[larray[i]];
2630: }
2631: PetscFree(larray);
2632: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2633: MatSetColoring_SeqAIJ(a->B,ocoloring);
2634: ISColoringDestroy(ocoloring);
2635: } else {
2636: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2637: }
2639: return(0);
2640: }
2644: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2645: {
2646: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2647: int ierr;
2650: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2651: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2652: return(0);
2653: }
2657: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2658: {
2659: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2660: int ierr;
2663: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2664: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2665: return(0);
2666: }
2670: /*@C
2671: MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2672: matrices from each processor
2674: Collective on MPI_Comm
2676: Input Parameters:
2677: + comm - the communicator the parallel matrix will live on
2678: - inmat - the input sequential matrix (one per processor)
2680: Output Parameter:
2681: . outmat - the parallel matrix generated
2683: Level: advanced
2685: Notes: The number of columns of the matrix on EACH of the separate processors
2686: MUST be the same.
2688: @*/
2689: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2690: {
2691: int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2692: PetscScalar *values;
2693: PetscMap columnmap,rowmap;
2696:
2697: MatGetSize(inmat,&m,&n);
2699: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2700: PetscMapCreate(comm,&columnmap);
2701: PetscMapSetSize(columnmap,n);
2702: PetscMapSetType(columnmap,MAP_MPI);
2703: PetscMapGetLocalSize(columnmap,&n);
2704: PetscMapDestroy(columnmap);
2706: PetscMapCreate(comm,&rowmap);
2707: PetscMapSetLocalSize(rowmap,m);
2708: PetscMapSetType(rowmap,MAP_MPI);
2709: PetscMapGetLocalRange(rowmap,&rstart,0);
2710: PetscMapDestroy(rowmap);
2712: MatPreallocateInitialize(comm,m,n,dnz,onz);
2713: for (i=0;i<m;i++) {
2714: MatGetRow(inmat,i,&nnz,&indx,&values);
2715: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2716: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2717: }
2718: /* This routine will ONLY return an MPIAIJ type matrix */
2719: MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);
2720: MatSetType(*outmat,MATMPIAIJ);
2721: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
2722: MatPreallocateFinalize(dnz,onz);
2724: for (i=0;i<m;i++) {
2725: MatGetRow(inmat,i,&nnz,&indx,&values);
2726: I = i + rstart;
2727: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2728: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2729: }
2730: MatDestroy(inmat);
2731: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2732: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2734: return(0);
2735: }
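/*
   A minimal usage sketch for MatMerge() (added illustration, not part of the
   original source; "seqmat" stands for a fully assembled sequential matrix owned
   by each process and error checking is omitted). Note that MatMerge() destroys
   the input matrix after its values have been copied:

       Mat seqmat,parmat;
       ... build and assemble seqmat on PETSC_COMM_SELF ...
       MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);
*/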
2739: int MatFileSplit(Mat A,char *outfile)
2740: {
2741: int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2742: PetscViewer out;
2743: char *name;
2744: Mat B;
2745: PetscScalar *values;
2748:
2749: MatGetLocalSize(A,&m,0);
2750: MatGetSize(A,0,&N);
2751: /* Should this be the type of the diagonal block of A? */
2752: MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);
2753: MatSetType(B,MATSEQAIJ);
2754: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
2755: MatGetOwnershipRange(A,&rstart,0);
2756: for (i=0;i<m;i++) {
2757: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2758: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2759: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2760: }
2761: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2762: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2764: MPI_Comm_rank(A->comm,&rank);
2765: PetscStrlen(outfile,&len);
2766: PetscMalloc((len+5)*sizeof(char),&name);
2767: sprintf(name,"%s.%d",outfile,rank);
2768: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);
2769: PetscFree(name);
2770: MatView(B,out);
2771: PetscViewerDestroy(out);
2772: MatDestroy(B);
2773: return(0);
2774: }