/[escript]/trunk-mpi-branch/paso/src/SystemMatrix_nullifyRowsAndCols.c
ViewVC logotype

Annotation of /trunk-mpi-branch/paso/src/SystemMatrix_nullifyRowsAndCols.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1096 - (hide annotations)
Mon Apr 16 22:59:33 2007 UTC (14 years, 2 months ago) by ksteube
File MIME type: text/plain
File size: 14302 byte(s)
MPI implicit solver example run_simplesolve.py now compiling and
running successfully on one CPU of ess.

Adjusted SConscript, removed some debug print statements and removed
some partially implemented TRILINOS calls.


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631 /*
4     ********************************************************************************
5 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
6 dhawcroft 631 * *
7     * http://www.access.edu.au *
8     * Primary Business: Queensland, Australia *
9     * Licensed under the Open Software License version 3.0 *
10     * http://www.opensource.org/licenses/osl-3.0.php *
11     ********************************************************************************
12     */
13    
14 jgs 150 /**************************************************************/
15    
16     /* Paso: SystemMatrix */
17    
18     /* nullify rows and columns in the matrix */
19    
20     /* the rows and columns are marked by positive values in */
21     /* mask_row and mask_col. Values on the main diagonal */
22     /* which are marked to set to zero by both mask_row and */
23     /* mask_col are set to main_diagonal_value */
24    
25    
26     /**************************************************************/
27    
28     /* Copyrights by ACcESS Australia 2003 */
29     /* Author: gross@access.edu.au */
30    
31     /**************************************************************/
32    
33     #include "Paso.h"
34     #include "SystemMatrix.h"
35    
36     void Paso_SystemMatrix_nullifyRowsAndCols(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
37    
38 gross 1087 dim_t N=A->maxNumCols * A->col_block_size;
39     Paso_MPIInfo *mpi_info=A->mpi_info;
40     double* buffer0, *buffer1;
41     if (mpi_info->size>1) {
42     if (A ->col_block_size==1 && A ->row_block_size ==1) {
43     if (A->type & MATRIX_FORMAT_CSC) {
44     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: CSC with block size 1 is not supported by MPI.");
45     return;
46     } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
47     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
48     return;
49     } else {
50     Paso_SystemMatrix_nullifyRows_CSR_BLK1(A,mask_row,main_diagonal_value);
51     }
52     } else {
53     if (A->type & MATRIX_FORMAT_CSC) {
54     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: CSC is not supported by MPI.");
55     return;
56     } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
57     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
58     return;
59     } else {
60     Paso_SystemMatrix_nullifyRows_CSR(A,mask_row,main_diagonal_value);
61     }
62     }
63     #ifdef PASO_MPI
64     {
65     buffer0=TMPMEMALLOC(N,double);
66     buffer1=TMPMEMALLOC(N,double);
67     if (Finley_checkPtr(buffer0) || Finley_checkPtr(buffer1) ) {
68     TMPMEMFREE(buffer0);
69     TMPMEMFREE(buffer1);
70     return;
71     }
72     MPI_Status status;
73     MPI_Request snd_rqst, rcv_rqst;
74     double* snd_buf=mask_col, *rcv_buf=buffer0, *swap;
75     dim_t len_snd_buf=N;
76     dim_t len_rcv_buf=N;
77     dim_t h, fromRank, toRank;
78     dim_t myRank=mpi_info->rank;
79     dim_t numProc=mpi_info->size;
80     index_t rank_of_snd_buf=mpi_info->rank;
81    
82    
83     for (h=0; h<A->pattern->numHops; ++h) {
84    
85     /* start asynchronos communication */
86     if (h<A->pattern->numHops-1) {
87     fromRank=PASO_MPI_mod(myRank-(A->pattern->hop[h]),numProc);
88     toRank=PASO_MPI_mod(myRank+(A->pattern->hop[h]),numProc);
89     mpi_info->msg_tag_counter++;
90     #pragma omp master
91     {
92     MPI_Irecv(rcv_buf,len_rcv_buf,MPI_DOUBLE,fromRank,
93     mpi_info->msg_tag_counter,mpi_info->comm,&snd_rqst);
94     MPI_Issend(snd_buf,len_snd_buf,MPI_DOUBLE,toRank,
95     mpi_info->msg_tag_counter,mpi_info->comm,&rcv_rqst);
96     }
97     }
98     /* annulate colums as input */
99     if (A ->col_block_size==1 && A ->row_block_size ==1) {
100     Paso_SystemMatrix_nullifyCols_CSR_BLK1(A,snd_buf,main_diagonal_value,
101     A->pattern->input_distribution->first_component[rank_of_snd_buf],
102     A->pattern->input_distribution->first_component[rank_of_snd_buf+1]);
103     } else {
104     Paso_SystemMatrix_nullifyCols_CSR(A,snd_buf,main_diagonal_value,
105     A->pattern->input_distribution->first_component[rank_of_snd_buf],
106     A->pattern->input_distribution->first_component[rank_of_snd_buf+1]);
107     }
108     /* wait communication to be finished */
109     if (h<A->pattern->numHops-1) {
110     #pragma omp master
111     {
112     MPI_Wait(&rcv_rqst,&status);
113     MPI_Wait(&snd_rqst,&status);
114     }
115     }
116     /* swap recieve and send buffer */
117     if (h==0) {
118     snd_buf = rcv_buf;
119     len_snd_buf = len_rcv_buf;
120     rcv_buf = buffer1;
121     } else {
122     swap = snd_buf;
123     snd_buf = rcv_buf;
124     rcv_buf = swap;
125     }
126     rank_of_snd_buf=fromRank;
127     }
128     printf("RRRRRR\n");
129     TMPMEMFREE(buffer0);
130     TMPMEMFREE(buffer1);
131     }
132     #endif
133     } else {
134     if (A ->col_block_size==1 && A ->row_block_size ==1) {
135     if (A->type & MATRIX_FORMAT_CSC) {
136     Paso_SystemMatrix_nullifyRowsAndCols_CSC_BLK1(A,mask_row,mask_col,main_diagonal_value);
137     } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
138     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported.");
139     } else {
140     Paso_SystemMatrix_nullifyRowsAndCols_CSR_BLK1(A,mask_row,mask_col,main_diagonal_value);
141     }
142     } else {
143     if (A->type & MATRIX_FORMAT_CSC) {
144     Paso_SystemMatrix_nullifyRowsAndCols_CSC(A,mask_row,mask_col,main_diagonal_value);
145     } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
146     Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
147     } else {
148     Paso_SystemMatrix_nullifyRowsAndCols_CSR(A,mask_row,mask_col,main_diagonal_value);
149     }
150     }
151     }
152     return;
153     }
154    
155     void Paso_SystemMatrix_nullifyRowsAndCols_CSC_BLK1(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
156 gross 415 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
157 gross 1087 index_t irow, iptr, icol;
158     #pragma omp parallel for private(irow, iptr,icol) schedule(static)
159     for (icol=0;icol< A->pattern->myNumOutput;icol++) {
160     for (iptr=A->pattern->ptr[icol]-index_offset;iptr<A->pattern->ptr[icol+1]-index_offset; iptr++) {
161 gross 415 irow=A->pattern->index[iptr]-index_offset;
162 jgs 150 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
163 gross 1087 if (irow==icol) {
164 jgs 150 A->val[iptr]=main_diagonal_value;
165     } else {
166     A->val[iptr]=0;
167     }
168     }
169     }
170     }
171 gross 1087 }
172     void Paso_SystemMatrix_nullifyRowsAndCols_CSR_BLK1(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
173     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
174     index_t irow, iptr, icol;
175     #pragma omp parallel for private(irow, iptr,icol) schedule(static)
176     for (irow=0;irow< A->pattern->myNumOutput;irow++) {
177     /* TODO: test mask_row here amd not inside every loop */
178     for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
179     icol=A->pattern->index[iptr]-index_offset;
180     if (mask_col[icol]>0. || mask_row[irow]>0. ) {
181     if (irow==icol) {
182 jgs 150 A->val[iptr]=main_diagonal_value;
183     } else {
184     A->val[iptr]=0;
185     }
186     }
187 gross 1087 }
188     }
189     }
190     void Paso_SystemMatrix_nullifyRowsAndCols_CSC(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
191     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
192     index_t ir,icol,iptr,icb,irb,irow,ic,l;
193     #pragma omp parallel for private(l,irow, iptr,icol,ic,irb,icb) schedule(static)
194     for (ic=0;ic< A->pattern->myNumOutput;ic++) {
195 gross 415 for (iptr=A->pattern->ptr[ic]-index_offset;iptr<A->pattern->ptr[ic+1]-index_offset; iptr++) {
196 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
197 gross 415 irow=irb+A->row_block_size*(A->pattern->index[iptr]-index_offset);
198 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
199 gross 415 icol=icb+A->col_block_size*ic;
200 jgs 150 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
201     l=iptr*A->block_size+irb+A->row_block_size*icb;
202 gross 1087 if (irow==icol) {
203 jgs 150 A->val[l]=main_diagonal_value;
204     } else {
205     A->val[l]=0;
206     }
207     }
208     }
209     }
210     }
211 gross 1087 }
212     }
213     void Paso_SystemMatrix_nullifyRowsAndCols_CSR(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
214     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
215     index_t ir,icol,iptr,icb,irb,irow,ic,l;
216     #pragma omp parallel for private(l,irow, iptr,icol,ir,irb,icb) schedule(static)
217     for (ir=0;ir< A->pattern->myNumOutput;ir++) {
218 gross 415 for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
219 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
220 gross 415 irow=irb+A->row_block_size*ir;
221 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
222 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]-index_offset);
223 jgs 150 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
224     l=iptr*A->block_size+irb+A->row_block_size*icb;
225 gross 1087 if (irow==icol) {
226 jgs 150 A->val[l]=main_diagonal_value;
227     } else {
228     A->val[l]=0;
229     }
230     }
231     }
232     }
233     }
234 gross 1087 }
235     }
236     void Paso_SystemMatrix_nullifyRows_CSR_BLK1(Paso_SystemMatrix* A, double* mask_row, double main_diagonal_value) {
237     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
238     index_t irow, iptr, icol_global, irow_global;
239     index_t myFirstRow=A->myFirstRow;
240     #pragma omp parallel for private(irow, iptr,icol_global, irow_global) schedule(static)
241     for (irow=0;irow< A->pattern->myNumOutput;irow++) {
242     if (mask_row[irow]>0.) {
243     irow_global=myFirstRow+irow;
244     for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
245     icol_global=A->pattern->index[iptr]-index_offset;
246     if (irow_global==icol_global) {
247     A->val[iptr]=main_diagonal_value;
248     } else {
249     A->val[iptr]=0;
250     }
251     }
252     }
253     }
254     }
255     void Paso_SystemMatrix_nullifyRows_CSR(Paso_SystemMatrix* A, double* mask_row, double main_diagonal_value) {
256     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
257     index_t myFirstRow=A->myFirstRow;
258     index_t ir,icol,iptr,icb,irb,irow,ic,l, irow_global, icol_global;
259     #pragma omp parallel for private(l,irow, iptr,icol,ir,irb,icb, irow_global, icol_global) schedule(static)
260     for (ir=0;ir< A->pattern->myNumOutput;ir++) {
261     for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
262     for (irb=0;irb< A->row_block_size;irb++) {
263     irow=irb+A->row_block_size*ir;
264     if (mask_row[irow]>0. ) {
265     irow_global=irow+myFirstRow*A->row_block_size;
266     for (icb=0;icb< A->col_block_size;icb++) {
267     icol_global=icb+A->col_block_size*(A->pattern->index[iptr]-index_offset);
268     l=iptr*A->block_size+irb+A->row_block_size*icb;
269     if (irow_global==icol_global) {
270     A->val[l]=main_diagonal_value;
271     } else {
272     A->val[l]=0;
273     }
274     }
275     }
276     }
277     }
278     }
279     }
280     void Paso_SystemMatrix_nullifyCols_CSR_BLK1(Paso_SystemMatrix* A,
281     double* mask_col,
282     double main_diagonal_value,
283     index_t min_index, index_t max_index) {
284     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
285     index_t myFirstRow=A->myFirstRow;
286     index_t irow, iptr, icol, icol_global, irow_global;
287     #pragma omp parallel for private(irow, iptr,icol, icol_global, irow_global) schedule(static)
288     for (irow=0;irow< A->pattern->myNumOutput;irow++) {
289     irow_global=irow+myFirstRow;
290     for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
291     icol_global=A->pattern->index[iptr]-index_offset;
292     if (min_index <= icol_global && icol_global < max_index) {
293     icol=icol_global-min_index;
294     if (mask_col[icol]>0.) {
295     if (irow_global==icol_global) {
296     A->val[iptr]=main_diagonal_value;
297     } else {
298     A->val[iptr]=0;
299     }
300     }
301     }
302     }
303     }
304     }
305     void Paso_SystemMatrix_nullifyCols_CSR(Paso_SystemMatrix* A,
306     double* mask_col,
307     double main_diagonal_value,
308     index_t min_index, index_t max_index) {
309     index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
310     index_t myFirstRow=A->myFirstRow;
311     index_t ir,icol,iptr,icb,irb,irow,ic,l, icol_global, irow_global, ic_global;
312 ksteube 1096 #pragma omp parallel for private(l,irow, iptr,icol,ir,irb,icb, icol_global, irow_global, ic_global) schedule(static)
313 gross 1087 for (ir=0;ir< A->pattern->myNumOutput;ir++) {
314     for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
315     ic_global=A->pattern->index[iptr]-index_offset;
316     if (min_index <= ic_global && ic_global < max_index) {
317     for (icb=0;icb< A->col_block_size;icb++) {
318     icol_global=icb+A->col_block_size*ic_global;
319     icol=icb+A->col_block_size*(ic_global-min_index);
320     if (mask_col[icol]>0.) {
321     for (irb=0;irb< A->row_block_size;irb++) {
322     irow_global=irb+A->row_block_size*(ir+myFirstRow);
323     l=iptr*A->block_size+irb+A->row_block_size*icb;
324     if (irow_global==icol_global) {
325     A->val[l]=main_diagonal_value;
326     } else {
327     A->val[l]=0;
328     }
329     }
330     }
331     }
332     }
333 jgs 150 }
334     }
335     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26