/[escript]/trunk-mpi-branch/paso/src/SystemMatrix_nullifyRowsAndCols.c
ViewVC logotype

Contents of /trunk-mpi-branch/paso/src/SystemMatrix_nullifyRowsAndCols.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1087 - (show annotations)
Thu Apr 12 10:01:47 2007 UTC (12 years, 2 months ago) by gross
File MIME type: text/plain
File size: 14308 byte(s)
the MPI version of PASO.PCG is running. 
There is a bug in the rectangular mesh generators but they need to be
revised in any case to clean up the code.


1 /* $Id$ */
2
3 /*
4 ********************************************************************************
5 * Copyright 2006 by ACcESS MNRF *
6 * *
7 * http://www.access.edu.au *
8 * Primary Business: Queensland, Australia *
9 * Licensed under the Open Software License version 3.0 *
10 * http://www.opensource.org/licenses/osl-3.0.php *
11 ********************************************************************************
12 */
13
14 /**************************************************************/
15
16 /* Paso: SystemMatrix */
17
18 /* nullify rows and columns in the matrix */
19
20 /* the rows and columns are marked by positive values in */
21 /* mask_row and mask_col. Values on the main diagonal */
22 /* which are marked to set to zero by both mask_row and */
23 /* mask_col are set to main_diagonal_value */
24
25
26 /**************************************************************/
27
28 /* Copyrights by ACcESS Australia 2003 */
29 /* Author: gross@access.edu.au */
30
31 /**************************************************************/
32
33 #include "Paso.h"
34 #include "SystemMatrix.h"
35
36 void Paso_SystemMatrix_nullifyRowsAndCols(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
37
38 dim_t N=A->maxNumCols * A->col_block_size;
39 Paso_MPIInfo *mpi_info=A->mpi_info;
40 double* buffer0, *buffer1;
41 if (mpi_info->size>1) {
42 if (A ->col_block_size==1 && A ->row_block_size ==1) {
43 if (A->type & MATRIX_FORMAT_CSC) {
44 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: CSC with block size 1 is not supported by MPI.");
45 return;
46 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
47 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
48 return;
49 } else {
50 Paso_SystemMatrix_nullifyRows_CSR_BLK1(A,mask_row,main_diagonal_value);
51 }
52 } else {
53 if (A->type & MATRIX_FORMAT_CSC) {
54 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: CSC is not supported by MPI.");
55 return;
56 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
57 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
58 return;
59 } else {
60 Paso_SystemMatrix_nullifyRows_CSR(A,mask_row,main_diagonal_value);
61 }
62 }
63 #ifdef PASO_MPI
64 {
65 buffer0=TMPMEMALLOC(N,double);
66 buffer1=TMPMEMALLOC(N,double);
67 if (Finley_checkPtr(buffer0) || Finley_checkPtr(buffer1) ) {
68 TMPMEMFREE(buffer0);
69 TMPMEMFREE(buffer1);
70 return;
71 }
72 MPI_Status status;
73 MPI_Request snd_rqst, rcv_rqst;
74 double* snd_buf=mask_col, *rcv_buf=buffer0, *swap;
75 dim_t len_snd_buf=N;
76 dim_t len_rcv_buf=N;
77 dim_t h, fromRank, toRank;
78 dim_t myRank=mpi_info->rank;
79 dim_t numProc=mpi_info->size;
80 index_t rank_of_snd_buf=mpi_info->rank;
81
82
83 for (h=0; h<A->pattern->numHops; ++h) {
84
85 /* start asynchronos communication */
86 if (h<A->pattern->numHops-1) {
87 fromRank=PASO_MPI_mod(myRank-(A->pattern->hop[h]),numProc);
88 toRank=PASO_MPI_mod(myRank+(A->pattern->hop[h]),numProc);
89 mpi_info->msg_tag_counter++;
90 #pragma omp master
91 {
92 MPI_Irecv(rcv_buf,len_rcv_buf,MPI_DOUBLE,fromRank,
93 mpi_info->msg_tag_counter,mpi_info->comm,&snd_rqst);
94 MPI_Issend(snd_buf,len_snd_buf,MPI_DOUBLE,toRank,
95 mpi_info->msg_tag_counter,mpi_info->comm,&rcv_rqst);
96 }
97 }
98 /* annulate colums as input */
99 if (A ->col_block_size==1 && A ->row_block_size ==1) {
100 Paso_SystemMatrix_nullifyCols_CSR_BLK1(A,snd_buf,main_diagonal_value,
101 A->pattern->input_distribution->first_component[rank_of_snd_buf],
102 A->pattern->input_distribution->first_component[rank_of_snd_buf+1]);
103 } else {
104 Paso_SystemMatrix_nullifyCols_CSR(A,snd_buf,main_diagonal_value,
105 A->pattern->input_distribution->first_component[rank_of_snd_buf],
106 A->pattern->input_distribution->first_component[rank_of_snd_buf+1]);
107 }
108 /* wait communication to be finished */
109 if (h<A->pattern->numHops-1) {
110 #pragma omp master
111 {
112 MPI_Wait(&rcv_rqst,&status);
113 MPI_Wait(&snd_rqst,&status);
114 }
115 }
116 /* swap recieve and send buffer */
117 if (h==0) {
118 snd_buf = rcv_buf;
119 len_snd_buf = len_rcv_buf;
120 rcv_buf = buffer1;
121 } else {
122 swap = snd_buf;
123 snd_buf = rcv_buf;
124 rcv_buf = swap;
125 }
126 rank_of_snd_buf=fromRank;
127 }
128 printf("RRRRRR\n");
129 TMPMEMFREE(buffer0);
130 TMPMEMFREE(buffer1);
131 }
132 #endif
133 } else {
134 if (A ->col_block_size==1 && A ->row_block_size ==1) {
135 if (A->type & MATRIX_FORMAT_CSC) {
136 Paso_SystemMatrix_nullifyRowsAndCols_CSC_BLK1(A,mask_row,mask_col,main_diagonal_value);
137 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
138 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported.");
139 } else {
140 Paso_SystemMatrix_nullifyRowsAndCols_CSR_BLK1(A,mask_row,mask_col,main_diagonal_value);
141 }
142 } else {
143 if (A->type & MATRIX_FORMAT_CSC) {
144 Paso_SystemMatrix_nullifyRowsAndCols_CSC(A,mask_row,mask_col,main_diagonal_value);
145 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
146 Paso_setError(SYSTEM_ERROR,"Paso_SystemMatrix_nullifyRowsAndCols: TRILINOS is not supported with MPI.");
147 } else {
148 Paso_SystemMatrix_nullifyRowsAndCols_CSR(A,mask_row,mask_col,main_diagonal_value);
149 }
150 }
151 }
152 return;
153 }
154
155 void Paso_SystemMatrix_nullifyRowsAndCols_CSC_BLK1(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
156 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
157 index_t irow, iptr, icol;
158 #pragma omp parallel for private(irow, iptr,icol) schedule(static)
159 for (icol=0;icol< A->pattern->myNumOutput;icol++) {
160 for (iptr=A->pattern->ptr[icol]-index_offset;iptr<A->pattern->ptr[icol+1]-index_offset; iptr++) {
161 irow=A->pattern->index[iptr]-index_offset;
162 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
163 if (irow==icol) {
164 A->val[iptr]=main_diagonal_value;
165 } else {
166 A->val[iptr]=0;
167 }
168 }
169 }
170 }
171 }
172 void Paso_SystemMatrix_nullifyRowsAndCols_CSR_BLK1(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
173 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
174 index_t irow, iptr, icol;
175 #pragma omp parallel for private(irow, iptr,icol) schedule(static)
176 for (irow=0;irow< A->pattern->myNumOutput;irow++) {
177 /* TODO: test mask_row here amd not inside every loop */
178 for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
179 icol=A->pattern->index[iptr]-index_offset;
180 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
181 if (irow==icol) {
182 A->val[iptr]=main_diagonal_value;
183 } else {
184 A->val[iptr]=0;
185 }
186 }
187 }
188 }
189 }
190 void Paso_SystemMatrix_nullifyRowsAndCols_CSC(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
191 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
192 index_t ir,icol,iptr,icb,irb,irow,ic,l;
193 #pragma omp parallel for private(l,irow, iptr,icol,ic,irb,icb) schedule(static)
194 for (ic=0;ic< A->pattern->myNumOutput;ic++) {
195 for (iptr=A->pattern->ptr[ic]-index_offset;iptr<A->pattern->ptr[ic+1]-index_offset; iptr++) {
196 for (irb=0;irb< A->row_block_size;irb++) {
197 irow=irb+A->row_block_size*(A->pattern->index[iptr]-index_offset);
198 for (icb=0;icb< A->col_block_size;icb++) {
199 icol=icb+A->col_block_size*ic;
200 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
201 l=iptr*A->block_size+irb+A->row_block_size*icb;
202 if (irow==icol) {
203 A->val[l]=main_diagonal_value;
204 } else {
205 A->val[l]=0;
206 }
207 }
208 }
209 }
210 }
211 }
212 }
213 void Paso_SystemMatrix_nullifyRowsAndCols_CSR(Paso_SystemMatrix* A, double* mask_row, double* mask_col, double main_diagonal_value) {
214 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
215 index_t ir,icol,iptr,icb,irb,irow,ic,l;
216 #pragma omp parallel for private(l,irow, iptr,icol,ir,irb,icb) schedule(static)
217 for (ir=0;ir< A->pattern->myNumOutput;ir++) {
218 for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
219 for (irb=0;irb< A->row_block_size;irb++) {
220 irow=irb+A->row_block_size*ir;
221 for (icb=0;icb< A->col_block_size;icb++) {
222 icol=icb+A->col_block_size*(A->pattern->index[iptr]-index_offset);
223 if (mask_col[icol]>0. || mask_row[irow]>0. ) {
224 l=iptr*A->block_size+irb+A->row_block_size*icb;
225 if (irow==icol) {
226 A->val[l]=main_diagonal_value;
227 } else {
228 A->val[l]=0;
229 }
230 }
231 }
232 }
233 }
234 }
235 }
236 void Paso_SystemMatrix_nullifyRows_CSR_BLK1(Paso_SystemMatrix* A, double* mask_row, double main_diagonal_value) {
237 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
238 index_t irow, iptr, icol_global, irow_global;
239 index_t myFirstRow=A->myFirstRow;
240 #pragma omp parallel for private(irow, iptr,icol_global, irow_global) schedule(static)
241 for (irow=0;irow< A->pattern->myNumOutput;irow++) {
242 if (mask_row[irow]>0.) {
243 irow_global=myFirstRow+irow;
244 for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
245 icol_global=A->pattern->index[iptr]-index_offset;
246 if (irow_global==icol_global) {
247 A->val[iptr]=main_diagonal_value;
248 } else {
249 A->val[iptr]=0;
250 }
251 }
252 }
253 }
254 }
255 void Paso_SystemMatrix_nullifyRows_CSR(Paso_SystemMatrix* A, double* mask_row, double main_diagonal_value) {
256 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
257 index_t myFirstRow=A->myFirstRow;
258 index_t ir,icol,iptr,icb,irb,irow,ic,l, irow_global, icol_global;
259 #pragma omp parallel for private(l,irow, iptr,icol,ir,irb,icb, irow_global, icol_global) schedule(static)
260 for (ir=0;ir< A->pattern->myNumOutput;ir++) {
261 for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
262 for (irb=0;irb< A->row_block_size;irb++) {
263 irow=irb+A->row_block_size*ir;
264 if (mask_row[irow]>0. ) {
265 irow_global=irow+myFirstRow*A->row_block_size;
266 for (icb=0;icb< A->col_block_size;icb++) {
267 icol_global=icb+A->col_block_size*(A->pattern->index[iptr]-index_offset);
268 l=iptr*A->block_size+irb+A->row_block_size*icb;
269 if (irow_global==icol_global) {
270 A->val[l]=main_diagonal_value;
271 } else {
272 A->val[l]=0;
273 }
274 }
275 }
276 }
277 }
278 }
279 }
280 void Paso_SystemMatrix_nullifyCols_CSR_BLK1(Paso_SystemMatrix* A,
281 double* mask_col,
282 double main_diagonal_value,
283 index_t min_index, index_t max_index) {
284 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
285 index_t myFirstRow=A->myFirstRow;
286 index_t irow, iptr, icol, icol_global, irow_global;
287 #pragma omp parallel for private(irow, iptr,icol, icol_global, irow_global) schedule(static)
288 for (irow=0;irow< A->pattern->myNumOutput;irow++) {
289 irow_global=irow+myFirstRow;
290 for (iptr=A->pattern->ptr[irow]-index_offset;iptr<A->pattern->ptr[irow+1]-index_offset; iptr++) {
291 icol_global=A->pattern->index[iptr]-index_offset;
292 if (min_index <= icol_global && icol_global < max_index) {
293 icol=icol_global-min_index;
294 if (mask_col[icol]>0.) {
295 if (irow_global==icol_global) {
296 A->val[iptr]=main_diagonal_value;
297 } else {
298 A->val[iptr]=0;
299 }
300 }
301 }
302 }
303 }
304 }
305 void Paso_SystemMatrix_nullifyCols_CSR(Paso_SystemMatrix* A,
306 double* mask_col,
307 double main_diagonal_value,
308 index_t min_index, index_t max_index) {
309 index_t index_offset=(A->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
310 index_t myFirstRow=A->myFirstRow;
311 index_t ir,icol,iptr,icb,irb,irow,ic,l, icol_global, irow_global, ic_global;
312 #pragma omp parallel for private(itmp, l,irow, iptr,icol,ir,irb,icb, icol_global, irow_global, ic_global) schedule(static)
313 for (ir=0;ir< A->pattern->myNumOutput;ir++) {
314 for (iptr=A->pattern->ptr[ir]-index_offset;iptr<A->pattern->ptr[ir+1]-index_offset; iptr++) {
315 ic_global=A->pattern->index[iptr]-index_offset;
316 if (min_index <= ic_global && ic_global < max_index) {
317 for (icb=0;icb< A->col_block_size;icb++) {
318 icol_global=icb+A->col_block_size*ic_global;
319 icol=icb+A->col_block_size*(ic_global-min_index);
320 if (mask_col[icol]>0.) {
321 for (irb=0;irb< A->row_block_size;irb++) {
322 irow_global=irb+A->row_block_size*(ir+myFirstRow);
323 l=iptr*A->block_size+irb+A->row_block_size*icb;
324 if (irow_global==icol_global) {
325 A->val[l]=main_diagonal_value;
326 } else {
327 A->val[l]=0;
328 }
329 }
330 }
331 }
332 }
333 }
334 }
335 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26