1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/************************************************************* |
16 |
|
17 |
Paso: Coarsening strategies (no MPI) |
18 |
|
19 |
the given matrix A is splitted in to the form |
20 |
|
21 |
|
22 |
[ A_FF A_FC ] |
23 |
A = [ ] |
24 |
[ A_CF A_CC ] |
25 |
|
26 |
such that unknowns/equations in set C are weakly connected via A_CC and strongly connected |
27 |
to at least one unknowns/equation in F via A_CF and A_FC. The unknowns/equations in C and F |
28 |
are marked in the marker_F by FALSE and TRUE respectively. |
29 |
|
30 |
The weak/strong connection is controlled by coarsening_threshold. |
31 |
|
32 |
three strategies are implemented: |
33 |
|
34 |
a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii| |
35 |
b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik| |
36 |
c) Aggregation : |a_ij|^2 >= theta**2 * |a_ii||a_jj| |
37 |
|
38 |
where theta = coarsening_threshold/maximum_pattern_degree |
39 |
|
40 |
Remark: |
41 |
|
42 |
- a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation |
43 |
|
44 |
*/ |
45 |
/************************************************************** |
46 |
|
47 |
Author: artak@uq.edu.au, l.gross@uq.edu.au |
48 |
|
49 |
**************************************************************/ |
50 |
|
51 |
#include "Coarsening.h" |
52 |
#include "PasoUtil.h" |
53 |
#include <limits.h> |
54 |
|
55 |
|
56 |
|
57 |
/*Used in Paso_Coarsening_Local_RS_MI*/ |
58 |
|
59 |
/*Computes how many connections unknown i have in S. |
60 |
bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise. |
61 |
Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE. |
62 |
*/ |
63 |
dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose) { |
64 |
dim_t j,n; |
65 |
dim_t total,ltotal; |
66 |
index_t *index,*where_p; |
67 |
|
68 |
/*index_t iptr;*/ |
69 |
total=0; |
70 |
ltotal=0; |
71 |
|
72 |
n=S->numOutput; |
73 |
|
74 |
if(transpose) { |
75 |
#pragma omp parallel |
76 |
{ |
77 |
ltotal=0; |
78 |
#pragma omp for private(j,index,where_p,ltotal) schedule(static) |
79 |
for (j=0;j<n;++j) { |
80 |
index=&(S->index[S->ptr[j]]); |
81 |
where_p=(index_t*)bsearch(&i, |
82 |
index, |
83 |
S->ptr[j + 1]-S->ptr[j], |
84 |
sizeof(index_t), |
85 |
Paso_comparIndex); |
86 |
if (where_p!=NULL) { |
87 |
ltotal++; |
88 |
} |
89 |
} |
90 |
} |
91 |
#pragma omp critical |
92 |
{ |
93 |
total+=ltotal; |
94 |
} |
95 |
|
96 |
} |
97 |
else { |
98 |
total=S->ptr[i+1]-S->ptr[i]; |
99 |
/*#pragma omp parallel for private(iptr) schedule(static)*/ |
100 |
/*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) { |
101 |
if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE) |
102 |
total++; |
103 |
}*/ |
104 |
|
105 |
} |
106 |
|
107 |
if (total==0) total=IS_NOT_AVAILABLE; |
108 |
|
109 |
return total; |
110 |
} |
111 |
|
112 |
|
113 |
|
114 |
|
115 |
|
116 |
Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P){ |
117 |
|
118 |
Paso_Pattern *outpattern=NULL; |
119 |
|
120 |
dim_t C=P->numInput; |
121 |
dim_t F=P->numOutput-C; |
122 |
dim_t n=C+F; |
123 |
dim_t i,j; |
124 |
index_t iptr; |
125 |
Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(C); |
126 |
|
127 |
|
128 |
/*#pragma omp parallel for private(i,iptr,j) schedule(static)*/ |
129 |
for (i=0;i<n;++i) { |
130 |
for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) { |
131 |
j=P->index[iptr]; |
132 |
Paso_IndexListArray_insertIndex(index_list,i,j); |
133 |
} |
134 |
} |
135 |
|
136 |
outpattern=Paso_Pattern_fromIndexListArray(0, index_list,0,n,0); |
137 |
|
138 |
/* clean up */ |
139 |
Paso_IndexListArray_free(index_list); |
140 |
|
141 |
return outpattern; |
142 |
} |
143 |
|
144 |
|
145 |
/************** BLOCK COARSENENING *********************/ |
146 |
|
147 |
void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta) |
148 |
{ |
149 |
const dim_t n=A->numRows; |
150 |
|
151 |
dim_t i,j,k; |
152 |
index_t iptr,jptr; |
153 |
/*index_t *index,*where_p;*/ |
154 |
double threshold,max_offdiagonal; |
155 |
dim_t *lambda; /*mesure of importance */ |
156 |
dim_t maxlambda=0; |
157 |
index_t index_maxlambda=0; |
158 |
double time0=0; |
159 |
bool_t verbose=0; |
160 |
dim_t n_block=A->row_block_size; |
161 |
|
162 |
double fnorm=0; |
163 |
dim_t bi; |
164 |
|
165 |
Paso_Pattern *S=NULL; |
166 |
Paso_Pattern *ST=NULL; |
167 |
Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n); |
168 |
|
169 |
time0=Esys_timer(); |
170 |
k=0; |
171 |
/*Paso_Coarsening_Local_getReport(n,marker_F);*/ |
172 |
/*printf("Blocks %d %d\n",n_block,A->len);*/ |
173 |
|
174 |
/*S_i={j \in N_i; i strongly coupled to j}*/ |
175 |
#pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static) |
176 |
for (i=0;i<n;++i) { |
177 |
if(marker_F[i]==IS_AVAILABLE) { |
178 |
max_offdiagonal = DBL_MIN; |
179 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
180 |
j=A->pattern->index[iptr]; |
181 |
if( j != i){ |
182 |
fnorm=0; |
183 |
for(bi=0;bi<n_block*n_block;++bi) |
184 |
{ |
185 |
fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi]; |
186 |
} |
187 |
fnorm=sqrt(fnorm); |
188 |
max_offdiagonal = MAX(max_offdiagonal,fnorm); |
189 |
} |
190 |
} |
191 |
threshold = theta*max_offdiagonal; |
192 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
193 |
j=A->pattern->index[iptr]; |
194 |
if( j != i){ |
195 |
fnorm=0; |
196 |
for(bi=0;bi<n_block*n_block;++bi) |
197 |
{ |
198 |
fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi]; |
199 |
} |
200 |
fnorm=sqrt(fnorm); |
201 |
if(fnorm>=threshold) { |
202 |
Paso_IndexListArray_insertIndex(index_list,i,j); |
203 |
} |
204 |
} |
205 |
} |
206 |
} |
207 |
} |
208 |
|
209 |
S=Paso_Pattern_fromIndexListArray(0,index_list,0,A->pattern->numInput,0); |
210 |
ST=Paso_Coarsening_Local_getTranspose(S); |
211 |
|
212 |
/*printf("Patterns len %d %d\n",S->len,ST->len);*/ |
213 |
|
214 |
time0=Esys_timer()-time0; |
215 |
if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0); |
216 |
|
217 |
lambda=TMPMEMALLOC(n,dim_t); |
218 |
|
219 |
#pragma omp parallel for private(i) schedule(static) |
220 |
for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; } |
221 |
|
222 |
/*S_i={j \in N_i; i strongly coupled to j}*/ |
223 |
|
224 |
/* |
225 |
#pragma omp parallel for private(i,iptr,lk) schedule(static) |
226 |
for (i=0;i<n;++i) { |
227 |
lk=0; |
228 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
229 |
if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i ) |
230 |
lk++; |
231 |
} |
232 |
#pragma omp critical |
233 |
k+=lk; |
234 |
if(k==0) { |
235 |
marker_F[i]=TRUE; |
236 |
} |
237 |
} |
238 |
*/ |
239 |
|
240 |
|
241 |
k=0; |
242 |
maxlambda=0; |
243 |
|
244 |
time0=Esys_timer(); |
245 |
|
246 |
for (i=0;i<n;++i) { |
247 |
if(marker_F[i]==IS_AVAILABLE) { |
248 |
lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/ |
249 |
/*lambda[i]=how_many(i,S,TRUE);*/ |
250 |
/*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/ |
251 |
k++; |
252 |
if(maxlambda<lambda[i]) { |
253 |
maxlambda=lambda[i]; |
254 |
index_maxlambda=i; |
255 |
} |
256 |
} |
257 |
} |
258 |
|
259 |
k=0; |
260 |
time0=Esys_timer()-time0; |
261 |
if (verbose) fprintf(stdout,"timing: Lambdas computations at the begining: %e\n",time0); |
262 |
|
263 |
time0=Esys_timer(); |
264 |
|
265 |
/*Paso_Coarsening_Local_getReport(n,marker_F);*/ |
266 |
|
267 |
while (Paso_Util_isAny(n,marker_F,IS_AVAILABLE)) { |
268 |
|
269 |
if(index_maxlambda<0) { |
270 |
break; |
271 |
} |
272 |
|
273 |
i=index_maxlambda; |
274 |
if(marker_F[i]==IS_AVAILABLE) { |
275 |
marker_F[index_maxlambda]=FALSE; |
276 |
lambda[index_maxlambda]=IS_NOT_AVAILABLE; |
277 |
for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) { |
278 |
j=ST->index[iptr]; |
279 |
if(marker_F[j]==IS_AVAILABLE) { |
280 |
marker_F[j]=TRUE; |
281 |
lambda[j]=IS_NOT_AVAILABLE; |
282 |
for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) { |
283 |
k=S->index[jptr]; |
284 |
if(marker_F[k]==IS_AVAILABLE) { |
285 |
lambda[k]++; |
286 |
} |
287 |
} |
288 |
} |
289 |
} |
290 |
for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) { |
291 |
j=S->index[iptr]; |
292 |
if(marker_F[j]==IS_AVAILABLE) { |
293 |
lambda[j]--; |
294 |
} |
295 |
} |
296 |
} |
297 |
|
298 |
/* Used when transpose of S is not available */ |
299 |
/* |
300 |
for (i=0;i<n;++i) { |
301 |
if(marker_F[i]==IS_AVAILABLE) { |
302 |
if (i==index_maxlambda) { |
303 |
marker_F[index_maxlambda]=FALSE; |
304 |
lambda[index_maxlambda]=-1; |
305 |
for (j=0;j<n;++j) { |
306 |
if(marker_F[j]==IS_AVAILABLE) { |
307 |
index=&(S->index[S->ptr[j]]); |
308 |
where_p=(index_t*)bsearch(&i, |
309 |
index, |
310 |
S->ptr[j + 1]-S->ptr[j], |
311 |
sizeof(index_t), |
312 |
Paso_comparIndex); |
313 |
if (where_p!=NULL) { |
314 |
marker_F[j]=TRUE; |
315 |
lambda[j]=-1; |
316 |
for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) { |
317 |
k=S->index[iptr]; |
318 |
if(marker_F[k]==IS_AVAILABLE) { |
319 |
lambda[k]++; |
320 |
} |
321 |
} |
322 |
} |
323 |
|
324 |
} |
325 |
} |
326 |
|
327 |
} |
328 |
} |
329 |
} |
330 |
*/ |
331 |
index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE); |
332 |
} |
333 |
|
334 |
time0=Esys_timer()-time0; |
335 |
if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0); |
336 |
|
337 |
/*Paso_Coarsening_Local_getReport(n,marker_F);*/ |
338 |
|
339 |
#pragma omp parallel for private(i) schedule(static) |
340 |
for (i=0;i<n;++i) |
341 |
if(marker_F[i]==IS_AVAILABLE) { |
342 |
marker_F[i]=TRUE; |
343 |
} |
344 |
|
345 |
/*Paso_Coarsening_Local_getReport(n,marker_F);*/ |
346 |
|
347 |
TMPMEMFREE(lambda); |
348 |
|
349 |
/* clean up */ |
350 |
Paso_IndexListArray_free(index_list); |
351 |
Paso_Pattern_free(S); |
352 |
|
353 |
/* swap to TRUE/FALSE in marker_F */ |
354 |
#pragma omp parallel for private(i) schedule(static) |
355 |
for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE; |
356 |
|
357 |
} |
358 |
|
359 |
|
360 |
|
361 |
#undef IS_AVAILABLE |
362 |
#undef IS_NOT_AVAILABLE |
363 |
#undef TRUE |
364 |
#undef FALSE |
365 |
|
366 |
#undef IS_UNDECIDED |
367 |
#undef IS_STRONG |
368 |
#undef IS_WEAK |
369 |
|
370 |
#undef TRUEB |
371 |
#undef TRUEB |
372 |
|