1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2009 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**********************************************************************/ |
16 |
|
17 |
/* Paso: Pattern: Paso_Pattern_coupling |
18 |
|
19 |
searches for a maximal independent set MIS in the matrix pattern |
20 |
vertices in the maximal independent set are marked in mis_marker |
21 |
nodes to be considered are marked by -1 on the input in mis_marker |
22 |
|
23 |
*/ |
24 |
/**********************************************************************/ |
25 |
|
26 |
/* Copyrights by ACcESS Australia 2003,2004,2005 */ |
27 |
/* Author: artak@uq.edu.au */ |
28 |
|
29 |
/**************************************************************/ |
30 |
|
31 |
#include "PasoUtil.h" |
32 |
#include "Pattern_coupling.h" |
33 |
#include <limits.h> |
34 |
|
35 |
|
36 |
/***************************************************************/ |
37 |
|
38 |
#define IS_AVAILABLE -1 |
39 |
#define IS_NOT_AVAILABLE -2 |
40 |
#define IS_IN_F -3 /* in F (strong) */ |
41 |
#define IS_IN_C -4 /* in C (weak) */ |
42 |
|
43 |
|
44 |
#define IS_UNDECIDED -1 |
45 |
#define IS_STRONG -2 |
46 |
#define IS_WEAK -3 |
47 |
|
48 |
|
49 |
#define IS_IN_FA -5 /* test */ |
50 |
#define IS_IN_FB -6 /* test */ |
51 |
|
52 |
|
53 |
void Paso_Pattern_Read(char *fileName,dim_t n,index_t* mis_marker) { |
54 |
|
55 |
dim_t i; |
56 |
int scan_ret; |
57 |
|
58 |
FILE *fileHandle_p = NULL; |
59 |
|
60 |
fileHandle_p = fopen( fileName, "r" ); |
61 |
if( fileHandle_p == NULL ) |
62 |
{ |
63 |
Paso_setError(IO_ERROR, "Paso_Pattern_Read: Cannot open file for reading."); |
64 |
return; |
65 |
} |
66 |
|
67 |
for (i=0;i<n;++i) { |
68 |
scan_ret=fscanf( fileHandle_p, "%d\n", &mis_marker[i]); |
69 |
if (scan_ret!=1) { |
70 |
Paso_setError(IO_ERROR, "Paso_Pattern_Read: Cannot read line from file."); |
71 |
return; |
72 |
} |
73 |
} |
74 |
|
75 |
fclose(fileHandle_p); |
76 |
|
77 |
/* swap to TRUE/FALSE in mis_marker */ |
78 |
#pragma omp parallel for private(i) schedule(static) |
79 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]!=1); |
80 |
} |
81 |
|
82 |
|
83 |
|
84 |
void Paso_Pattern_YS(Paso_SparseMatrix* A, index_t* mis_marker, double threshold) { |
85 |
|
86 |
dim_t i,j; |
87 |
/*double sum;*/ |
88 |
index_t iptr; |
89 |
/*index_t *index,*where_p;*/ |
90 |
bool_t passed=FALSE; |
91 |
dim_t n=A->numRows; |
92 |
double *diags; |
93 |
diags=MEMALLOC(n,double); |
94 |
|
95 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
96 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_coup: symmetric matrix pattern is not supported yet"); |
97 |
return; |
98 |
} |
99 |
|
100 |
#pragma omp parallel for private(i) schedule(static) |
101 |
for (i=0;i<n;++i) |
102 |
if(mis_marker[i]==IS_AVAILABLE) |
103 |
mis_marker[i]=IS_IN_C; |
104 |
|
105 |
#pragma omp parallel for private(i,j) schedule(static) |
106 |
for (i=0;i<n;++i) { |
107 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
108 |
j=A->pattern->index[iptr]; |
109 |
if(i==j) { |
110 |
diags[i]=A->val[iptr]; |
111 |
} |
112 |
} |
113 |
} |
114 |
|
115 |
/*This loop cannot be parallelized, as order matters here.*/ |
116 |
for (i=0;i<n;++i) { |
117 |
if (mis_marker[i]==IS_IN_C) { |
118 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
119 |
j=A->pattern->index[iptr]; |
120 |
if (j!=i && ABS(A->val[iptr])>=threshold*ABS(diags[i])) { |
121 |
mis_marker[j]=IS_IN_F; |
122 |
} |
123 |
} |
124 |
} |
125 |
} |
126 |
|
127 |
|
128 |
|
129 |
/*This loop cannot be parallelized, as order matters here.*/ |
130 |
for (i=0;i<n;i++) { |
131 |
if (mis_marker[i]==IS_IN_F) { |
132 |
passed=TRUE; |
133 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
134 |
j=A->pattern->index[iptr]; |
135 |
if (mis_marker[j]==IS_IN_C) { |
136 |
if ((A->val[iptr]/diags[i])>=-threshold) { |
137 |
passed=TRUE; |
138 |
} |
139 |
else { |
140 |
passed=FALSE; |
141 |
break; |
142 |
} |
143 |
} |
144 |
} |
145 |
if (passed) mis_marker[i]=IS_IN_C; |
146 |
} |
147 |
} |
148 |
|
149 |
/* swap to TRUE/FALSE in mis_marker */ |
150 |
#pragma omp parallel for private(i) schedule(static) |
151 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
152 |
|
153 |
MEMFREE(diags); |
154 |
} |
155 |
|
156 |
|
157 |
/* |
158 |
* Ruge-Stueben strength of connection mask. |
159 |
* |
160 |
*/ |
161 |
void Paso_Pattern_RS(Paso_SparseMatrix* A, index_t* mis_marker, double theta) |
162 |
{ |
163 |
dim_t i,n,j; |
164 |
index_t iptr; |
165 |
double threshold,max_offdiagonal; |
166 |
|
167 |
Paso_Pattern *out=NULL; |
168 |
|
169 |
Paso_IndexList* index_list=NULL; |
170 |
|
171 |
index_list=TMPMEMALLOC(A->pattern->numOutput,Paso_IndexList); |
172 |
if (! Paso_checkPtr(index_list)) { |
173 |
#pragma omp parallel for private(i) schedule(static) |
174 |
for(i=0;i<A->pattern->numOutput;++i) { |
175 |
index_list[i].extension=NULL; |
176 |
index_list[i].n=0; |
177 |
} |
178 |
} |
179 |
|
180 |
|
181 |
n=A->numRows; |
182 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
183 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_RS: symmetric matrix pattern is not supported yet"); |
184 |
return; |
185 |
} |
186 |
/*#pragma omp parallel for private(i,iptr,max_offdiagonal,threshold,j) schedule(static)*/ |
187 |
for (i=0;i<n;++i) { |
188 |
if(mis_marker[i]==IS_AVAILABLE) { |
189 |
max_offdiagonal = DBL_MIN; |
190 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
191 |
if(A->pattern->index[iptr] != i){ |
192 |
max_offdiagonal = MAX(max_offdiagonal,ABS(A->val[iptr])); |
193 |
} |
194 |
} |
195 |
|
196 |
threshold = theta*max_offdiagonal; |
197 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
198 |
j=A->pattern->index[iptr]; |
199 |
if(ABS(A->val[iptr])>=threshold && i!=j) { |
200 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
201 |
/*Paso_IndexList_insertIndex(&(index_list[j]),i);*/ |
202 |
} |
203 |
} |
204 |
} |
205 |
} |
206 |
|
207 |
|
208 |
out=Paso_IndexList_createPattern(0, A->pattern->numOutput,index_list,0,A->pattern->numInput,0); |
209 |
|
210 |
/* clean up */ |
211 |
if (index_list!=NULL) { |
212 |
#pragma omp parallel for private(i) schedule(static) |
213 |
for(i=0;i<A->pattern->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
214 |
} |
215 |
TMPMEMFREE(index_list); |
216 |
|
217 |
/*Paso_Pattern_mis(out,mis_marker);*/ |
218 |
Paso_Pattern_greedy(out,mis_marker); |
219 |
Paso_Pattern_free(out); |
220 |
} |
221 |
|
222 |
void Paso_Pattern_Aggregiation(Paso_SparseMatrix* A, index_t* mis_marker, double theta) |
223 |
{ |
224 |
dim_t i,j,n; |
225 |
index_t iptr; |
226 |
double diag,eps_Aii,val; |
227 |
double* diags; |
228 |
|
229 |
|
230 |
Paso_Pattern *out=NULL; |
231 |
Paso_IndexList* index_list=NULL; |
232 |
|
233 |
n=A->numRows; |
234 |
diags=MEMALLOC(n,double); |
235 |
|
236 |
index_list=TMPMEMALLOC(A->pattern->numOutput,Paso_IndexList); |
237 |
if (! Paso_checkPtr(index_list)) { |
238 |
#pragma omp parallel for private(i) schedule(static) |
239 |
for(i=0;i<A->pattern->numOutput;++i) { |
240 |
index_list[i].extension=NULL; |
241 |
index_list[i].n=0; |
242 |
} |
243 |
} |
244 |
|
245 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
246 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_Aggregiation: symmetric matrix pattern is not supported yet"); |
247 |
return; |
248 |
} |
249 |
|
250 |
|
251 |
#pragma omp parallel for private(i,iptr,diag) schedule(static) |
252 |
for (i=0;i<n;++i) { |
253 |
diag = 0; |
254 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
255 |
if(A->pattern->index[iptr] == i){ |
256 |
diag+=A->val[iptr]; |
257 |
} |
258 |
} |
259 |
diags[i]=ABS(diag); |
260 |
} |
261 |
|
262 |
|
263 |
#pragma omp parallel for private(i,iptr,j,val,eps_Aii) schedule(static) |
264 |
for (i=0;i<n;++i) { |
265 |
if (mis_marker[i]==IS_AVAILABLE) { |
266 |
eps_Aii = theta*theta*diags[i]; |
267 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
268 |
j=A->pattern->index[iptr]; |
269 |
val=A->val[iptr]; |
270 |
if((val*val)>=(eps_Aii*diags[j])) { |
271 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
272 |
} |
273 |
} |
274 |
} |
275 |
} |
276 |
|
277 |
out=Paso_IndexList_createPattern(0, A->pattern->numOutput,index_list,0,A->pattern->numInput,0); |
278 |
|
279 |
/* clean up */ |
280 |
if (index_list!=NULL) { |
281 |
#pragma omp parallel for private(i) schedule(static) |
282 |
for(i=0;i<A->pattern->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
283 |
} |
284 |
|
285 |
TMPMEMFREE(index_list); |
286 |
MEMFREE(diags); |
287 |
|
288 |
|
289 |
/*Paso_Pattern_mis(out,mis_marker);*/ |
290 |
Paso_Pattern_greedy(out,mis_marker); |
291 |
Paso_Pattern_free(out); |
292 |
|
293 |
} |
294 |
|
295 |
/* Greedy algorithm */ |
296 |
void Paso_Pattern_greedy(Paso_Pattern* pattern, index_t* mis_marker) { |
297 |
|
298 |
dim_t i,j; |
299 |
/*double sum;*/ |
300 |
index_t iptr; |
301 |
bool_t passed=FALSE; |
302 |
dim_t n=pattern->numOutput; |
303 |
|
304 |
if (pattern->type & PATTERN_FORMAT_SYM) { |
305 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_greedy: symmetric matrix pattern is not supported yet"); |
306 |
return; |
307 |
} |
308 |
|
309 |
#pragma omp parallel for private(i) schedule(static) |
310 |
for (i=0;i<n;++i) |
311 |
if(mis_marker[i]==IS_AVAILABLE) |
312 |
mis_marker[i]=IS_IN_C; |
313 |
|
314 |
|
315 |
for (i=0;i<n;++i) { |
316 |
if (mis_marker[i]==IS_IN_C) { |
317 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
318 |
j=pattern->index[iptr]; |
319 |
mis_marker[j]=IS_IN_F; |
320 |
} |
321 |
} |
322 |
} |
323 |
|
324 |
|
325 |
for (i=0;i<n;i++) { |
326 |
if (mis_marker[i]==IS_IN_F) { |
327 |
passed=TRUE; |
328 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
329 |
j=pattern->index[iptr]; |
330 |
if (mis_marker[j]==IS_IN_F) { |
331 |
passed=TRUE; |
332 |
} |
333 |
else { |
334 |
passed=FALSE; |
335 |
break; |
336 |
} |
337 |
} |
338 |
if (passed) mis_marker[i]=IS_IN_C; |
339 |
} |
340 |
} |
341 |
|
342 |
/* swap to TRUE/FALSE in mis_marker */ |
343 |
#pragma omp parallel for private(i) schedule(static) |
344 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
345 |
|
346 |
} |
347 |
|
348 |
void Paso_Pattern_greedy_color(Paso_Pattern* pattern, index_t* mis_marker) { |
349 |
|
350 |
dim_t i,j; |
351 |
/*double sum;*/ |
352 |
index_t iptr; |
353 |
index_t num_colors; |
354 |
index_t* colorOf; |
355 |
register index_t color; |
356 |
bool_t passed=FALSE; |
357 |
dim_t n=pattern->numOutput; |
358 |
|
359 |
|
360 |
colorOf=MEMALLOC(n,index_t); |
361 |
|
362 |
if (pattern->type & PATTERN_FORMAT_SYM) { |
363 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_greedy: symmetric matrix pattern is not supported yet"); |
364 |
return; |
365 |
} |
366 |
|
367 |
Paso_Pattern_color(pattern,&num_colors,colorOf); |
368 |
|
369 |
/* We do not need this loop if we set IS_IN_MIS=IS_AVAILABLE. */ |
370 |
#pragma omp parallel for private(i) schedule(static) |
371 |
for (i=0;i<n;++i) |
372 |
if(mis_marker[i]==IS_AVAILABLE) |
373 |
mis_marker[i]=IS_IN_F; |
374 |
|
375 |
#pragma omp barrier |
376 |
for (color=0;color<num_colors;++color) { |
377 |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
378 |
for (i=0;i<n;++i) { |
379 |
if (colorOf[i]==color) { |
380 |
if (mis_marker[i]==IS_IN_F) { |
381 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
382 |
j=pattern->index[iptr]; |
383 |
if (colorOf[j]<color) |
384 |
mis_marker[j]=IS_IN_C; |
385 |
} |
386 |
} |
387 |
} |
388 |
} |
389 |
} |
390 |
|
391 |
|
392 |
#pragma omp barrier |
393 |
for (color=0;color<num_colors;++color) { |
394 |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
395 |
for (i=0;i<n;i++) { |
396 |
if (colorOf[i]==color) { |
397 |
if (mis_marker[i]==IS_IN_C) { |
398 |
passed=TRUE; |
399 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
400 |
j=pattern->index[iptr]; |
401 |
if (colorOf[j]<color && passed) { |
402 |
if (mis_marker[j]==IS_IN_C) { |
403 |
passed=TRUE; |
404 |
} |
405 |
else { |
406 |
passed=FALSE; |
407 |
/*break;*/ |
408 |
} |
409 |
} |
410 |
} |
411 |
if (passed) mis_marker[i]=IS_IN_F; |
412 |
} |
413 |
|
414 |
} |
415 |
} |
416 |
} |
417 |
|
418 |
/* swap to TRUE/FALSE in mis_marker */ |
419 |
#pragma omp parallel for private(i) schedule(static) |
420 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
421 |
|
422 |
MEMFREE(colorOf); |
423 |
} |
424 |
|
425 |
/*For testing */ |
426 |
void Paso_Pattern_greedy_diag(Paso_SparseMatrix* A, index_t* mis_marker, double threshold) { |
427 |
|
428 |
dim_t i,j=0,k; |
429 |
double *theta; |
430 |
index_t iptr; |
431 |
dim_t n=A->numRows; |
432 |
double rsum,diag=0; |
433 |
index_t *AvADJ; |
434 |
theta=MEMALLOC(n,double); |
435 |
AvADJ=MEMALLOC(n,index_t); |
436 |
|
437 |
|
438 |
|
439 |
|
440 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
441 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_coup: symmetric matrix pattern is not supported yet"); |
442 |
return; |
443 |
} |
444 |
|
445 |
|
446 |
#pragma omp parallel for private(i,iptr,j,rsum) schedule(static) |
447 |
for (i=0;i<n;++i) { |
448 |
rsum=0; |
449 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
450 |
j=A->pattern->index[iptr]; |
451 |
if(j!=i) { |
452 |
rsum+=ABS(A->val[iptr]); |
453 |
} |
454 |
else { |
455 |
diag=ABS(A->val[iptr]); |
456 |
} |
457 |
} |
458 |
theta[i]=diag/rsum; |
459 |
if(theta[i]>threshold) { |
460 |
mis_marker[i]=IS_IN_F; |
461 |
} |
462 |
} |
463 |
|
464 |
while (Paso_Util_isAny(n,mis_marker,IS_AVAILABLE)) { |
465 |
k=0; |
466 |
|
467 |
for (i=0;i<n;++i) { |
468 |
if(mis_marker[i]==IS_AVAILABLE) { |
469 |
if(k==0) { |
470 |
j=i; |
471 |
k++; |
472 |
} |
473 |
if(theta[j]>theta[i]) { |
474 |
j=i; |
475 |
} |
476 |
} |
477 |
} |
478 |
mis_marker[j]=IS_IN_C; |
479 |
|
480 |
for (iptr=A->pattern->ptr[j];iptr<A->pattern->ptr[j+1]; ++iptr) { |
481 |
k=A->pattern->index[iptr]; |
482 |
if(mis_marker[k]==IS_AVAILABLE) { |
483 |
AvADJ[k]=1; |
484 |
} |
485 |
else { |
486 |
AvADJ[k]=-1; |
487 |
} |
488 |
|
489 |
} |
490 |
|
491 |
for (i=0;i<n;++i) { |
492 |
if(AvADJ[i]) { |
493 |
rsum=0; |
494 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
495 |
k=A->pattern->index[iptr]; |
496 |
if(k!=i && mis_marker[k]!=IS_IN_C ) { |
497 |
rsum+=ABS(A->val[iptr]); |
498 |
} |
499 |
if(j==i) { |
500 |
diag=ABS(A->val[iptr]); |
501 |
} |
502 |
} |
503 |
theta[i]=diag/rsum; |
504 |
if(theta[i]>threshold) { |
505 |
mis_marker[i]=IS_IN_F; |
506 |
} |
507 |
} |
508 |
} |
509 |
|
510 |
|
511 |
} |
512 |
|
513 |
/* swap to TRUE/FALSE in mis_marker */ |
514 |
#pragma omp parallel for private(i) schedule(static) |
515 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
516 |
|
517 |
MEMFREE(AvADJ); |
518 |
MEMFREE(theta); |
519 |
} |
520 |
|
521 |
|
522 |
void Paso_Pattern_YS_plus(Paso_SparseMatrix* A, index_t* mis_marker, double alpha, double taw, double delta) { |
523 |
|
524 |
dim_t i,j; |
525 |
/*double sum;*/ |
526 |
index_t iptr,*index,*where_p,*diagptr; |
527 |
double *rsum; |
528 |
double sum; |
529 |
dim_t n=A->numRows; |
530 |
Paso_SparseMatrix* A_alpha; |
531 |
diagptr=MEMALLOC(n,index_t); |
532 |
rsum=MEMALLOC(n,double); |
533 |
|
534 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
535 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_coup: symmetric matrix pattern is not supported yet"); |
536 |
return; |
537 |
} |
538 |
|
539 |
A_alpha=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1, A->pattern,1,1, FALSE); |
540 |
#pragma omp parallel for private(i) schedule(static) |
541 |
for (i=0;i<A->len;++i) { |
542 |
A_alpha->val[i]=A->val[i]; |
543 |
} |
544 |
|
545 |
|
546 |
#pragma omp parallel for private(i) schedule(static) |
547 |
for (i=0;i<n;++i) |
548 |
if(mis_marker[i]==IS_AVAILABLE) |
549 |
mis_marker[i]=IS_IN_C; |
550 |
|
551 |
/*#pragma omp parallel for private(i,index,where_p) schedule(static)*/ |
552 |
for (i=0;i<n;++i) { |
553 |
diagptr[i]=A->pattern->ptr[i]; |
554 |
index=&(A->pattern->index[A->pattern->ptr[i]]); |
555 |
where_p=(index_t*)bsearch(&i, |
556 |
index, |
557 |
A->pattern->ptr[i + 1]-A->pattern->ptr[i], |
558 |
sizeof(index_t), |
559 |
Paso_comparIndex); |
560 |
if (where_p==NULL) { |
561 |
Paso_setError(VALUE_ERROR, "Paso_Pattern_coup: main diagonal element missing."); |
562 |
} else { |
563 |
diagptr[i]+=(index_t)(where_p-index); |
564 |
} |
565 |
} |
566 |
|
567 |
|
568 |
j=0; |
569 |
for (i=0;i<n;++i) { |
570 |
for (iptr=A_alpha->pattern->ptr[i];iptr<A_alpha->pattern->ptr[i+1]; ++iptr) { |
571 |
j=A_alpha->pattern->index[iptr]; |
572 |
if(i==j) { |
573 |
A_alpha->val[iptr]=0; |
574 |
} |
575 |
else { |
576 |
if( !(ABS(A_alpha->val[iptr])<alpha*MIN(ABS(A_alpha->val[diagptr[i]]),ABS(A_alpha->val[diagptr[j]])) || A_alpha->val[iptr]*A_alpha->val[diagptr[i]]>0 || A_alpha->val[iptr]*A_alpha->val[diagptr[i]]<0) ) { |
577 |
A_alpha->val[iptr]=0; |
578 |
} |
579 |
} |
580 |
|
581 |
} |
582 |
} |
583 |
|
584 |
|
585 |
for (i=0;i<n;++i) { |
586 |
rsum[i]=0; |
587 |
for (iptr=A_alpha->pattern->ptr[i];iptr<A_alpha->pattern->ptr[i+1]; ++iptr) { |
588 |
rsum[i]+=A_alpha->val[iptr]; |
589 |
} |
590 |
} |
591 |
|
592 |
#pragma omp parallel for private(i,j) schedule(static) |
593 |
for (i=0;i<n;++i) { |
594 |
for (iptr=A_alpha->pattern->ptr[i];iptr<A_alpha->pattern->ptr[i+1]; ++iptr) { |
595 |
j=A_alpha->pattern->index[iptr]; |
596 |
if (i==j) { |
597 |
A_alpha->val[iptr]=A->val[iptr]-A_alpha->val[iptr]+rsum[i]; |
598 |
} |
599 |
else { |
600 |
A_alpha->val[iptr]=A->val[iptr]-A_alpha->val[iptr]; |
601 |
} |
602 |
|
603 |
} |
604 |
} |
605 |
|
606 |
/*This loop cannot be parallelized, as order matters here.*/ |
607 |
for (i=0;i<n;++i) { |
608 |
if (mis_marker[i]==IS_IN_C) { |
609 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
610 |
j=A->pattern->index[iptr]; |
611 |
if (j!=i && ABS(A->val[iptr])>=alpha*MIN(ABS(A->val[diagptr[i]]),ABS(A->val[diagptr[j]]))) { |
612 |
mis_marker[j]=IS_IN_F; |
613 |
} |
614 |
} |
615 |
} |
616 |
} |
617 |
|
618 |
|
619 |
/*This loop cannot be parallelized, as order matters here.*/ |
620 |
for (i=0;i<n;i++) { |
621 |
if (mis_marker[i]==IS_IN_F) { |
622 |
sum=0; |
623 |
for (iptr=A_alpha->pattern->ptr[i];iptr<A_alpha->pattern->ptr[i+1]; ++iptr) { |
624 |
j=A_alpha->pattern->index[iptr]; |
625 |
if (mis_marker[j]==IS_IN_C) { |
626 |
sum+=A_alpha->val[iptr]; |
627 |
} |
628 |
} |
629 |
if (ABS(sum)>=taw*ABS(A->val[diagptr[i]])) { |
630 |
mis_marker[j]=IS_IN_FA; |
631 |
} |
632 |
} |
633 |
} |
634 |
|
635 |
/*This loop cannot be parallelized, as order matters here.*/ |
636 |
for (i=0;i<n;i++) { |
637 |
if (mis_marker[i]!=IS_IN_C || mis_marker[i]!=IS_IN_FA) { |
638 |
sum=0; |
639 |
for (iptr=A_alpha->pattern->ptr[i];iptr<A_alpha->pattern->ptr[i+1]; ++iptr) { |
640 |
j=A_alpha->pattern->index[iptr]; |
641 |
if (mis_marker[j]==IS_IN_C || mis_marker[j]==IS_IN_FA) { |
642 |
sum+=A_alpha->val[iptr]; |
643 |
} |
644 |
} |
645 |
if (ABS(sum)>=delta*ABS(A->val[diagptr[i]])) { |
646 |
mis_marker[j]=IS_IN_FB; |
647 |
} |
648 |
else { |
649 |
mis_marker[j]=IS_IN_C; |
650 |
} |
651 |
|
652 |
} |
653 |
} |
654 |
|
655 |
|
656 |
/* swap to TRUE/FALSE in mis_marker */ |
657 |
#pragma omp parallel for private(i) schedule(static) |
658 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_FA || mis_marker[i]==IS_IN_FB); |
659 |
|
660 |
MEMFREE(diagptr); |
661 |
MEMFREE(rsum); |
662 |
Paso_SparseMatrix_free(A_alpha); |
663 |
} |
664 |
|
665 |
|
666 |
void Paso_Pattern_Standard(Paso_SparseMatrix* A, index_t* mis_marker, double theta) |
667 |
{ |
668 |
dim_t i,n,j,k; |
669 |
index_t iptr,jptr; |
670 |
/*index_t *index,*where_p;*/ |
671 |
double threshold,max_offdiagonal; |
672 |
dim_t *lambda; /*mesure of importance */ |
673 |
dim_t maxlambda=0; |
674 |
index_t index_maxlambda=0; |
675 |
double time0=0; |
676 |
bool_t verbose=0; |
677 |
|
678 |
Paso_Pattern *S=NULL; |
679 |
Paso_Pattern *ST=NULL; |
680 |
Paso_IndexList* index_list=NULL; |
681 |
|
682 |
/*dim_t lk;*/ |
683 |
|
684 |
index_list=TMPMEMALLOC(A->pattern->numOutput,Paso_IndexList); |
685 |
if (! Paso_checkPtr(index_list)) { |
686 |
#pragma omp parallel for private(i) schedule(static) |
687 |
for(i=0;i<A->pattern->numOutput;++i) { |
688 |
index_list[i].extension=NULL; |
689 |
index_list[i].n=0; |
690 |
} |
691 |
} |
692 |
|
693 |
|
694 |
n=A->numRows; |
695 |
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
696 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_RS: symmetric matrix pattern is not supported yet"); |
697 |
return; |
698 |
} |
699 |
|
700 |
time0=Paso_timer(); |
701 |
k=0; |
702 |
/*S_i={j \in N_i; i strongly coupled to j}*/ |
703 |
#pragma omp parallel for private(i,iptr,max_offdiagonal,threshold,j) schedule(static) |
704 |
for (i=0;i<n;++i) { |
705 |
if(mis_marker[i]==IS_AVAILABLE) { |
706 |
max_offdiagonal = DBL_MIN; |
707 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
708 |
j=A->pattern->index[iptr]; |
709 |
if( j != i){ |
710 |
max_offdiagonal = MAX(max_offdiagonal,ABS(A->val[iptr])); |
711 |
} |
712 |
} |
713 |
threshold = theta*max_offdiagonal; |
714 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
715 |
j=A->pattern->index[iptr]; |
716 |
if(ABS(A->val[iptr])>=threshold && i!=j) { |
717 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
718 |
} |
719 |
} |
720 |
} |
721 |
} |
722 |
|
723 |
S=Paso_IndexList_createPattern(0, A->pattern->numOutput,index_list,0,A->pattern->numInput,0); |
724 |
ST=Paso_Pattern_getTranspose(S); |
725 |
|
726 |
time0=Paso_timer()-time0; |
727 |
if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0); |
728 |
|
729 |
lambda=TMPMEMALLOC(n,dim_t); |
730 |
|
731 |
#pragma omp parallel for private(i) schedule(static) |
732 |
for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; } |
733 |
|
734 |
/*S_i={j \in N_i; i strongly coupled to j}*/ |
735 |
|
736 |
/* |
737 |
#pragma omp parallel for private(i,iptr,lk) schedule(static) |
738 |
for (i=0;i<n;++i) { |
739 |
lk=0; |
740 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
741 |
if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i ) |
742 |
lk++; |
743 |
} |
744 |
#pragma omp critical |
745 |
k+=lk; |
746 |
if(k==0) { |
747 |
mis_marker[i]=IS_IN_F; |
748 |
} |
749 |
} |
750 |
*/ |
751 |
|
752 |
|
753 |
k=0; |
754 |
maxlambda=0; |
755 |
|
756 |
time0=Paso_timer(); |
757 |
|
758 |
for (i=0;i<n;++i) { |
759 |
if(mis_marker[i]==IS_AVAILABLE) { |
760 |
lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/ |
761 |
/*lambda[i]=how_many(i,S,TRUE);*/ |
762 |
/*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/ |
763 |
k++; |
764 |
if(maxlambda<lambda[i]) { |
765 |
maxlambda=lambda[i]; |
766 |
index_maxlambda=i; |
767 |
} |
768 |
} |
769 |
} |
770 |
|
771 |
k=0; |
772 |
time0=Paso_timer()-time0; |
773 |
if (verbose) fprintf(stdout,"timing: Lambdas computations at the begining: %e\n",time0); |
774 |
|
775 |
time0=Paso_timer(); |
776 |
|
777 |
/*Paso_Pattern_getReport(n,mis_marker);*/ |
778 |
|
779 |
while (Paso_Util_isAny(n,mis_marker,IS_AVAILABLE)) { |
780 |
|
781 |
if(index_maxlambda<0) { |
782 |
break; |
783 |
} |
784 |
|
785 |
i=index_maxlambda; |
786 |
if(mis_marker[i]==IS_AVAILABLE) { |
787 |
mis_marker[index_maxlambda]=IS_IN_C; |
788 |
lambda[index_maxlambda]=IS_NOT_AVAILABLE; |
789 |
for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) { |
790 |
j=ST->index[iptr]; |
791 |
if(mis_marker[j]==IS_AVAILABLE) { |
792 |
mis_marker[j]=IS_IN_F; |
793 |
lambda[j]=IS_NOT_AVAILABLE; |
794 |
for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) { |
795 |
k=S->index[jptr]; |
796 |
if(mis_marker[k]==IS_AVAILABLE) { |
797 |
lambda[k]++; |
798 |
} |
799 |
} |
800 |
} |
801 |
} |
802 |
for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) { |
803 |
j=S->index[iptr]; |
804 |
if(mis_marker[j]==IS_AVAILABLE) { |
805 |
lambda[j]--; |
806 |
} |
807 |
} |
808 |
|
809 |
} |
810 |
|
811 |
/* Used when transpose of S is not available */ |
812 |
/* |
813 |
for (i=0;i<n;++i) { |
814 |
if(mis_marker[i]==IS_AVAILABLE) { |
815 |
if (i==index_maxlambda) { |
816 |
mis_marker[index_maxlambda]=IS_IN_C; |
817 |
lambda[index_maxlambda]=-1; |
818 |
for (j=0;j<n;++j) { |
819 |
if(mis_marker[j]==IS_AVAILABLE) { |
820 |
index=&(S->index[S->ptr[j]]); |
821 |
where_p=(index_t*)bsearch(&i, |
822 |
index, |
823 |
S->ptr[j + 1]-S->ptr[j], |
824 |
sizeof(index_t), |
825 |
Paso_comparIndex); |
826 |
if (where_p!=NULL) { |
827 |
mis_marker[j]=IS_IN_F; |
828 |
lambda[j]=-1; |
829 |
for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) { |
830 |
k=S->index[iptr]; |
831 |
if(mis_marker[k]==IS_AVAILABLE) { |
832 |
lambda[k]++; |
833 |
} |
834 |
} |
835 |
} |
836 |
|
837 |
} |
838 |
} |
839 |
|
840 |
} |
841 |
} |
842 |
} |
843 |
*/ |
844 |
index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE); |
845 |
} |
846 |
|
847 |
time0=Paso_timer()-time0; |
848 |
if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0); |
849 |
|
850 |
/*Paso_Pattern_getReport(n,mis_marker);*/ |
851 |
|
852 |
#pragma omp parallel for private(i) schedule(static) |
853 |
for (i=0;i<n;++i) |
854 |
if(mis_marker[i]==IS_AVAILABLE) { |
855 |
mis_marker[i]=IS_IN_F; |
856 |
} |
857 |
|
858 |
/*Paso_Pattern_getReport(n,mis_marker);*/ |
859 |
|
860 |
TMPMEMFREE(lambda); |
861 |
|
862 |
/* clean up */ |
863 |
if (index_list!=NULL) { |
864 |
#pragma omp parallel for private(i) schedule(static) |
865 |
for(i=0;i<A->pattern->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
866 |
} |
867 |
|
868 |
TMPMEMFREE(index_list); |
869 |
Paso_Pattern_free(S); |
870 |
|
871 |
/* swap to TRUE/FALSE in mis_marker */ |
872 |
#pragma omp parallel for private(i) schedule(static) |
873 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
874 |
|
875 |
} |
876 |
|
877 |
void Paso_Pattern_getReport(dim_t n,index_t* mis_marker) { |
878 |
|
879 |
dim_t i,av,nav,f,c; |
880 |
|
881 |
av=0; |
882 |
nav=0; |
883 |
f=0; |
884 |
c=0; |
885 |
|
886 |
for (i=0;i<n;i++) { |
887 |
if(mis_marker[i]==IS_NOT_AVAILABLE) { |
888 |
nav++; |
889 |
} |
890 |
else if (mis_marker[i]==IS_AVAILABLE) { |
891 |
av++; |
892 |
} |
893 |
else if (mis_marker[i]==IS_IN_F) { |
894 |
f++; |
895 |
} |
896 |
else if (mis_marker[i]==IS_IN_C) { |
897 |
c++; |
898 |
} |
899 |
} |
900 |
|
901 |
printf("Available=%d, Not Available = %d, In F = %d, In C = %d \n",av,nav,f,c); |
902 |
|
903 |
} |
904 |
|
905 |
|
906 |
/*Used in Paso_Pattern_RS_MI*/ |
907 |
|
908 |
/*Computes how many connections unknown i have in S. |
909 |
bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise. |
910 |
Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE. |
911 |
*/ |
912 |
dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose) { |
913 |
dim_t j,n; |
914 |
dim_t total,ltotal; |
915 |
index_t *index,*where_p; |
916 |
|
917 |
/*index_t iptr;*/ |
918 |
total=0; |
919 |
ltotal=0; |
920 |
|
921 |
n=S->numOutput; |
922 |
|
923 |
if(transpose) { |
924 |
#pragma omp parallel |
925 |
{ |
926 |
ltotal=0; |
927 |
#pragma omp for private(j,index,where_p,ltotal) schedule(static) |
928 |
for (j=0;j<n;++j) { |
929 |
index=&(S->index[S->ptr[j]]); |
930 |
where_p=(index_t*)bsearch(&i, |
931 |
index, |
932 |
S->ptr[j + 1]-S->ptr[j], |
933 |
sizeof(index_t), |
934 |
Paso_comparIndex); |
935 |
if (where_p!=NULL) { |
936 |
ltotal++; |
937 |
} |
938 |
} |
939 |
} |
940 |
#pragma omp critical |
941 |
{ |
942 |
total+=ltotal; |
943 |
} |
944 |
|
945 |
} |
946 |
else { |
947 |
total=S->ptr[i+1]-S->ptr[i]; |
948 |
/*#pragma omp parallel for private(iptr) schedule(static)*/ |
949 |
/*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) { |
950 |
if(S->index[iptr]!=i && mis_marker[S->index[iptr]]==IS_AVAILABLE) |
951 |
total++; |
952 |
}*/ |
953 |
|
954 |
} |
955 |
|
956 |
if (total==0) total=IS_NOT_AVAILABLE; |
957 |
|
958 |
return total; |
959 |
} |
960 |
|
961 |
dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask) { |
962 |
dim_t i; |
963 |
dim_t max=-1; |
964 |
dim_t argmax=-1; |
965 |
|
966 |
#ifdef _OPENMP |
967 |
dim_t lmax=-1; |
968 |
dim_t li=-1; |
969 |
#pragma omp parallel private(i,lmax,li) |
970 |
{ |
971 |
lmax=-1; |
972 |
li=-1; |
973 |
#pragma omp for schedule(static) |
974 |
for (i=0;i<n;++i) { |
975 |
if(lmax<lambda[i] && lambda[i]!=mask){ |
976 |
lmax=lambda[i]; |
977 |
li=i; |
978 |
} |
979 |
} |
980 |
#pragma omp critical |
981 |
{ |
982 |
if (max<=lmax) { |
983 |
if(max==lmax && argmax>li) |
984 |
{ |
985 |
argmax=li; |
986 |
} |
987 |
if (max < lmax) |
988 |
{ |
989 |
max=lmax; |
990 |
argmax=li; |
991 |
} |
992 |
} |
993 |
} |
994 |
} |
995 |
#else |
996 |
for (i=0;i<n;++i) { |
997 |
if(max<lambda[i] && lambda[i]!=mask){ |
998 |
max=lambda[i]; |
999 |
argmax=i; |
1000 |
} |
1001 |
} |
1002 |
#endif |
1003 |
|
1004 |
return argmax; |
1005 |
} |
1006 |
|
1007 |
|
1008 |
Paso_Pattern* Paso_Pattern_getTranspose(Paso_Pattern* P){ |
1009 |
|
1010 |
Paso_Pattern *outpattern=NULL; |
1011 |
|
1012 |
Paso_IndexList* index_list=NULL; |
1013 |
dim_t C=P->numInput; |
1014 |
dim_t F=P->numOutput-C; |
1015 |
dim_t n=C+F; |
1016 |
dim_t i,j; |
1017 |
index_t iptr; |
1018 |
|
1019 |
index_list=TMPMEMALLOC(C,Paso_IndexList); |
1020 |
if (! Paso_checkPtr(index_list)) { |
1021 |
#pragma omp parallel for private(i) schedule(static) |
1022 |
for(i=0;i<C;++i) { |
1023 |
index_list[i].extension=NULL; |
1024 |
index_list[i].n=0; |
1025 |
} |
1026 |
} |
1027 |
|
1028 |
|
1029 |
/*#pragma omp parallel for private(i,iptr,j) schedule(static)*/ |
1030 |
for (i=0;i<n;++i) { |
1031 |
for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) { |
1032 |
j=P->index[iptr]; |
1033 |
Paso_IndexList_insertIndex(&(index_list[j]),i); |
1034 |
} |
1035 |
} |
1036 |
|
1037 |
outpattern=Paso_IndexList_createPattern(0, C,index_list,0,n,0); |
1038 |
|
1039 |
/* clean up */ |
1040 |
if (index_list!=NULL) { |
1041 |
#pragma omp parallel for private(i) schedule(static) |
1042 |
for(i=0;i<C;++i) Paso_IndexList_free(index_list[i].extension); |
1043 |
} |
1044 |
TMPMEMFREE(index_list); |
1045 |
|
1046 |
return outpattern; |
1047 |
} |
1048 |
|
1049 |
|
1050 |
#undef IS_AVAILABLE |
1051 |
#undef IS_NOT_AVAILABLE |
1052 |
#undef IS_IN_F |
1053 |
#undef IS_IN_C |
1054 |
|
1055 |
#undef IS_UNDECIDED |
1056 |
#undef IS_STRONG |
1057 |
#undef IS_WEAK |
1058 |
|
1059 |
#undef IS_IN_FB |
1060 |
#undef IS_IN_FB |
1061 |
|
1062 |
|
1063 |
|
1064 |
|
1065 |
|
1066 |
|
1067 |
|
1068 |
|
1069 |
|
1070 |
|