36 |
/***************************************************************/ |
/***************************************************************/ |
37 |
|
|
38 |
#define IS_AVAILABLE -1 |
#define IS_AVAILABLE -1 |
39 |
#define IS_IN_SET -3 /* Week connection */ |
#define IS_IN_F -3 /* in F (strong) */ |
40 |
#define IS_REMOVED -4 /* strong */ |
#define IS_IN_C -4 /* in C (weak) */ |
41 |
|
|
42 |
void Paso_Pattern_YS(Paso_SparseMatrix* A, index_t* mis_marker, double threshold) { |
void Paso_Pattern_YS(Paso_SparseMatrix* A, index_t* mis_marker, double threshold) { |
43 |
|
|
56 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
57 |
for (i=0;i<n;++i) |
for (i=0;i<n;++i) |
58 |
if(mis_marker[i]==IS_AVAILABLE) |
if(mis_marker[i]==IS_AVAILABLE) |
59 |
mis_marker[i]=IS_REMOVED; |
mis_marker[i]=IS_IN_C; |
60 |
|
|
61 |
#pragma omp parallel for private(i,index,where_p) schedule(static) |
/*#pragma omp parallel for private(i,index,where_p) schedule(static)*/ |
62 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
63 |
diagptr[i]=A->pattern->ptr[i]; |
diagptr[i]=A->pattern->ptr[i]; |
64 |
index=&(A->pattern->index[A->pattern->ptr[i]]); |
index=&(A->pattern->index[A->pattern->ptr[i]]); |
78 |
|
|
79 |
/*This loop cannot be parallelized, as order matters here.*/ |
/*This loop cannot be parallelized, as order matters here.*/ |
80 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
81 |
if (mis_marker[i]==IS_REMOVED) { |
if (mis_marker[i]==IS_IN_C) { |
82 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
83 |
j=A->pattern->index[iptr]; |
j=A->pattern->index[iptr]; |
84 |
if (j!=i && ABS(A->val[iptr])>=threshold*ABS(A->val[diagptr[i]])) { |
if (j!=i && ABS(A->val[iptr])>=threshold*ABS(A->val[diagptr[i]])) { |
85 |
mis_marker[j]=IS_IN_SET; |
mis_marker[j]=IS_IN_F; |
86 |
} |
} |
87 |
} |
} |
88 |
} |
} |
92 |
|
|
93 |
/*This loop cannot be parallelized, as order matters here.*/ |
/*This loop cannot be parallelized, as order matters here.*/ |
94 |
for (i=0;i<n;i++) { |
for (i=0;i<n;i++) { |
95 |
if (mis_marker[i]==IS_IN_SET) { |
if (mis_marker[i]==IS_IN_F) { |
96 |
passed=TRUE; |
passed=TRUE; |
97 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
98 |
j=A->pattern->index[iptr]; |
j=A->pattern->index[iptr]; |
99 |
if (mis_marker[j]==IS_REMOVED) { |
if (mis_marker[j]==IS_IN_C) { |
100 |
if ((A->val[iptr]/A->val[diagptr[i]])>=-threshold) { |
if ((A->val[iptr]/A->val[diagptr[i]])>=-threshold) { |
101 |
passed=TRUE; |
passed=TRUE; |
102 |
} |
} |
106 |
} |
} |
107 |
} |
} |
108 |
} |
} |
109 |
if (passed) mis_marker[i]=IS_REMOVED; |
if (passed) mis_marker[i]=IS_IN_C; |
110 |
} |
} |
111 |
} |
} |
|
/* This check is to make sure we dont get some nusty rows which were not removed durring coarsening process.*/ |
|
|
/* TODO: we have to mechanism that this does not happend at all, and get rid of this 'If'. */ |
|
|
/*#pragma omp parallel for private(i,iptr,j,sum) schedule(static) |
|
|
for (i=0;i<n;i++) { |
|
|
if (mis_marker[i]==IS_REMOVED) { |
|
|
sum=0; |
|
|
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
|
|
j=A->pattern->index[iptr]; |
|
|
if (mis_marker[j]==IS_REMOVED) |
|
|
sum+=A->val[iptr]; |
|
|
} |
|
|
if(ABS(sum)<1.e-25) |
|
|
mis_marker[i]=IS_IN_SET; |
|
|
} |
|
|
} |
|
|
*/ |
|
112 |
|
|
113 |
/* swap to TRUE/FALSE in mis_marker */ |
/* swap to TRUE/FALSE in mis_marker */ |
114 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
115 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]!=IS_IN_SET); |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
116 |
|
|
117 |
MEMFREE(diagptr); |
MEMFREE(diagptr); |
118 |
} |
} |
119 |
|
|
120 |
|
|
121 |
/* |
/* |
122 |
* Ruge-Stueben strength of connection mask. |
* Ruge-Stueben strength of connection mask. |
123 |
* |
* |
270 |
return; |
return; |
271 |
} |
} |
272 |
|
|
|
/* We do not need this loop if we set IS_REMOVED=IS_AVAILABLE. */ |
|
273 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
274 |
for (i=0;i<n;++i) |
for (i=0;i<n;++i) |
275 |
if(mis_marker[i]==IS_AVAILABLE) |
if(mis_marker[i]==IS_AVAILABLE) |
276 |
mis_marker[i]=IS_IN_SET; |
mis_marker[i]=IS_IN_C; |
277 |
|
|
278 |
|
|
279 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
280 |
if (mis_marker[i]==IS_IN_SET) { |
if (mis_marker[i]==IS_IN_C) { |
281 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
282 |
j=pattern->index[iptr]; |
j=pattern->index[iptr]; |
283 |
mis_marker[j]=IS_REMOVED; |
mis_marker[j]=IS_IN_F; |
284 |
} |
} |
285 |
} |
} |
286 |
} |
} |
288 |
|
|
289 |
|
|
290 |
for (i=0;i<n;i++) { |
for (i=0;i<n;i++) { |
291 |
if (mis_marker[i]==IS_REMOVED) { |
if (mis_marker[i]==IS_IN_F) { |
292 |
passed=TRUE; |
passed=TRUE; |
293 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
294 |
j=pattern->index[iptr]; |
j=pattern->index[iptr]; |
295 |
if (mis_marker[j]==IS_REMOVED) { |
if (mis_marker[j]==IS_IN_F) { |
296 |
passed=TRUE; |
passed=TRUE; |
297 |
} |
} |
298 |
else { |
else { |
300 |
break; |
break; |
301 |
} |
} |
302 |
} |
} |
303 |
if (passed) mis_marker[i]=IS_IN_SET; |
if (passed) mis_marker[i]=IS_IN_C; |
304 |
} |
} |
305 |
} |
} |
306 |
|
|
307 |
/* swap to TRUE/FALSE in mis_marker */ |
/* swap to TRUE/FALSE in mis_marker */ |
308 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
309 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]!=IS_REMOVED); |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
310 |
|
|
311 |
} |
} |
312 |
|
|
336 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
337 |
for (i=0;i<n;++i) |
for (i=0;i<n;++i) |
338 |
if(mis_marker[i]==IS_AVAILABLE) |
if(mis_marker[i]==IS_AVAILABLE) |
339 |
mis_marker[i]=IS_IN_SET; |
mis_marker[i]=IS_IN_F; |
340 |
|
|
341 |
#pragma omp barrier |
#pragma omp barrier |
342 |
for (color=0;color<num_colors;++color) { |
for (color=0;color<num_colors;++color) { |
343 |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
344 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
345 |
if (colorOf[i]==color) { |
if (colorOf[i]==color) { |
346 |
if (mis_marker[i]==IS_IN_SET) { |
if (mis_marker[i]==IS_IN_F) { |
347 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
348 |
j=pattern->index[iptr]; |
j=pattern->index[iptr]; |
349 |
if (colorOf[j]<color) |
if (colorOf[j]<color) |
350 |
mis_marker[j]=IS_REMOVED; |
mis_marker[j]=IS_IN_C; |
351 |
} |
} |
352 |
} |
} |
353 |
} |
} |
360 |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
#pragma omp parallel for schedule(static) private(i,iptr,j) |
361 |
for (i=0;i<n;i++) { |
for (i=0;i<n;i++) { |
362 |
if (colorOf[i]==color) { |
if (colorOf[i]==color) { |
363 |
if (mis_marker[i]==IS_REMOVED) { |
if (mis_marker[i]==IS_IN_C) { |
364 |
passed=TRUE; |
passed=TRUE; |
365 |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
for (iptr=pattern->ptr[i];iptr<pattern->ptr[i+1]; ++iptr) { |
366 |
j=pattern->index[iptr]; |
j=pattern->index[iptr]; |
367 |
if (colorOf[j]<color && passed) { |
if (colorOf[j]<color && passed) { |
368 |
if (mis_marker[j]==IS_REMOVED) { |
if (mis_marker[j]==IS_IN_C) { |
369 |
passed=TRUE; |
passed=TRUE; |
370 |
} |
} |
371 |
else { |
else { |
374 |
} |
} |
375 |
} |
} |
376 |
} |
} |
377 |
if (passed) mis_marker[i]=IS_IN_SET; |
if (passed) mis_marker[i]=IS_IN_F; |
378 |
} |
} |
379 |
|
|
380 |
} |
} |
383 |
|
|
384 |
/* swap to TRUE/FALSE in mis_marker */ |
/* swap to TRUE/FALSE in mis_marker */ |
385 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
386 |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]!=IS_IN_SET); |
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
387 |
|
|
388 |
MEMFREE(colorOf); |
MEMFREE(colorOf); |
389 |
} |
} |
390 |
|
|
391 |
|
/*For testing */ |
392 |
|
void Paso_Pattern_greedy_diag(Paso_SparseMatrix* A, index_t* mis_marker, double threshold) { |
393 |
|
|
394 |
|
dim_t i,j=0,k; |
395 |
|
double *theta; |
396 |
|
index_t iptr; |
397 |
|
dim_t n=A->numRows; |
398 |
|
double rsum,diag=0; |
399 |
|
index_t *AvADJ; |
400 |
|
theta=MEMALLOC(n,double); |
401 |
|
AvADJ=MEMALLOC(n,index_t); |
402 |
|
|
403 |
|
|
404 |
|
|
405 |
|
|
406 |
|
if (A->pattern->type & PATTERN_FORMAT_SYM) { |
407 |
|
Paso_setError(TYPE_ERROR,"Paso_Pattern_coup: symmetric matrix pattern is not supported yet"); |
408 |
|
return; |
409 |
|
} |
410 |
|
|
411 |
|
|
412 |
|
#pragma omp parallel for private(i,iptr,j,rsum) schedule(static) |
413 |
|
for (i=0;i<n;++i) { |
414 |
|
rsum=0; |
415 |
|
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
416 |
|
j=A->pattern->index[iptr]; |
417 |
|
if(j!=i) { |
418 |
|
rsum+=ABS(A->val[iptr]); |
419 |
|
} |
420 |
|
else { |
421 |
|
diag=ABS(A->val[iptr]); |
422 |
|
} |
423 |
|
} |
424 |
|
theta[i]=diag/rsum; |
425 |
|
if(theta[i]>threshold) { |
426 |
|
mis_marker[i]=IS_IN_F; |
427 |
|
} |
428 |
|
} |
429 |
|
|
430 |
|
while (Paso_Util_isAny(n,mis_marker,IS_AVAILABLE)) { |
431 |
|
k=0; |
432 |
|
#pragma omp parallel for private(i,j,k) schedule(static) |
433 |
|
for (i=0;i<n;++i) { |
434 |
|
if(mis_marker[i]==IS_AVAILABLE) { |
435 |
|
if(k==0) { |
436 |
|
j=i; |
437 |
|
k++; |
438 |
|
} |
439 |
|
if(theta[j]>theta[i]) { |
440 |
|
j=i; |
441 |
|
} |
442 |
|
} |
443 |
|
} |
444 |
|
mis_marker[j]=IS_IN_C; |
445 |
|
|
446 |
|
for (iptr=A->pattern->ptr[j];iptr<A->pattern->ptr[j+1]; ++iptr) { |
447 |
|
k=A->pattern->index[iptr]; |
448 |
|
if(mis_marker[k]==IS_AVAILABLE) { |
449 |
|
AvADJ[k]=1; |
450 |
|
} |
451 |
|
else { |
452 |
|
AvADJ[k]=-1; |
453 |
|
} |
454 |
|
|
455 |
|
} |
456 |
|
|
457 |
|
#pragma omp parallel for private(i,iptr,j,rsum) schedule(static) |
458 |
|
for (i=0;i<n;++i) { |
459 |
|
if(AvADJ[i]) { |
460 |
|
rsum=0; |
461 |
|
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
462 |
|
k=A->pattern->index[iptr]; |
463 |
|
if(k!=i && mis_marker[k]!=IS_IN_C ) { |
464 |
|
rsum+=ABS(A->val[iptr]); |
465 |
|
} |
466 |
|
if(j==i) { |
467 |
|
diag=ABS(A->val[iptr]); |
468 |
|
} |
469 |
|
} |
470 |
|
theta[i]=diag/rsum; |
471 |
|
if(theta[i]>threshold) { |
472 |
|
mis_marker[i]=IS_IN_F; |
473 |
|
} |
474 |
|
} |
475 |
|
} |
476 |
|
|
477 |
|
|
478 |
|
} |
479 |
|
|
480 |
|
/* swap to TRUE/FALSE in mis_marker */ |
481 |
|
#pragma omp parallel for private(i) schedule(static) |
482 |
|
for (i=0;i<n;i++) mis_marker[i]=(mis_marker[i]==IS_IN_F); |
483 |
|
|
484 |
|
MEMFREE(AvADJ); |
485 |
|
MEMFREE(theta); |
486 |
|
} |
487 |
|
|
488 |
#undef IS_AVAILABLE |
#undef IS_AVAILABLE |
489 |
#undef IS_IN_SET |
#undef IS_IN_F |
490 |
#undef IS_REMOVED |
#undef IS_IN_C |
491 |
|
|
492 |
|
|
493 |
|
|