1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: SparseMatrix_AMGcomponents */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Artak Amirbekyan, artak@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "SparseMatrix.h" |
27 |
#include "PasoUtil.h" |
28 |
|
29 |
/************************************************************** |
30 |
|
31 |
Methods nessecary for AMG preconditioner |
32 |
|
33 |
*/ |
34 |
|
35 |
/* Prolongation matrix is constracted by W_FC as P<- [W_FC] |
36 |
[I_CC] |
37 |
*/ |
38 |
|
39 |
Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* mis_marker){ |
40 |
|
41 |
Paso_Pattern *outpattern=NULL; |
42 |
Paso_SparseMatrix *out=NULL; |
43 |
index_t iptr,wptr; |
44 |
|
45 |
Paso_IndexList* index_list=NULL; |
46 |
dim_t n=W->numRows+W->numCols; |
47 |
dim_t i,j,k=0; |
48 |
dim_t block_size=W->row_block_size; |
49 |
|
50 |
index_list=TMPMEMALLOC(n,Paso_IndexList); |
51 |
if (! Paso_checkPtr(index_list)) { |
52 |
#pragma omp parallel for private(i) schedule(static) |
53 |
for(i=0;i<n;++i) { |
54 |
index_list[i].extension=NULL; |
55 |
index_list[i].n=0; |
56 |
} |
57 |
} |
58 |
|
59 |
|
60 |
for (i=0;i<n;++i) { |
61 |
if (mis_marker[i]) { |
62 |
for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) { |
63 |
j=W->pattern->index[iptr]; |
64 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
65 |
} |
66 |
k++; |
67 |
} |
68 |
else { |
69 |
Paso_IndexList_insertIndex(&(index_list[i]),i-k); |
70 |
} |
71 |
} |
72 |
|
73 |
outpattern=Paso_IndexList_createPattern(0, n,index_list,0,W->numCols,0); |
74 |
out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE); |
75 |
|
76 |
k=0; |
77 |
|
78 |
if (block_size==1) { |
79 |
for (i=0;i<n;++i) { |
80 |
if (mis_marker[i]) { |
81 |
wptr=W->pattern->ptr[k]; |
82 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
83 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
84 |
wptr++; |
85 |
} |
86 |
k++; |
87 |
} |
88 |
else { |
89 |
iptr=out->pattern->ptr[i]; |
90 |
out->val[iptr*block_size*block_size]=1; |
91 |
} |
92 |
} |
93 |
} else if (block_size==2) { |
94 |
for (i=0;i<n;++i) { |
95 |
if (mis_marker[i]) { |
96 |
wptr=W->pattern->ptr[k]; |
97 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
98 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
99 |
out->val[iptr*block_size*block_size+1]=0; |
100 |
out->val[iptr*block_size*block_size+2]=0; |
101 |
out->val[iptr*block_size*block_size+3]=W->val[wptr*block_size*block_size+3]; |
102 |
wptr++; |
103 |
} |
104 |
k++; |
105 |
} |
106 |
else { |
107 |
iptr=out->pattern->ptr[i]; |
108 |
out->val[iptr*block_size*block_size]=1; |
109 |
out->val[iptr*block_size*block_size+1]=0; |
110 |
out->val[iptr*block_size*block_size+2]=0; |
111 |
out->val[iptr*block_size*block_size+3]=1; |
112 |
} |
113 |
} |
114 |
} else if (block_size==3) { |
115 |
for (i=0;i<n;++i) { |
116 |
if (mis_marker[i]) { |
117 |
wptr=W->pattern->ptr[k]; |
118 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
119 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
120 |
out->val[iptr*block_size*block_size+1]=0; |
121 |
out->val[iptr*block_size*block_size+2]=0; |
122 |
out->val[iptr*block_size*block_size+3]=0; |
123 |
out->val[iptr*block_size*block_size+4]=W->val[wptr*block_size*block_size+4]; |
124 |
out->val[iptr*block_size*block_size+5]=0; |
125 |
out->val[iptr*block_size*block_size+6]=0; |
126 |
out->val[iptr*block_size*block_size+7]=0; |
127 |
out->val[iptr*block_size*block_size+8]=W->val[wptr*block_size*block_size+8]; |
128 |
wptr++; |
129 |
} |
130 |
k++; |
131 |
} |
132 |
else { |
133 |
iptr=out->pattern->ptr[i]; |
134 |
out->val[iptr*block_size*block_size]=1; |
135 |
out->val[iptr*block_size*block_size+1]=0; |
136 |
out->val[iptr*block_size*block_size+2]=0; |
137 |
out->val[iptr*block_size*block_size+3]=0; |
138 |
out->val[iptr*block_size*block_size+4]=1; |
139 |
out->val[iptr*block_size*block_size+5]=0; |
140 |
out->val[iptr*block_size*block_size+6]=0; |
141 |
out->val[iptr*block_size*block_size+7]=0; |
142 |
out->val[iptr*block_size*block_size+8]=1; |
143 |
} |
144 |
} |
145 |
} |
146 |
|
147 |
/* clean up */ |
148 |
if (index_list!=NULL) { |
149 |
#pragma omp parallel for private(i) schedule(static) |
150 |
for(i=0;i<n;++i) Paso_IndexList_free(index_list[i].extension); |
151 |
} |
152 |
TMPMEMFREE(index_list); |
153 |
Paso_Pattern_free(outpattern); |
154 |
return out; |
155 |
} |
156 |
|
157 |
|
158 |
/* Restriction matrix R=P^T */ |
159 |
|
160 |
Paso_SparseMatrix* Paso_SparseMatrix_getRestriction(Paso_SparseMatrix* P){ |
161 |
|
162 |
Paso_Pattern *outpattern=NULL; |
163 |
Paso_SparseMatrix *out=NULL; |
164 |
|
165 |
Paso_IndexList* index_list=NULL; |
166 |
dim_t C=P->numCols; |
167 |
dim_t F=P->numRows-C; |
168 |
dim_t n=C+F; |
169 |
dim_t block_size=P->row_block_size; |
170 |
dim_t i,j,k=0; |
171 |
index_t iptr,jptr; |
172 |
|
173 |
index_list=TMPMEMALLOC(C,Paso_IndexList); |
174 |
if (! Paso_checkPtr(index_list)) { |
175 |
#pragma omp parallel for private(i) schedule(static) |
176 |
for(i=0;i<C;++i) { |
177 |
index_list[i].extension=NULL; |
178 |
index_list[i].n=0; |
179 |
} |
180 |
} |
181 |
|
182 |
|
183 |
for (i=0;i<n;++i) { |
184 |
for (iptr=P->pattern->ptr[i];iptr<P->pattern->ptr[i+1]; ++iptr) { |
185 |
j=P->pattern->index[iptr]; |
186 |
Paso_IndexList_insertIndex(&(index_list[j]),i); |
187 |
} |
188 |
} |
189 |
|
190 |
outpattern=Paso_IndexList_createPattern(0, C,index_list,0,C+F,0); |
191 |
out=Paso_SparseMatrix_alloc(P->type,outpattern,block_size,block_size,FALSE); |
192 |
|
193 |
|
194 |
if (block_size==1) { |
195 |
for (i=0;i<out->numRows;++i) { |
196 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
197 |
j=out->pattern->index[iptr]; |
198 |
/*This for later will be replaced by bsearch!!*/ |
199 |
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
200 |
k=P->pattern->index[jptr]; |
201 |
if(k==i) { |
202 |
out->val[iptr]=P->val[jptr]; |
203 |
} |
204 |
} |
205 |
} |
206 |
} |
207 |
} else if (block_size==2) { |
208 |
for (i=0;i<out->numRows;++i) { |
209 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
210 |
j=out->pattern->index[iptr]; |
211 |
/*This for later will be replaced by bsearch!!*/ |
212 |
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
213 |
k=P->pattern->index[jptr]; |
214 |
if(k==i) { |
215 |
out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size]; |
216 |
out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+2]; |
217 |
out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+1]; |
218 |
out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+3]; |
219 |
} |
220 |
} |
221 |
} |
222 |
} |
223 |
} else if (block_size==3) { |
224 |
for (i=0;i<out->numRows;++i) { |
225 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
226 |
j=out->pattern->index[iptr]; |
227 |
/*This for later will be replaced by bsearch!!*/ |
228 |
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
229 |
k=P->pattern->index[jptr]; |
230 |
if(k==i) { |
231 |
out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size]; |
232 |
out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+3]; |
233 |
out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+6]; |
234 |
out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+1]; |
235 |
out->val[iptr*block_size*block_size+4]=P->val[jptr*block_size*block_size+4]; |
236 |
out->val[iptr*block_size*block_size+5]=P->val[jptr*block_size*block_size+7]; |
237 |
out->val[iptr*block_size*block_size+6]=P->val[jptr*block_size*block_size+2]; |
238 |
out->val[iptr*block_size*block_size+7]=P->val[jptr*block_size*block_size+5]; |
239 |
out->val[iptr*block_size*block_size+8]=P->val[jptr*block_size*block_size+8]; |
240 |
} |
241 |
} |
242 |
} |
243 |
} |
244 |
} |
245 |
|
246 |
/* clean up */ |
247 |
if (index_list!=NULL) { |
248 |
#pragma omp parallel for private(i) schedule(static) |
249 |
for(i=0;i<C;++i) Paso_IndexList_free(index_list[i].extension); |
250 |
} |
251 |
TMPMEMFREE(index_list); |
252 |
Paso_Pattern_free(outpattern); |
253 |
return out; |
254 |
} |
255 |
|
256 |
|
257 |
void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* mis_marker){ |
258 |
|
259 |
double *alpha; |
260 |
double *beta; |
261 |
double a_ii=0; |
262 |
double sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos; |
263 |
double sum_all_neg1,sum_all_pos1,sum_strong_neg1,sum_strong_pos1,sum_all_neg2,sum_all_pos2,sum_strong_neg2,sum_strong_pos2,sum_all_neg3,sum_all_pos3,sum_strong_neg3,sum_strong_pos3; |
264 |
double a_ii1=0; |
265 |
double a_ii2=0; |
266 |
double a_ii3=0; |
267 |
|
268 |
dim_t n=A->numRows; |
269 |
dim_t F=W_FC->numRows; |
270 |
dim_t i,j,k; |
271 |
|
272 |
dim_t block_size=A->row_block_size; |
273 |
|
274 |
index_t iPtr,dptr=0; |
275 |
/*index_t *index, *where_p;*/ |
276 |
|
277 |
alpha=TMPMEMALLOC(F*block_size,double); |
278 |
beta=TMPMEMALLOC(F*block_size,double); |
279 |
|
280 |
|
281 |
if (block_size==1) { |
282 |
k=0; |
283 |
for (i = 0; i < n; ++i) { |
284 |
if(mis_marker[i]) { |
285 |
alpha[k]=0; |
286 |
beta[k]=0; |
287 |
sum_all_neg=0; |
288 |
sum_all_pos=0; |
289 |
sum_strong_neg=0; |
290 |
sum_strong_pos=0; |
291 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
292 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
293 |
j=A->pattern->index[iPtr]; |
294 |
if(j!=i) { |
295 |
if(A->val[iPtr]<0) { |
296 |
sum_all_neg+=A->val[iPtr]; |
297 |
if(!mis_marker[j]) { |
298 |
sum_strong_neg+=A->val[iPtr]; |
299 |
} |
300 |
} |
301 |
else if(A->val[iPtr]>0) { |
302 |
sum_all_pos+=A->val[iPtr]; |
303 |
if(!mis_marker[j]) { |
304 |
sum_strong_pos+=A->val[iPtr]; |
305 |
} |
306 |
} |
307 |
} |
308 |
else { |
309 |
a_ii=A->val[iPtr]; |
310 |
dptr=iPtr; |
311 |
} |
312 |
} |
313 |
|
314 |
if(sum_strong_neg!=0) { |
315 |
alpha[k]=sum_all_neg/(sum_strong_neg); |
316 |
} |
317 |
if(sum_strong_pos!=0) { |
318 |
beta[k]=sum_all_pos/(sum_strong_pos); |
319 |
} |
320 |
else { |
321 |
a_ii+=sum_all_pos; |
322 |
beta[k]=0; |
323 |
} |
324 |
alpha[k]=alpha[k]/(a_ii); |
325 |
beta[k]=beta[k]/(a_ii); |
326 |
k++; |
327 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
328 |
} |
329 |
} |
330 |
} else if (block_size==2) { |
331 |
k=0; |
332 |
for (i = 0; i < n; ++i) { |
333 |
if(mis_marker[i]) { |
334 |
alpha[k*block_size]=0; |
335 |
alpha[k*block_size+1]=0; |
336 |
beta[k*block_size]=0; |
337 |
beta[k*block_size+1]=0; |
338 |
sum_all_neg1=0; |
339 |
sum_all_pos1=0; |
340 |
sum_strong_neg1=0; |
341 |
sum_strong_pos1=0; |
342 |
sum_all_neg2=0; |
343 |
sum_all_pos2=0; |
344 |
sum_strong_neg2=0; |
345 |
sum_strong_pos2=0; |
346 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
347 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
348 |
j=A->pattern->index[iPtr]; |
349 |
if(j!=i) { |
350 |
if(A->val[iPtr*block_size*block_size]<0) { |
351 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
352 |
if(!mis_marker[j]) { |
353 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
354 |
} |
355 |
} |
356 |
else if(A->val[iPtr*block_size*block_size]>0) { |
357 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
358 |
if(!mis_marker[j]) { |
359 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
360 |
} |
361 |
} |
362 |
if(A->val[iPtr*block_size*block_size+3]<0) { |
363 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+3]; |
364 |
if(!mis_marker[j]) { |
365 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+3]; |
366 |
} |
367 |
} else if(A->val[iPtr*block_size*block_size+3]>0) { |
368 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+3]; |
369 |
if(!mis_marker[j]) { |
370 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+3]; |
371 |
} |
372 |
} |
373 |
|
374 |
} |
375 |
else { |
376 |
a_ii1=A->val[iPtr*block_size*block_size]; |
377 |
a_ii2=A->val[iPtr*block_size*block_size+3]; |
378 |
dptr=iPtr; |
379 |
} |
380 |
} |
381 |
|
382 |
if(sum_strong_neg1!=0) { |
383 |
alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1); |
384 |
} |
385 |
if(sum_strong_neg2!=0) { |
386 |
alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2); |
387 |
} |
388 |
|
389 |
if(sum_strong_pos1!=0) { |
390 |
beta[k*block_size]=sum_all_pos1/(sum_strong_pos1); |
391 |
} |
392 |
else { |
393 |
a_ii1+=sum_all_pos1; |
394 |
beta[k*block_size]=0; |
395 |
} |
396 |
if(sum_strong_pos2!=0) { |
397 |
beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2); |
398 |
} |
399 |
else { |
400 |
a_ii2+=sum_all_pos2; |
401 |
beta[k*block_size+1]=0; |
402 |
} |
403 |
|
404 |
alpha[k*block_size]=alpha[k*block_size]/(a_ii1); |
405 |
alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2); |
406 |
beta[k*block_size]=beta[k*block_size]/(a_ii1); |
407 |
beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2); |
408 |
k++; |
409 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
410 |
} |
411 |
} |
412 |
} else if (block_size==3) { |
413 |
k=0; |
414 |
for (i = 0; i < n; ++i) { |
415 |
if(mis_marker[i]) { |
416 |
alpha[k*block_size]=0; |
417 |
alpha[k*block_size+1]=0; |
418 |
alpha[k*block_size+2]=0; |
419 |
beta[k*block_size]=0; |
420 |
beta[k*block_size+1]=0; |
421 |
beta[k*block_size+2]=0; |
422 |
sum_all_neg1=0; |
423 |
sum_all_pos1=0; |
424 |
sum_strong_neg1=0; |
425 |
sum_strong_pos1=0; |
426 |
sum_all_neg2=0; |
427 |
sum_all_pos2=0; |
428 |
sum_strong_neg2=0; |
429 |
sum_strong_pos2=0; |
430 |
sum_all_neg3=0; |
431 |
sum_all_pos3=0; |
432 |
sum_strong_neg3=0; |
433 |
sum_strong_pos3=0; |
434 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
435 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
436 |
j=A->pattern->index[iPtr]; |
437 |
if(j!=i) { |
438 |
if(A->val[iPtr*block_size*block_size]<0) { |
439 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
440 |
if(!mis_marker[j]) { |
441 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
442 |
} |
443 |
} |
444 |
else if(A->val[iPtr*block_size*block_size]>0) { |
445 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
446 |
if(!mis_marker[j]) { |
447 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
448 |
} |
449 |
} |
450 |
if(A->val[iPtr*block_size*block_size+4]<0) { |
451 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+4]; |
452 |
if(!mis_marker[j]) { |
453 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+4]; |
454 |
} |
455 |
} else if(A->val[iPtr*block_size*block_size+4]>0) { |
456 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+4]; |
457 |
if(!mis_marker[j]) { |
458 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+4]; |
459 |
} |
460 |
} |
461 |
if(A->val[iPtr*block_size*block_size+8]<0) { |
462 |
sum_all_neg3+=A->val[iPtr*block_size*block_size+8]; |
463 |
if(!mis_marker[j]) { |
464 |
sum_strong_neg3+=A->val[iPtr*block_size*block_size+8]; |
465 |
} |
466 |
} else if(A->val[iPtr*block_size*block_size+8]>0) { |
467 |
sum_all_pos3+=A->val[iPtr*block_size*block_size+8]; |
468 |
if(!mis_marker[j]) { |
469 |
sum_strong_pos3+=A->val[iPtr*block_size*block_size+8]; |
470 |
} |
471 |
} |
472 |
|
473 |
|
474 |
} |
475 |
else { |
476 |
a_ii1=A->val[iPtr*block_size*block_size]; |
477 |
a_ii2=A->val[iPtr*block_size*block_size+4]; |
478 |
a_ii3=A->val[iPtr*block_size*block_size+8]; |
479 |
dptr=iPtr; |
480 |
} |
481 |
} |
482 |
|
483 |
if(sum_strong_neg1!=0) { |
484 |
alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1); |
485 |
} |
486 |
if(sum_strong_neg2!=0) { |
487 |
alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2); |
488 |
} |
489 |
if(sum_strong_neg3!=0) { |
490 |
alpha[k*block_size+2]=sum_all_neg3/(sum_strong_neg3); |
491 |
} |
492 |
|
493 |
if(sum_strong_pos1!=0) { |
494 |
beta[k*block_size]=sum_all_pos1/(sum_strong_pos1); |
495 |
} |
496 |
else { |
497 |
a_ii1+=sum_all_pos1; |
498 |
beta[k*block_size]=0; |
499 |
} |
500 |
if(sum_strong_pos2!=0) { |
501 |
beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2); |
502 |
} |
503 |
else { |
504 |
a_ii2+=sum_all_pos2; |
505 |
beta[k*block_size+1]=0; |
506 |
} |
507 |
if(sum_strong_pos3!=0) { |
508 |
beta[k*block_size+2]=sum_all_pos3/(sum_strong_pos3); |
509 |
} |
510 |
else { |
511 |
a_ii3+=sum_all_pos3; |
512 |
beta[k*block_size+2]=0; |
513 |
} |
514 |
|
515 |
alpha[k*block_size]=alpha[k*block_size]/(a_ii1); |
516 |
alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2); |
517 |
alpha[k*block_size+2]=alpha[k*block_size+2]/(a_ii3); |
518 |
beta[k*block_size]=beta[k*block_size]/(a_ii1); |
519 |
beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2); |
520 |
beta[k*block_size+2]=beta[k*block_size+2]/(a_ii3); |
521 |
k++; |
522 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
523 |
} |
524 |
} |
525 |
} |
526 |
|
527 |
if (block_size==1) { |
528 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
529 |
for (i = 0; i < W_FC->numRows; ++i) { |
530 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
531 |
if(W_FC->val[iPtr]<0) { |
532 |
W_FC->val[iPtr]=-alpha[i]*W_FC->val[iPtr]; |
533 |
} |
534 |
else if(W_FC->val[iPtr]>0) { |
535 |
W_FC->val[iPtr]=-beta[i]*W_FC->val[iPtr]; |
536 |
} |
537 |
} |
538 |
} |
539 |
} else if (block_size==2) { |
540 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
541 |
for (i = 0; i < W_FC->numRows; ++i) { |
542 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
543 |
if(W_FC->val[iPtr*block_size*block_size]<0) { |
544 |
W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
545 |
} |
546 |
else if(W_FC->val[iPtr*block_size*block_size]>0) { |
547 |
W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
548 |
} |
549 |
if(W_FC->val[iPtr*block_size*block_size+3]<0) { |
550 |
W_FC->val[iPtr*block_size*block_size+3]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3]; |
551 |
} |
552 |
else if(W_FC->val[iPtr*block_size*block_size+3]>0) { |
553 |
W_FC->val[iPtr*block_size*block_size+3]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3]; |
554 |
} |
555 |
W_FC->val[iPtr*block_size*block_size+1]=0; |
556 |
W_FC->val[iPtr*block_size*block_size+2]=0; |
557 |
} |
558 |
} |
559 |
} else if (block_size==3) { |
560 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
561 |
for (i = 0; i < W_FC->numRows; ++i) { |
562 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
563 |
if(W_FC->val[iPtr*block_size*block_size]<0) { |
564 |
W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
565 |
} |
566 |
else if(W_FC->val[iPtr*block_size*block_size]>0) { |
567 |
W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
568 |
} |
569 |
if(W_FC->val[iPtr*block_size*block_size+4]<0) { |
570 |
W_FC->val[iPtr*block_size*block_size+4]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4]; |
571 |
} |
572 |
else if(W_FC->val[iPtr*block_size*block_size+4]>0) { |
573 |
W_FC->val[iPtr*block_size*block_size+4]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4]; |
574 |
} |
575 |
if(W_FC->val[iPtr*block_size*block_size+8]<0) { |
576 |
W_FC->val[iPtr*block_size*block_size+8]=-alpha[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8]; |
577 |
} |
578 |
else if(W_FC->val[iPtr*block_size*block_size+8]>0) { |
579 |
W_FC->val[iPtr*block_size*block_size+8]=-beta[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8]; |
580 |
} |
581 |
W_FC->val[iPtr*block_size*block_size+1]=0; |
582 |
W_FC->val[iPtr*block_size*block_size+2]=0; |
583 |
W_FC->val[iPtr*block_size*block_size+3]=0; |
584 |
W_FC->val[iPtr*block_size*block_size+5]=0; |
585 |
W_FC->val[iPtr*block_size*block_size+6]=0; |
586 |
W_FC->val[iPtr*block_size*block_size+7]=0; |
587 |
} |
588 |
} |
589 |
|
590 |
} |
591 |
|
592 |
|
593 |
TMPMEMFREE(alpha); |
594 |
TMPMEMFREE(beta); |
595 |
} |
596 |
|
597 |
|
598 |
/*A_c=R*A*P taking into account coarseneing */ |
599 |
/*Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix *R, Paso_SparseMatrix *P) { |
600 |
|
601 |
Paso_SparseMatrix *temp=NULL; |
602 |
Paso_SparseMatrix *out=NULL; |
603 |
|
604 |
temp=Paso_SparseMatrix_MatrixMatrix(A,P); |
605 |
out=Paso_SparseMatrix_MatrixMatrix(R,temp); |
606 |
|
607 |
Paso_SparseMatrix_free(temp); |
608 |
|
609 |
return out; |
610 |
} |
611 |
*/ |
612 |
|
613 |
Paso_SparseMatrix* Paso_SparseMatrix_MatrixMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix* B) { |
614 |
index_t iptrA,iptrB,iptrC; |
615 |
dim_t i,j=0,k,l; |
616 |
Paso_Pattern* outpattern=NULL; |
617 |
Paso_SparseMatrix *out=NULL; |
618 |
double sum,b_lj=0; |
619 |
double sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,b_lj1=0,b_lj2=0,b_lj3=0,b_lj4=0,b_lj5=0,b_lj6=0,b_lj7=0,b_lj8=0,b_lj9=0; |
620 |
|
621 |
dim_t block_size=A->row_block_size; |
622 |
|
623 |
double time0=0; |
624 |
bool_t verbose=0; |
625 |
|
626 |
time0=Paso_timer(); |
627 |
|
628 |
outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,B->pattern); |
629 |
out=Paso_SparseMatrix_alloc(A->type, outpattern, block_size, block_size, FALSE); |
630 |
|
631 |
time0=Paso_timer()-time0; |
632 |
if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Pattern creation: %e\n",time0); |
633 |
|
634 |
time0=Paso_timer(); |
635 |
|
636 |
if(block_size==1) { |
637 |
#pragma omp parallel for private(i,iptrC,j,sum,iptrA,k,b_lj,iptrB,l) schedule(static) |
638 |
for(i = 0; i < out->numRows; i++) { |
639 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
640 |
j = out->pattern->index[iptrC]; |
641 |
sum=0; |
642 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
643 |
k=A->pattern->index[iptrA]; |
644 |
/*can be replaced by bsearch */ |
645 |
b_lj=0; |
646 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
647 |
l=B->pattern->index[iptrB]; |
648 |
if(l==j) { |
649 |
b_lj=B->val[iptrB]; |
650 |
break; |
651 |
} |
652 |
} |
653 |
sum+=A->val[iptrA]*b_lj; |
654 |
} |
655 |
out->val[iptrC]=sum; |
656 |
} |
657 |
} |
658 |
} else if (block_size==2) { |
659 |
#pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,iptrB,l) schedule(static) |
660 |
for(i = 0; i < out->numRows; i++) { |
661 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
662 |
j = out->pattern->index[iptrC]; |
663 |
sum1=0; |
664 |
sum2=0; |
665 |
sum3=0; |
666 |
sum4=0; |
667 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
668 |
k=A->pattern->index[iptrA]; |
669 |
/*can be replaced by bsearch */ |
670 |
b_lj1=0; |
671 |
b_lj2=0; |
672 |
b_lj3=0; |
673 |
b_lj4=0; |
674 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
675 |
l=B->pattern->index[iptrB]; |
676 |
if(l==j) { |
677 |
b_lj1=B->val[iptrB*block_size*block_size]; |
678 |
b_lj2=B->val[iptrB*block_size*block_size+1]; |
679 |
b_lj3=B->val[iptrB*block_size*block_size+2]; |
680 |
b_lj4=B->val[iptrB*block_size*block_size+3]; |
681 |
break; |
682 |
} |
683 |
} |
684 |
sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+1]*b_lj3; |
685 |
sum2+=A->val[iptrA*block_size*block_size]*b_lj2+A->val[iptrA*block_size*block_size+1]*b_lj4; |
686 |
sum3+=A->val[iptrA*block_size*block_size+2]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj3; |
687 |
sum4+=A->val[iptrA*block_size*block_size+2]*b_lj2+A->val[iptrA*block_size*block_size+3]*b_lj4; |
688 |
} |
689 |
out->val[iptrC*block_size*block_size]=sum1; |
690 |
out->val[iptrC*block_size*block_size+1]=sum2; |
691 |
out->val[iptrC*block_size*block_size+2]=sum3; |
692 |
out->val[iptrC*block_size*block_size+3]=sum4; |
693 |
} |
694 |
} |
695 |
} else if (block_size==3) { |
696 |
#pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,b_lj5,b_lj6,b_lj7,b_lj8,b_lj9,iptrB,l) schedule(static) |
697 |
for(i = 0; i < out->numRows; i++) { |
698 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
699 |
j = out->pattern->index[iptrC]; |
700 |
sum1=0; |
701 |
sum2=0; |
702 |
sum3=0; |
703 |
sum4=0; |
704 |
sum5=0; |
705 |
sum6=0; |
706 |
sum7=0; |
707 |
sum8=0; |
708 |
sum9=0; |
709 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
710 |
k=A->pattern->index[iptrA]; |
711 |
/*can be replaced by bsearch */ |
712 |
b_lj1=0; |
713 |
b_lj2=0; |
714 |
b_lj3=0; |
715 |
b_lj4=0; |
716 |
b_lj5=0; |
717 |
b_lj6=0; |
718 |
b_lj7=0; |
719 |
b_lj8=0; |
720 |
b_lj9=0; |
721 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
722 |
l=B->pattern->index[iptrB]; |
723 |
if(l==j) { |
724 |
b_lj1=B->val[iptrB*block_size*block_size]; |
725 |
b_lj2=B->val[iptrB*block_size*block_size+1]; |
726 |
b_lj3=B->val[iptrB*block_size*block_size+2]; |
727 |
b_lj4=B->val[iptrB*block_size*block_size+3]; |
728 |
b_lj5=B->val[iptrB*block_size*block_size+4]; |
729 |
b_lj6=B->val[iptrB*block_size*block_size+5]; |
730 |
b_lj7=B->val[iptrB*block_size*block_size+6]; |
731 |
b_lj8=B->val[iptrB*block_size*block_size+7]; |
732 |
b_lj9=B->val[iptrB*block_size*block_size+8]; |
733 |
break; |
734 |
} |
735 |
} |
736 |
sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+1]*b_lj4+A->val[iptrA*block_size*block_size+2]*b_lj7; |
737 |
sum2+=A->val[iptrA*block_size*block_size]*b_lj2+A->val[iptrA*block_size*block_size+1]*b_lj5+A->val[iptrA*block_size*block_size+2]*b_lj8; |
738 |
sum3+=A->val[iptrA*block_size*block_size]*b_lj3+A->val[iptrA*block_size*block_size+1]*b_lj6+A->val[iptrA*block_size*block_size+2]*b_lj9; |
739 |
sum4+=A->val[iptrA*block_size*block_size+3]*b_lj1+A->val[iptrA*block_size*block_size+4]*b_lj4+A->val[iptrA*block_size*block_size+5]*b_lj7; |
740 |
sum5+=A->val[iptrA*block_size*block_size+3]*b_lj2+A->val[iptrA*block_size*block_size+4]*b_lj5+A->val[iptrA*block_size*block_size+5]*b_lj8; |
741 |
sum6+=A->val[iptrA*block_size*block_size+3]*b_lj3+A->val[iptrA*block_size*block_size+4]*b_lj6+A->val[iptrA*block_size*block_size+5]*b_lj9; |
742 |
sum7+=A->val[iptrA*block_size*block_size+6]*b_lj1+A->val[iptrA*block_size*block_size+7]*b_lj4+A->val[iptrA*block_size*block_size+8]*b_lj7; |
743 |
sum8+=A->val[iptrA*block_size*block_size+6]*b_lj2+A->val[iptrA*block_size*block_size+7]*b_lj5+A->val[iptrA*block_size*block_size+8]*b_lj8; |
744 |
sum9+=A->val[iptrA*block_size*block_size+6]*b_lj3+A->val[iptrA*block_size*block_size+7]*b_lj6+A->val[iptrA*block_size*block_size+8]*b_lj9; |
745 |
} |
746 |
out->val[iptrC*block_size*block_size]=sum1; |
747 |
out->val[iptrC*block_size*block_size+1]=sum2; |
748 |
out->val[iptrC*block_size*block_size+2]=sum3; |
749 |
out->val[iptrC*block_size*block_size+3]=sum4; |
750 |
out->val[iptrC*block_size*block_size+4]=sum5; |
751 |
out->val[iptrC*block_size*block_size+5]=sum6; |
752 |
out->val[iptrC*block_size*block_size+6]=sum7; |
753 |
out->val[iptrC*block_size*block_size+7]=sum8; |
754 |
out->val[iptrC*block_size*block_size+8]=sum9; |
755 |
} |
756 |
} |
757 |
|
758 |
} |
759 |
|
760 |
time0=Paso_timer()-time0; |
761 |
if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Matrix multiplication: %e\n",time0); |
762 |
|
763 |
Paso_Pattern_free(outpattern); |
764 |
|
765 |
return out; |
766 |
} |
767 |
|
768 |
|
769 |
Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A,Paso_SparseMatrix *R,Paso_SparseMatrix *P) { |
770 |
|
771 |
index_t iptrA_c,iptrR,iptrP,iptrA; |
772 |
dim_t i,j,k,l,m; |
773 |
double first_sum=0,second_sum=0,p_lj=0,a_kl=0,r_ik=0; |
774 |
Paso_Pattern* temp=NULL; |
775 |
Paso_Pattern* outpattern=NULL; |
776 |
Paso_SparseMatrix *A_c=NULL; |
777 |
|
778 |
double time0=0; |
779 |
bool_t verbose=0; |
780 |
|
781 |
time0=Paso_timer(); |
782 |
|
783 |
temp=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,P->pattern); |
784 |
outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,R->pattern,temp); |
785 |
A_c=Paso_SparseMatrix_alloc(A->type,outpattern,1,1, TRUE); |
786 |
|
787 |
time0=Paso_timer()-time0; |
788 |
if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Pattern creation: %e\n",time0); |
789 |
|
790 |
/*a^c_ij=sum_k^n(r_ik)sum_l^n(a_kl*P_lj)*/ |
791 |
|
792 |
time0=Paso_timer(); |
793 |
|
794 |
#pragma omp parallel for private(i,iptrA_c,j,second_sum,iptrR,k,first_sum,p_lj,iptrP,m,a_kl,r_ik) schedule(static) |
795 |
for(i = 0; i < A_c->numRows; i++) { |
796 |
for(iptrA_c = A_c->pattern->ptr[i]; iptrA_c < A_c->pattern->ptr[i+1]; ++iptrA_c) { |
797 |
j = A_c->pattern->index[iptrA_c]; |
798 |
second_sum=0; |
799 |
for(iptrR = R->pattern->ptr[i]; iptrR < R->pattern->ptr[i+1]; ++iptrR) { |
800 |
k = R->pattern->index[iptrR]; |
801 |
first_sum=0; |
802 |
for(iptrA = A->pattern->ptr[k]; iptrA < A->pattern->ptr[k+1]; ++iptrA) { |
803 |
l= A->pattern->index[iptrA]; |
804 |
p_lj=0; |
805 |
/*This loop can be replaced by bsearch */ |
806 |
for(iptrP = P->pattern->ptr[l]; iptrP < P->pattern->ptr[l+1]; ++iptrP) { |
807 |
m=P->pattern->index[iptrP]; |
808 |
if(m==j) { |
809 |
p_lj=P->val[iptrP]; |
810 |
break; |
811 |
} |
812 |
} |
813 |
a_kl=A->val[iptrA]; |
814 |
first_sum+=a_kl*p_lj; |
815 |
} |
816 |
r_ik=R->val[iptrR]; |
817 |
second_sum+=r_ik*first_sum; |
818 |
} |
819 |
A_c->val[iptrA_c]=second_sum; |
820 |
} |
821 |
} |
822 |
time0=Paso_timer()-time0; |
823 |
if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Matrix multiplication: %e\n",time0); |
824 |
|
825 |
Paso_Pattern_free(outpattern); |
826 |
Paso_Pattern_free(temp); |
827 |
|
828 |
return A_c; |
829 |
} |
830 |
|
831 |
|
832 |
Paso_SparseMatrix* Paso_SparseMatrix_RemovePositiveOffdiagonals(Paso_SparseMatrix* P){ |
833 |
|
834 |
Paso_Pattern *outpattern=NULL; |
835 |
Paso_SparseMatrix *out=NULL; |
836 |
|
837 |
Paso_IndexList* index_list=NULL; |
838 |
dim_t n=P->numRows; |
839 |
dim_t i,j,k=0; |
840 |
index_t iptr,jptr; |
841 |
|
842 |
index_list=TMPMEMALLOC(n,Paso_IndexList); |
843 |
if (! Paso_checkPtr(index_list)) { |
844 |
#pragma omp parallel for private(i) schedule(static) |
845 |
for(i=0;i<n;++i) { |
846 |
index_list[i].extension=NULL; |
847 |
index_list[i].n=0; |
848 |
} |
849 |
} |
850 |
|
851 |
|
852 |
for (i=0;i<n;++i) { |
853 |
for (iptr=P->pattern->ptr[i];iptr<P->pattern->ptr[i+1]; ++iptr) { |
854 |
j=P->pattern->index[iptr]; |
855 |
if(P->val[iptr]<0) { |
856 |
Paso_IndexList_insertIndex(&(index_list[j]),i); |
857 |
} |
858 |
if(i==j) { |
859 |
Paso_IndexList_insertIndex(&(index_list[j]),i); |
860 |
} |
861 |
} |
862 |
} |
863 |
|
864 |
outpattern=Paso_IndexList_createPattern(0, n,index_list,0,n,0); |
865 |
out=Paso_SparseMatrix_alloc(P->type,outpattern,1,1,TRUE); |
866 |
|
867 |
for (i=0;i<out->numRows;++i) { |
868 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
869 |
j=out->pattern->index[iptr]; |
870 |
/*This for later will be replaced by bsearch!!*/ |
871 |
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
872 |
k=P->pattern->index[jptr]; |
873 |
if(k==i) { |
874 |
out->val[iptr]=P->val[jptr]; |
875 |
} |
876 |
} |
877 |
} |
878 |
} |
879 |
|
880 |
/* clean up */ |
881 |
if (index_list!=NULL) { |
882 |
#pragma omp parallel for private(i) schedule(static) |
883 |
for(i=0;i<n;++i) Paso_IndexList_free(index_list[i].extension); |
884 |
} |
885 |
TMPMEMFREE(index_list); |
886 |
Paso_Pattern_free(outpattern); |
887 |
return out; |
888 |
} |
889 |
|
890 |
|