1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: SparseMatrix_AMGcomponents */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Artak Amirbekyan, artak@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "SparseMatrix.h" |
27 |
#include "PasoUtil.h" |
28 |
|
29 |
/************************************************************** |
30 |
|
31 |
Methods nessecary for AMG preconditioner |
32 |
|
33 |
*/ |
34 |
|
35 |
/* Prolongation matrix is constracted by W_FC as P<- [W_FC] |
36 |
[I_CC] |
37 |
*/ |
38 |
|
39 |
Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* marker_F){ |
40 |
|
41 |
Paso_Pattern *outpattern=NULL; |
42 |
Paso_SparseMatrix *out=NULL; |
43 |
index_t iptr,wptr; |
44 |
|
45 |
const dim_t n=W->numRows+W->numCols; |
46 |
dim_t i,j,k=0; |
47 |
dim_t block_size=W->row_block_size; |
48 |
Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n); |
49 |
|
50 |
|
51 |
for (i=0;i<n;++i) { |
52 |
if (marker_F[i]) { |
53 |
for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) { |
54 |
j=W->pattern->index[iptr]; |
55 |
Paso_IndexListArray_insertIndex(index_list,i,j); |
56 |
} |
57 |
k++; |
58 |
} |
59 |
else { |
60 |
Paso_IndexListArray_insertIndex(index_list,i,i-k); |
61 |
} |
62 |
} |
63 |
|
64 |
outpattern=Paso_Pattern_fromIndexListArray(0,index_list,0,W->numCols,0); |
65 |
out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE); |
66 |
|
67 |
k=0; |
68 |
|
69 |
if (block_size==1) { |
70 |
for (i=0;i<n;++i) { |
71 |
if (marker_F[i]) { |
72 |
wptr=W->pattern->ptr[k]; |
73 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
74 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
75 |
wptr++; |
76 |
} |
77 |
k++; |
78 |
} |
79 |
else { |
80 |
iptr=out->pattern->ptr[i]; |
81 |
out->val[iptr*block_size*block_size]=1; |
82 |
} |
83 |
} |
84 |
} else if (block_size==2) { |
85 |
for (i=0;i<n;++i) { |
86 |
if (marker_F[i]) { |
87 |
wptr=W->pattern->ptr[k]; |
88 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
89 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
90 |
out->val[iptr*block_size*block_size+1]=0; |
91 |
out->val[iptr*block_size*block_size+2]=0; |
92 |
out->val[iptr*block_size*block_size+3]=W->val[wptr*block_size*block_size+3]; |
93 |
wptr++; |
94 |
} |
95 |
k++; |
96 |
} |
97 |
else { |
98 |
iptr=out->pattern->ptr[i]; |
99 |
out->val[iptr*block_size*block_size]=1; |
100 |
out->val[iptr*block_size*block_size+1]=0; |
101 |
out->val[iptr*block_size*block_size+2]=0; |
102 |
out->val[iptr*block_size*block_size+3]=1; |
103 |
} |
104 |
} |
105 |
} else if (block_size==3) { |
106 |
for (i=0;i<n;++i) { |
107 |
if (marker_F[i]) { |
108 |
wptr=W->pattern->ptr[k]; |
109 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
110 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
111 |
out->val[iptr*block_size*block_size+1]=0; |
112 |
out->val[iptr*block_size*block_size+2]=0; |
113 |
out->val[iptr*block_size*block_size+3]=0; |
114 |
out->val[iptr*block_size*block_size+4]=W->val[wptr*block_size*block_size+4]; |
115 |
out->val[iptr*block_size*block_size+5]=0; |
116 |
out->val[iptr*block_size*block_size+6]=0; |
117 |
out->val[iptr*block_size*block_size+7]=0; |
118 |
out->val[iptr*block_size*block_size+8]=W->val[wptr*block_size*block_size+8]; |
119 |
wptr++; |
120 |
} |
121 |
k++; |
122 |
} |
123 |
else { |
124 |
iptr=out->pattern->ptr[i]; |
125 |
out->val[iptr*block_size*block_size]=1; |
126 |
out->val[iptr*block_size*block_size+1]=0; |
127 |
out->val[iptr*block_size*block_size+2]=0; |
128 |
out->val[iptr*block_size*block_size+3]=0; |
129 |
out->val[iptr*block_size*block_size+4]=1; |
130 |
out->val[iptr*block_size*block_size+5]=0; |
131 |
out->val[iptr*block_size*block_size+6]=0; |
132 |
out->val[iptr*block_size*block_size+7]=0; |
133 |
out->val[iptr*block_size*block_size+8]=1; |
134 |
} |
135 |
} |
136 |
} |
137 |
|
138 |
/* clean up */ |
139 |
Paso_IndexListArray_free(index_list); |
140 |
Paso_Pattern_free(outpattern); |
141 |
return out; |
142 |
} |
143 |
/* Restriction matrix R=P^T */ |
144 |
|
145 |
|
146 |
|
147 |
void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* marker_F){ |
148 |
|
149 |
double *alpha; |
150 |
double *beta; |
151 |
double a_ii=0; |
152 |
double sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos; |
153 |
double sum_all_neg1,sum_all_pos1,sum_strong_neg1,sum_strong_pos1,sum_all_neg2,sum_all_pos2,sum_strong_neg2,sum_strong_pos2,sum_all_neg3,sum_all_pos3,sum_strong_neg3,sum_strong_pos3; |
154 |
double a_ii1=0; |
155 |
double a_ii2=0; |
156 |
double a_ii3=0; |
157 |
|
158 |
dim_t n=A->numRows; |
159 |
dim_t F=W_FC->numRows; |
160 |
dim_t i,j,k; |
161 |
|
162 |
dim_t block_size=A->row_block_size; |
163 |
|
164 |
index_t iPtr,dptr=0; |
165 |
/*index_t *index, *where_p;*/ |
166 |
|
167 |
alpha=TMPMEMALLOC(F*block_size,double); |
168 |
beta=TMPMEMALLOC(F*block_size,double); |
169 |
|
170 |
|
171 |
if (block_size==1) { |
172 |
k=0; |
173 |
for (i = 0; i < n; ++i) { |
174 |
if(marker_F[i]) { |
175 |
alpha[k]=0; |
176 |
beta[k]=0; |
177 |
sum_all_neg=0; |
178 |
sum_all_pos=0; |
179 |
sum_strong_neg=0; |
180 |
sum_strong_pos=0; |
181 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
182 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
183 |
j=A->pattern->index[iPtr]; |
184 |
if(j!=i) { |
185 |
if(A->val[iPtr]<0) { |
186 |
sum_all_neg+=A->val[iPtr]; |
187 |
if(!marker_F[j]) { |
188 |
sum_strong_neg+=A->val[iPtr]; |
189 |
} |
190 |
} |
191 |
else if(A->val[iPtr]>0) { |
192 |
sum_all_pos+=A->val[iPtr]; |
193 |
if(!marker_F[j]) { |
194 |
sum_strong_pos+=A->val[iPtr]; |
195 |
} |
196 |
} |
197 |
} |
198 |
else { |
199 |
a_ii=A->val[iPtr]; |
200 |
dptr=iPtr; |
201 |
} |
202 |
} |
203 |
|
204 |
if(sum_strong_neg!=0) { |
205 |
alpha[k]=sum_all_neg/(sum_strong_neg); |
206 |
} |
207 |
if(sum_strong_pos!=0) { |
208 |
beta[k]=sum_all_pos/(sum_strong_pos); |
209 |
} |
210 |
else { |
211 |
a_ii+=sum_all_pos; |
212 |
beta[k]=0; |
213 |
} |
214 |
alpha[k]=alpha[k]/(a_ii); |
215 |
beta[k]=beta[k]/(a_ii); |
216 |
k++; |
217 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
218 |
} |
219 |
} |
220 |
} else if (block_size==2) { |
221 |
k=0; |
222 |
for (i = 0; i < n; ++i) { |
223 |
if(marker_F[i]) { |
224 |
alpha[k*block_size]=0; |
225 |
alpha[k*block_size+1]=0; |
226 |
beta[k*block_size]=0; |
227 |
beta[k*block_size+1]=0; |
228 |
sum_all_neg1=0; |
229 |
sum_all_pos1=0; |
230 |
sum_strong_neg1=0; |
231 |
sum_strong_pos1=0; |
232 |
sum_all_neg2=0; |
233 |
sum_all_pos2=0; |
234 |
sum_strong_neg2=0; |
235 |
sum_strong_pos2=0; |
236 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
237 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
238 |
j=A->pattern->index[iPtr]; |
239 |
if(j!=i) { |
240 |
if(A->val[iPtr*block_size*block_size]<0) { |
241 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
242 |
if(!marker_F[j]) { |
243 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
244 |
} |
245 |
} |
246 |
else if(A->val[iPtr*block_size*block_size]>0) { |
247 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
248 |
if(!marker_F[j]) { |
249 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
250 |
} |
251 |
} |
252 |
if(A->val[iPtr*block_size*block_size+3]<0) { |
253 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+3]; |
254 |
if(!marker_F[j]) { |
255 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+3]; |
256 |
} |
257 |
} else if(A->val[iPtr*block_size*block_size+3]>0) { |
258 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+3]; |
259 |
if(!marker_F[j]) { |
260 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+3]; |
261 |
} |
262 |
} |
263 |
|
264 |
} |
265 |
else { |
266 |
a_ii1=A->val[iPtr*block_size*block_size]; |
267 |
a_ii2=A->val[iPtr*block_size*block_size+3]; |
268 |
dptr=iPtr; |
269 |
} |
270 |
} |
271 |
|
272 |
if(sum_strong_neg1!=0) { |
273 |
alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1); |
274 |
} |
275 |
if(sum_strong_neg2!=0) { |
276 |
alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2); |
277 |
} |
278 |
|
279 |
if(sum_strong_pos1!=0) { |
280 |
beta[k*block_size]=sum_all_pos1/(sum_strong_pos1); |
281 |
} |
282 |
else { |
283 |
a_ii1+=sum_all_pos1; |
284 |
beta[k*block_size]=0; |
285 |
} |
286 |
if(sum_strong_pos2!=0) { |
287 |
beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2); |
288 |
} |
289 |
else { |
290 |
a_ii2+=sum_all_pos2; |
291 |
beta[k*block_size+1]=0; |
292 |
} |
293 |
|
294 |
alpha[k*block_size]=alpha[k*block_size]/(a_ii1); |
295 |
alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2); |
296 |
beta[k*block_size]=beta[k*block_size]/(a_ii1); |
297 |
beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2); |
298 |
k++; |
299 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
300 |
} |
301 |
} |
302 |
} else if (block_size==3) { |
303 |
k=0; |
304 |
for (i = 0; i < n; ++i) { |
305 |
if(marker_F[i]) { |
306 |
alpha[k*block_size]=0; |
307 |
alpha[k*block_size+1]=0; |
308 |
alpha[k*block_size+2]=0; |
309 |
beta[k*block_size]=0; |
310 |
beta[k*block_size+1]=0; |
311 |
beta[k*block_size+2]=0; |
312 |
sum_all_neg1=0; |
313 |
sum_all_pos1=0; |
314 |
sum_strong_neg1=0; |
315 |
sum_strong_pos1=0; |
316 |
sum_all_neg2=0; |
317 |
sum_all_pos2=0; |
318 |
sum_strong_neg2=0; |
319 |
sum_strong_pos2=0; |
320 |
sum_all_neg3=0; |
321 |
sum_all_pos3=0; |
322 |
sum_strong_neg3=0; |
323 |
sum_strong_pos3=0; |
324 |
/*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/ |
325 |
for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) { |
326 |
j=A->pattern->index[iPtr]; |
327 |
if(j!=i) { |
328 |
if(A->val[iPtr*block_size*block_size]<0) { |
329 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
330 |
if(!marker_F[j]) { |
331 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
332 |
} |
333 |
} |
334 |
else if(A->val[iPtr*block_size*block_size]>0) { |
335 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
336 |
if(!marker_F[j]) { |
337 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
338 |
} |
339 |
} |
340 |
if(A->val[iPtr*block_size*block_size+4]<0) { |
341 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+4]; |
342 |
if(!marker_F[j]) { |
343 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+4]; |
344 |
} |
345 |
} else if(A->val[iPtr*block_size*block_size+4]>0) { |
346 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+4]; |
347 |
if(!marker_F[j]) { |
348 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+4]; |
349 |
} |
350 |
} |
351 |
if(A->val[iPtr*block_size*block_size+8]<0) { |
352 |
sum_all_neg3+=A->val[iPtr*block_size*block_size+8]; |
353 |
if(!marker_F[j]) { |
354 |
sum_strong_neg3+=A->val[iPtr*block_size*block_size+8]; |
355 |
} |
356 |
} else if(A->val[iPtr*block_size*block_size+8]>0) { |
357 |
sum_all_pos3+=A->val[iPtr*block_size*block_size+8]; |
358 |
if(!marker_F[j]) { |
359 |
sum_strong_pos3+=A->val[iPtr*block_size*block_size+8]; |
360 |
} |
361 |
} |
362 |
|
363 |
|
364 |
} |
365 |
else { |
366 |
a_ii1=A->val[iPtr*block_size*block_size]; |
367 |
a_ii2=A->val[iPtr*block_size*block_size+4]; |
368 |
a_ii3=A->val[iPtr*block_size*block_size+8]; |
369 |
dptr=iPtr; |
370 |
} |
371 |
} |
372 |
|
373 |
if(sum_strong_neg1!=0) { |
374 |
alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1); |
375 |
} |
376 |
if(sum_strong_neg2!=0) { |
377 |
alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2); |
378 |
} |
379 |
if(sum_strong_neg3!=0) { |
380 |
alpha[k*block_size+2]=sum_all_neg3/(sum_strong_neg3); |
381 |
} |
382 |
|
383 |
if(sum_strong_pos1!=0) { |
384 |
beta[k*block_size]=sum_all_pos1/(sum_strong_pos1); |
385 |
} |
386 |
else { |
387 |
a_ii1+=sum_all_pos1; |
388 |
beta[k*block_size]=0; |
389 |
} |
390 |
if(sum_strong_pos2!=0) { |
391 |
beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2); |
392 |
} |
393 |
else { |
394 |
a_ii2+=sum_all_pos2; |
395 |
beta[k*block_size+1]=0; |
396 |
} |
397 |
if(sum_strong_pos3!=0) { |
398 |
beta[k*block_size+2]=sum_all_pos3/(sum_strong_pos3); |
399 |
} |
400 |
else { |
401 |
a_ii3+=sum_all_pos3; |
402 |
beta[k*block_size+2]=0; |
403 |
} |
404 |
|
405 |
alpha[k*block_size]=alpha[k*block_size]/(a_ii1); |
406 |
alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2); |
407 |
alpha[k*block_size+2]=alpha[k*block_size+2]/(a_ii3); |
408 |
beta[k*block_size]=beta[k*block_size]/(a_ii1); |
409 |
beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2); |
410 |
beta[k*block_size+2]=beta[k*block_size+2]/(a_ii3); |
411 |
k++; |
412 |
/*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/ |
413 |
} |
414 |
} |
415 |
} |
416 |
|
417 |
if (block_size==1) { |
418 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
419 |
for (i = 0; i < W_FC->numRows; ++i) { |
420 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
421 |
if(W_FC->val[iPtr]<0) { |
422 |
W_FC->val[iPtr]=-alpha[i]*W_FC->val[iPtr]; |
423 |
} |
424 |
else if(W_FC->val[iPtr]>0) { |
425 |
W_FC->val[iPtr]=-beta[i]*W_FC->val[iPtr]; |
426 |
} |
427 |
} |
428 |
} |
429 |
} else if (block_size==2) { |
430 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
431 |
for (i = 0; i < W_FC->numRows; ++i) { |
432 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
433 |
if(W_FC->val[iPtr*block_size*block_size]<0) { |
434 |
W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
435 |
} |
436 |
else if(W_FC->val[iPtr*block_size*block_size]>0) { |
437 |
W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
438 |
} |
439 |
if(W_FC->val[iPtr*block_size*block_size+3]<0) { |
440 |
W_FC->val[iPtr*block_size*block_size+3]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3]; |
441 |
} |
442 |
else if(W_FC->val[iPtr*block_size*block_size+3]>0) { |
443 |
W_FC->val[iPtr*block_size*block_size+3]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3]; |
444 |
} |
445 |
W_FC->val[iPtr*block_size*block_size+1]=0; |
446 |
W_FC->val[iPtr*block_size*block_size+2]=0; |
447 |
} |
448 |
} |
449 |
} else if (block_size==3) { |
450 |
#pragma omp parallel for private(i,iPtr) schedule(static) |
451 |
for (i = 0; i < W_FC->numRows; ++i) { |
452 |
for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) { |
453 |
if(W_FC->val[iPtr*block_size*block_size]<0) { |
454 |
W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
455 |
} |
456 |
else if(W_FC->val[iPtr*block_size*block_size]>0) { |
457 |
W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size]; |
458 |
} |
459 |
if(W_FC->val[iPtr*block_size*block_size+4]<0) { |
460 |
W_FC->val[iPtr*block_size*block_size+4]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4]; |
461 |
} |
462 |
else if(W_FC->val[iPtr*block_size*block_size+4]>0) { |
463 |
W_FC->val[iPtr*block_size*block_size+4]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4]; |
464 |
} |
465 |
if(W_FC->val[iPtr*block_size*block_size+8]<0) { |
466 |
W_FC->val[iPtr*block_size*block_size+8]=-alpha[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8]; |
467 |
} |
468 |
else if(W_FC->val[iPtr*block_size*block_size+8]>0) { |
469 |
W_FC->val[iPtr*block_size*block_size+8]=-beta[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8]; |
470 |
} |
471 |
W_FC->val[iPtr*block_size*block_size+1]=0; |
472 |
W_FC->val[iPtr*block_size*block_size+2]=0; |
473 |
W_FC->val[iPtr*block_size*block_size+3]=0; |
474 |
W_FC->val[iPtr*block_size*block_size+5]=0; |
475 |
W_FC->val[iPtr*block_size*block_size+6]=0; |
476 |
W_FC->val[iPtr*block_size*block_size+7]=0; |
477 |
} |
478 |
} |
479 |
|
480 |
} |
481 |
|
482 |
|
483 |
TMPMEMFREE(alpha); |
484 |
TMPMEMFREE(beta); |
485 |
} |
486 |
|
487 |
|
488 |
/*A_c=R*A*P taking into account coarseneing */ |
489 |
/*Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix *R, Paso_SparseMatrix *P) { |
490 |
|
491 |
Paso_SparseMatrix *temp=NULL; |
492 |
Paso_SparseMatrix *out=NULL; |
493 |
|
494 |
temp=Paso_SparseMatrix_MatrixMatrix(A,P); |
495 |
out=Paso_SparseMatrix_MatrixMatrix(R,temp); |
496 |
|
497 |
Paso_SparseMatrix_free(temp); |
498 |
|
499 |
return out; |
500 |
} |
501 |
*/ |
502 |
|
503 |
Paso_SparseMatrix* Paso_SparseMatrix_MatrixMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix* B) { |
504 |
index_t iptrA,iptrB,iptrC; |
505 |
dim_t i,j=0,k,l; |
506 |
Paso_Pattern* outpattern=NULL; |
507 |
Paso_SparseMatrix *out=NULL; |
508 |
double sum,b_lj=0; |
509 |
double sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,b_lj1=0,b_lj2=0,b_lj3=0,b_lj4=0,b_lj5=0,b_lj6=0,b_lj7=0,b_lj8=0,b_lj9=0; |
510 |
|
511 |
dim_t block_size=A->row_block_size; |
512 |
|
513 |
double time0=0; |
514 |
bool_t verbose=0; |
515 |
|
516 |
time0=Esys_timer(); |
517 |
|
518 |
outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,B->pattern); |
519 |
out=Paso_SparseMatrix_alloc(A->type, outpattern, block_size, block_size, FALSE); |
520 |
|
521 |
time0=Esys_timer()-time0; |
522 |
if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Pattern creation: %e\n",time0); |
523 |
|
524 |
time0=Esys_timer(); |
525 |
|
526 |
if(block_size==1) { |
527 |
#pragma omp parallel for private(i,iptrC,j,sum,iptrA,k,b_lj,iptrB,l) schedule(static) |
528 |
for(i = 0; i < out->numRows; i++) { |
529 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
530 |
j = out->pattern->index[iptrC]; |
531 |
sum=0; |
532 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
533 |
k=A->pattern->index[iptrA]; |
534 |
/*can be replaced by bsearch */ |
535 |
b_lj=0; |
536 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
537 |
l=B->pattern->index[iptrB]; |
538 |
if(l==j) { |
539 |
b_lj=B->val[iptrB]; |
540 |
break; |
541 |
} |
542 |
} |
543 |
sum+=A->val[iptrA]*b_lj; |
544 |
} |
545 |
out->val[iptrC]=sum; |
546 |
} |
547 |
} |
548 |
} else if (block_size==2) { |
549 |
#pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,iptrB,l) schedule(static) |
550 |
for(i = 0; i < out->numRows; i++) { |
551 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
552 |
j = out->pattern->index[iptrC]; |
553 |
sum1=0; |
554 |
sum2=0; |
555 |
sum3=0; |
556 |
sum4=0; |
557 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
558 |
k=A->pattern->index[iptrA]; |
559 |
/*can be replaced by bsearch */ |
560 |
b_lj1=0; |
561 |
b_lj2=0; |
562 |
b_lj3=0; |
563 |
b_lj4=0; |
564 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
565 |
l=B->pattern->index[iptrB]; |
566 |
if(l==j) { |
567 |
b_lj1=B->val[iptrB*block_size*block_size]; |
568 |
b_lj2=B->val[iptrB*block_size*block_size+1]; |
569 |
b_lj3=B->val[iptrB*block_size*block_size+2]; |
570 |
b_lj4=B->val[iptrB*block_size*block_size+3]; |
571 |
break; |
572 |
} |
573 |
} |
574 |
sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+2]*b_lj2; |
575 |
sum2+=A->val[iptrA*block_size*block_size]*b_lj3+A->val[iptrA*block_size*block_size+2]*b_lj4; |
576 |
sum3+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2; |
577 |
sum4+=A->val[iptrA*block_size*block_size+1]*b_lj3+A->val[iptrA*block_size*block_size+3]*b_lj4; |
578 |
} |
579 |
out->val[iptrC*block_size*block_size]=sum1; |
580 |
out->val[iptrC*block_size*block_size+2]=sum2; |
581 |
out->val[iptrC*block_size*block_size+1]=sum3; |
582 |
out->val[iptrC*block_size*block_size+3]=sum4; |
583 |
} |
584 |
} |
585 |
} else if (block_size==3) { |
586 |
#pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,b_lj5,b_lj6,b_lj7,b_lj8,b_lj9,iptrB,l) schedule(static) |
587 |
for(i = 0; i < out->numRows; i++) { |
588 |
for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) { |
589 |
j = out->pattern->index[iptrC]; |
590 |
sum1=0; |
591 |
sum2=0; |
592 |
sum3=0; |
593 |
sum4=0; |
594 |
sum5=0; |
595 |
sum6=0; |
596 |
sum7=0; |
597 |
sum8=0; |
598 |
sum9=0; |
599 |
for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) { |
600 |
k=A->pattern->index[iptrA]; |
601 |
/*can be replaced by bsearch */ |
602 |
b_lj1=0; |
603 |
b_lj2=0; |
604 |
b_lj3=0; |
605 |
b_lj4=0; |
606 |
b_lj5=0; |
607 |
b_lj6=0; |
608 |
b_lj7=0; |
609 |
b_lj8=0; |
610 |
b_lj9=0; |
611 |
for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) { |
612 |
l=B->pattern->index[iptrB]; |
613 |
if(l==j) { |
614 |
b_lj1=B->val[iptrB*block_size*block_size]; |
615 |
b_lj2=B->val[iptrB*block_size*block_size+1]; |
616 |
b_lj3=B->val[iptrB*block_size*block_size+2]; |
617 |
b_lj4=B->val[iptrB*block_size*block_size+3]; |
618 |
b_lj5=B->val[iptrB*block_size*block_size+4]; |
619 |
b_lj6=B->val[iptrB*block_size*block_size+5]; |
620 |
b_lj7=B->val[iptrB*block_size*block_size+6]; |
621 |
b_lj8=B->val[iptrB*block_size*block_size+7]; |
622 |
b_lj9=B->val[iptrB*block_size*block_size+8]; |
623 |
break; |
624 |
} |
625 |
} |
626 |
sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2+A->val[iptrA*block_size*block_size+6]*b_lj3; |
627 |
sum2+=A->val[iptrA*block_size*block_size]*b_lj4+A->val[iptrA*block_size*block_size+3]*b_lj5+A->val[iptrA*block_size*block_size+6]*b_lj6; |
628 |
sum3+=A->val[iptrA*block_size*block_size]*b_lj7+A->val[iptrA*block_size*block_size+3]*b_lj8+A->val[iptrA*block_size*block_size+6]*b_lj9; |
629 |
sum4+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+4]*b_lj2+A->val[iptrA*block_size*block_size+7]*b_lj3; |
630 |
sum5+=A->val[iptrA*block_size*block_size+1]*b_lj4+A->val[iptrA*block_size*block_size+4]*b_lj5+A->val[iptrA*block_size*block_size+7]*b_lj6; |
631 |
sum6+=A->val[iptrA*block_size*block_size+1]*b_lj7+A->val[iptrA*block_size*block_size+4]*b_lj8+A->val[iptrA*block_size*block_size+7]*b_lj9; |
632 |
sum7+=A->val[iptrA*block_size*block_size+2]*b_lj1+A->val[iptrA*block_size*block_size+5]*b_lj2+A->val[iptrA*block_size*block_size+8]*b_lj3; |
633 |
sum8+=A->val[iptrA*block_size*block_size+2]*b_lj4+A->val[iptrA*block_size*block_size+5]*b_lj5+A->val[iptrA*block_size*block_size+8]*b_lj6; |
634 |
sum9+=A->val[iptrA*block_size*block_size+2]*b_lj7+A->val[iptrA*block_size*block_size+5]*b_lj8+A->val[iptrA*block_size*block_size+8]*b_lj9; |
635 |
} |
636 |
out->val[iptrC*block_size*block_size]=sum1; |
637 |
out->val[iptrC*block_size*block_size+3]=sum2; |
638 |
out->val[iptrC*block_size*block_size+6]=sum3; |
639 |
out->val[iptrC*block_size*block_size+1]=sum4; |
640 |
out->val[iptrC*block_size*block_size+4]=sum5; |
641 |
out->val[iptrC*block_size*block_size+7]=sum6; |
642 |
out->val[iptrC*block_size*block_size+2]=sum7; |
643 |
out->val[iptrC*block_size*block_size+5]=sum8; |
644 |
out->val[iptrC*block_size*block_size+8]=sum9; |
645 |
} |
646 |
} |
647 |
|
648 |
} |
649 |
|
650 |
time0=Esys_timer()-time0; |
651 |
if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Matrix multiplication: %e\n",time0); |
652 |
|
653 |
Paso_Pattern_free(outpattern); |
654 |
|
655 |
return out; |
656 |
} |
657 |
|
658 |
/* for block_size=1 only */ |
659 |
Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A,Paso_SparseMatrix *R,Paso_SparseMatrix *P) { |
660 |
|
661 |
index_t iptrA_c,iptrR,iptrP,iptrA; |
662 |
dim_t i,j,k,l,m; |
663 |
double first_sum=0,second_sum=0,p_lj=0,a_kl=0,r_ik=0; |
664 |
Paso_Pattern* temp=NULL; |
665 |
Paso_Pattern* outpattern=NULL; |
666 |
Paso_SparseMatrix *A_c=NULL; |
667 |
|
668 |
double time0=0; |
669 |
bool_t verbose=0; |
670 |
|
671 |
time0=Esys_timer(); |
672 |
|
673 |
temp=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,P->pattern); |
674 |
outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,R->pattern,temp); |
675 |
A_c=Paso_SparseMatrix_alloc(A->type,outpattern,1,1, TRUE); |
676 |
|
677 |
time0=Esys_timer()-time0; |
678 |
if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Pattern creation: %e\n",time0); |
679 |
|
680 |
/*a^c_ij=sum_k^n(r_ik)sum_l^n(a_kl*P_lj)*/ |
681 |
|
682 |
time0=Esys_timer(); |
683 |
|
684 |
#pragma omp parallel for private(i,iptrA_c,j,second_sum,iptrR,k,first_sum,p_lj,iptrP,m,a_kl,r_ik) schedule(static) |
685 |
for(i = 0; i < A_c->numRows; i++) { |
686 |
for(iptrA_c = A_c->pattern->ptr[i]; iptrA_c < A_c->pattern->ptr[i+1]; ++iptrA_c) { |
687 |
j = A_c->pattern->index[iptrA_c]; |
688 |
second_sum=0; |
689 |
for(iptrR = R->pattern->ptr[i]; iptrR < R->pattern->ptr[i+1]; ++iptrR) { |
690 |
k = R->pattern->index[iptrR]; |
691 |
first_sum=0; |
692 |
for(iptrA = A->pattern->ptr[k]; iptrA < A->pattern->ptr[k+1]; ++iptrA) { |
693 |
l= A->pattern->index[iptrA]; |
694 |
p_lj=0; |
695 |
/*This loop can be replaced by bsearch */ |
696 |
for(iptrP = P->pattern->ptr[l]; iptrP < P->pattern->ptr[l+1]; ++iptrP) { |
697 |
m=P->pattern->index[iptrP]; |
698 |
if(m==j) { |
699 |
p_lj=P->val[iptrP]; |
700 |
break; |
701 |
} |
702 |
} |
703 |
a_kl=A->val[iptrA]; |
704 |
first_sum+=a_kl*p_lj; |
705 |
} |
706 |
r_ik=R->val[iptrR]; |
707 |
second_sum+=r_ik*first_sum; |
708 |
} |
709 |
A_c->val[iptrA_c]=second_sum; |
710 |
} |
711 |
} |
712 |
time0=Esys_timer()-time0; |
713 |
if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Matrix multiplication: %e\n",time0); |
714 |
|
715 |
Paso_Pattern_free(outpattern); |
716 |
Paso_Pattern_free(temp); |
717 |
|
718 |
return A_c; |
719 |
} |
720 |
|
721 |
|