/[escript]/trunk/paso/src/SparseMatrix_AMGcomponents.c
ViewVC logotype

Contents of /trunk/paso/src/SparseMatrix_AMGcomponents.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3283 - (show annotations)
Mon Oct 18 22:39:28 2010 UTC (10 years, 4 months ago) by gross
File MIME type: text/plain
File size: 31265 byte(s)
AMG reengineered, BUG is Smoother fixed.


1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: SparseMatrix_AMGcomponents */
18
19 /**************************************************************/
20
21 /* Author: Artak Amirbekyan, artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "SparseMatrix.h"
27 #include "PasoUtil.h"
28
29 /**************************************************************
30
31 Methods nessecary for AMG preconditioner
32
33 */
34
35 /* Prolongation matrix is constracted by W_FC as P<- [W_FC]
36 [I_CC]
37 */
38
39 Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* marker_F){
40
41 Paso_Pattern *outpattern=NULL;
42 Paso_SparseMatrix *out=NULL;
43 index_t iptr,wptr;
44
45 const dim_t n=W->numRows+W->numCols;
46 dim_t i,j,k=0;
47 dim_t block_size=W->row_block_size;
48 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n);
49
50
51 for (i=0;i<n;++i) {
52 if (marker_F[i]) {
53 for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) {
54 j=W->pattern->index[iptr];
55 Paso_IndexListArray_insertIndex(index_list,i,j);
56 }
57 k++;
58 }
59 else {
60 Paso_IndexListArray_insertIndex(index_list,i,i-k);
61 }
62 }
63
64 outpattern=Paso_Pattern_fromIndexListArray(0,index_list,0,W->numCols,0);
65 out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE);
66
67 k=0;
68
69 if (block_size==1) {
70 for (i=0;i<n;++i) {
71 if (marker_F[i]) {
72 wptr=W->pattern->ptr[k];
73 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
74 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
75 wptr++;
76 }
77 k++;
78 }
79 else {
80 iptr=out->pattern->ptr[i];
81 out->val[iptr*block_size*block_size]=1;
82 }
83 }
84 } else if (block_size==2) {
85 for (i=0;i<n;++i) {
86 if (marker_F[i]) {
87 wptr=W->pattern->ptr[k];
88 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
89 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
90 out->val[iptr*block_size*block_size+1]=0;
91 out->val[iptr*block_size*block_size+2]=0;
92 out->val[iptr*block_size*block_size+3]=W->val[wptr*block_size*block_size+3];
93 wptr++;
94 }
95 k++;
96 }
97 else {
98 iptr=out->pattern->ptr[i];
99 out->val[iptr*block_size*block_size]=1;
100 out->val[iptr*block_size*block_size+1]=0;
101 out->val[iptr*block_size*block_size+2]=0;
102 out->val[iptr*block_size*block_size+3]=1;
103 }
104 }
105 } else if (block_size==3) {
106 for (i=0;i<n;++i) {
107 if (marker_F[i]) {
108 wptr=W->pattern->ptr[k];
109 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
110 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
111 out->val[iptr*block_size*block_size+1]=0;
112 out->val[iptr*block_size*block_size+2]=0;
113 out->val[iptr*block_size*block_size+3]=0;
114 out->val[iptr*block_size*block_size+4]=W->val[wptr*block_size*block_size+4];
115 out->val[iptr*block_size*block_size+5]=0;
116 out->val[iptr*block_size*block_size+6]=0;
117 out->val[iptr*block_size*block_size+7]=0;
118 out->val[iptr*block_size*block_size+8]=W->val[wptr*block_size*block_size+8];
119 wptr++;
120 }
121 k++;
122 }
123 else {
124 iptr=out->pattern->ptr[i];
125 out->val[iptr*block_size*block_size]=1;
126 out->val[iptr*block_size*block_size+1]=0;
127 out->val[iptr*block_size*block_size+2]=0;
128 out->val[iptr*block_size*block_size+3]=0;
129 out->val[iptr*block_size*block_size+4]=1;
130 out->val[iptr*block_size*block_size+5]=0;
131 out->val[iptr*block_size*block_size+6]=0;
132 out->val[iptr*block_size*block_size+7]=0;
133 out->val[iptr*block_size*block_size+8]=1;
134 }
135 }
136 }
137
138 /* clean up */
139 Paso_IndexListArray_free(index_list);
140 Paso_Pattern_free(outpattern);
141 return out;
142 }
143 /* Restriction matrix R=P^T */
144
145
146
147 void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* marker_F){
148
149 double *alpha;
150 double *beta;
151 double a_ii=0;
152 double sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos;
153 double sum_all_neg1,sum_all_pos1,sum_strong_neg1,sum_strong_pos1,sum_all_neg2,sum_all_pos2,sum_strong_neg2,sum_strong_pos2,sum_all_neg3,sum_all_pos3,sum_strong_neg3,sum_strong_pos3;
154 double a_ii1=0;
155 double a_ii2=0;
156 double a_ii3=0;
157
158 dim_t n=A->numRows;
159 dim_t F=W_FC->numRows;
160 dim_t i,j,k;
161
162 dim_t block_size=A->row_block_size;
163
164 index_t iPtr,dptr=0;
165 /*index_t *index, *where_p;*/
166
167 alpha=TMPMEMALLOC(F*block_size,double);
168 beta=TMPMEMALLOC(F*block_size,double);
169
170
171 if (block_size==1) {
172 k=0;
173 for (i = 0; i < n; ++i) {
174 if(marker_F[i]) {
175 alpha[k]=0;
176 beta[k]=0;
177 sum_all_neg=0;
178 sum_all_pos=0;
179 sum_strong_neg=0;
180 sum_strong_pos=0;
181 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
182 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
183 j=A->pattern->index[iPtr];
184 if(j!=i) {
185 if(A->val[iPtr]<0) {
186 sum_all_neg+=A->val[iPtr];
187 if(!marker_F[j]) {
188 sum_strong_neg+=A->val[iPtr];
189 }
190 }
191 else if(A->val[iPtr]>0) {
192 sum_all_pos+=A->val[iPtr];
193 if(!marker_F[j]) {
194 sum_strong_pos+=A->val[iPtr];
195 }
196 }
197 }
198 else {
199 a_ii=A->val[iPtr];
200 dptr=iPtr;
201 }
202 }
203
204 if(sum_strong_neg!=0) {
205 alpha[k]=sum_all_neg/(sum_strong_neg);
206 }
207 if(sum_strong_pos!=0) {
208 beta[k]=sum_all_pos/(sum_strong_pos);
209 }
210 else {
211 a_ii+=sum_all_pos;
212 beta[k]=0;
213 }
214 alpha[k]=alpha[k]/(a_ii);
215 beta[k]=beta[k]/(a_ii);
216 k++;
217 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
218 }
219 }
220 } else if (block_size==2) {
221 k=0;
222 for (i = 0; i < n; ++i) {
223 if(marker_F[i]) {
224 alpha[k*block_size]=0;
225 alpha[k*block_size+1]=0;
226 beta[k*block_size]=0;
227 beta[k*block_size+1]=0;
228 sum_all_neg1=0;
229 sum_all_pos1=0;
230 sum_strong_neg1=0;
231 sum_strong_pos1=0;
232 sum_all_neg2=0;
233 sum_all_pos2=0;
234 sum_strong_neg2=0;
235 sum_strong_pos2=0;
236 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
237 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
238 j=A->pattern->index[iPtr];
239 if(j!=i) {
240 if(A->val[iPtr*block_size*block_size]<0) {
241 sum_all_neg1+=A->val[iPtr*block_size*block_size];
242 if(!marker_F[j]) {
243 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
244 }
245 }
246 else if(A->val[iPtr*block_size*block_size]>0) {
247 sum_all_pos1+=A->val[iPtr*block_size*block_size];
248 if(!marker_F[j]) {
249 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
250 }
251 }
252 if(A->val[iPtr*block_size*block_size+3]<0) {
253 sum_all_neg2+=A->val[iPtr*block_size*block_size+3];
254 if(!marker_F[j]) {
255 sum_strong_neg2+=A->val[iPtr*block_size*block_size+3];
256 }
257 } else if(A->val[iPtr*block_size*block_size+3]>0) {
258 sum_all_pos2+=A->val[iPtr*block_size*block_size+3];
259 if(!marker_F[j]) {
260 sum_strong_pos2+=A->val[iPtr*block_size*block_size+3];
261 }
262 }
263
264 }
265 else {
266 a_ii1=A->val[iPtr*block_size*block_size];
267 a_ii2=A->val[iPtr*block_size*block_size+3];
268 dptr=iPtr;
269 }
270 }
271
272 if(sum_strong_neg1!=0) {
273 alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
274 }
275 if(sum_strong_neg2!=0) {
276 alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
277 }
278
279 if(sum_strong_pos1!=0) {
280 beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
281 }
282 else {
283 a_ii1+=sum_all_pos1;
284 beta[k*block_size]=0;
285 }
286 if(sum_strong_pos2!=0) {
287 beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
288 }
289 else {
290 a_ii2+=sum_all_pos2;
291 beta[k*block_size+1]=0;
292 }
293
294 alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
295 alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
296 beta[k*block_size]=beta[k*block_size]/(a_ii1);
297 beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
298 k++;
299 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
300 }
301 }
302 } else if (block_size==3) {
303 k=0;
304 for (i = 0; i < n; ++i) {
305 if(marker_F[i]) {
306 alpha[k*block_size]=0;
307 alpha[k*block_size+1]=0;
308 alpha[k*block_size+2]=0;
309 beta[k*block_size]=0;
310 beta[k*block_size+1]=0;
311 beta[k*block_size+2]=0;
312 sum_all_neg1=0;
313 sum_all_pos1=0;
314 sum_strong_neg1=0;
315 sum_strong_pos1=0;
316 sum_all_neg2=0;
317 sum_all_pos2=0;
318 sum_strong_neg2=0;
319 sum_strong_pos2=0;
320 sum_all_neg3=0;
321 sum_all_pos3=0;
322 sum_strong_neg3=0;
323 sum_strong_pos3=0;
324 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
325 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
326 j=A->pattern->index[iPtr];
327 if(j!=i) {
328 if(A->val[iPtr*block_size*block_size]<0) {
329 sum_all_neg1+=A->val[iPtr*block_size*block_size];
330 if(!marker_F[j]) {
331 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
332 }
333 }
334 else if(A->val[iPtr*block_size*block_size]>0) {
335 sum_all_pos1+=A->val[iPtr*block_size*block_size];
336 if(!marker_F[j]) {
337 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
338 }
339 }
340 if(A->val[iPtr*block_size*block_size+4]<0) {
341 sum_all_neg2+=A->val[iPtr*block_size*block_size+4];
342 if(!marker_F[j]) {
343 sum_strong_neg2+=A->val[iPtr*block_size*block_size+4];
344 }
345 } else if(A->val[iPtr*block_size*block_size+4]>0) {
346 sum_all_pos2+=A->val[iPtr*block_size*block_size+4];
347 if(!marker_F[j]) {
348 sum_strong_pos2+=A->val[iPtr*block_size*block_size+4];
349 }
350 }
351 if(A->val[iPtr*block_size*block_size+8]<0) {
352 sum_all_neg3+=A->val[iPtr*block_size*block_size+8];
353 if(!marker_F[j]) {
354 sum_strong_neg3+=A->val[iPtr*block_size*block_size+8];
355 }
356 } else if(A->val[iPtr*block_size*block_size+8]>0) {
357 sum_all_pos3+=A->val[iPtr*block_size*block_size+8];
358 if(!marker_F[j]) {
359 sum_strong_pos3+=A->val[iPtr*block_size*block_size+8];
360 }
361 }
362
363
364 }
365 else {
366 a_ii1=A->val[iPtr*block_size*block_size];
367 a_ii2=A->val[iPtr*block_size*block_size+4];
368 a_ii3=A->val[iPtr*block_size*block_size+8];
369 dptr=iPtr;
370 }
371 }
372
373 if(sum_strong_neg1!=0) {
374 alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
375 }
376 if(sum_strong_neg2!=0) {
377 alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
378 }
379 if(sum_strong_neg3!=0) {
380 alpha[k*block_size+2]=sum_all_neg3/(sum_strong_neg3);
381 }
382
383 if(sum_strong_pos1!=0) {
384 beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
385 }
386 else {
387 a_ii1+=sum_all_pos1;
388 beta[k*block_size]=0;
389 }
390 if(sum_strong_pos2!=0) {
391 beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
392 }
393 else {
394 a_ii2+=sum_all_pos2;
395 beta[k*block_size+1]=0;
396 }
397 if(sum_strong_pos3!=0) {
398 beta[k*block_size+2]=sum_all_pos3/(sum_strong_pos3);
399 }
400 else {
401 a_ii3+=sum_all_pos3;
402 beta[k*block_size+2]=0;
403 }
404
405 alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
406 alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
407 alpha[k*block_size+2]=alpha[k*block_size+2]/(a_ii3);
408 beta[k*block_size]=beta[k*block_size]/(a_ii1);
409 beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
410 beta[k*block_size+2]=beta[k*block_size+2]/(a_ii3);
411 k++;
412 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
413 }
414 }
415 }
416
417 if (block_size==1) {
418 #pragma omp parallel for private(i,iPtr) schedule(static)
419 for (i = 0; i < W_FC->numRows; ++i) {
420 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
421 if(W_FC->val[iPtr]<0) {
422 W_FC->val[iPtr]=-alpha[i]*W_FC->val[iPtr];
423 }
424 else if(W_FC->val[iPtr]>0) {
425 W_FC->val[iPtr]=-beta[i]*W_FC->val[iPtr];
426 }
427 }
428 }
429 } else if (block_size==2) {
430 #pragma omp parallel for private(i,iPtr) schedule(static)
431 for (i = 0; i < W_FC->numRows; ++i) {
432 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
433 if(W_FC->val[iPtr*block_size*block_size]<0) {
434 W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
435 }
436 else if(W_FC->val[iPtr*block_size*block_size]>0) {
437 W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
438 }
439 if(W_FC->val[iPtr*block_size*block_size+3]<0) {
440 W_FC->val[iPtr*block_size*block_size+3]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
441 }
442 else if(W_FC->val[iPtr*block_size*block_size+3]>0) {
443 W_FC->val[iPtr*block_size*block_size+3]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
444 }
445 W_FC->val[iPtr*block_size*block_size+1]=0;
446 W_FC->val[iPtr*block_size*block_size+2]=0;
447 }
448 }
449 } else if (block_size==3) {
450 #pragma omp parallel for private(i,iPtr) schedule(static)
451 for (i = 0; i < W_FC->numRows; ++i) {
452 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
453 if(W_FC->val[iPtr*block_size*block_size]<0) {
454 W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
455 }
456 else if(W_FC->val[iPtr*block_size*block_size]>0) {
457 W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
458 }
459 if(W_FC->val[iPtr*block_size*block_size+4]<0) {
460 W_FC->val[iPtr*block_size*block_size+4]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
461 }
462 else if(W_FC->val[iPtr*block_size*block_size+4]>0) {
463 W_FC->val[iPtr*block_size*block_size+4]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
464 }
465 if(W_FC->val[iPtr*block_size*block_size+8]<0) {
466 W_FC->val[iPtr*block_size*block_size+8]=-alpha[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
467 }
468 else if(W_FC->val[iPtr*block_size*block_size+8]>0) {
469 W_FC->val[iPtr*block_size*block_size+8]=-beta[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
470 }
471 W_FC->val[iPtr*block_size*block_size+1]=0;
472 W_FC->val[iPtr*block_size*block_size+2]=0;
473 W_FC->val[iPtr*block_size*block_size+3]=0;
474 W_FC->val[iPtr*block_size*block_size+5]=0;
475 W_FC->val[iPtr*block_size*block_size+6]=0;
476 W_FC->val[iPtr*block_size*block_size+7]=0;
477 }
478 }
479
480 }
481
482
483 TMPMEMFREE(alpha);
484 TMPMEMFREE(beta);
485 }
486
487
488 /*A_c=R*A*P taking into account coarseneing */
489 /*Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix *R, Paso_SparseMatrix *P) {
490
491 Paso_SparseMatrix *temp=NULL;
492 Paso_SparseMatrix *out=NULL;
493
494 temp=Paso_SparseMatrix_MatrixMatrix(A,P);
495 out=Paso_SparseMatrix_MatrixMatrix(R,temp);
496
497 Paso_SparseMatrix_free(temp);
498
499 return out;
500 }
501 */
502
503 Paso_SparseMatrix* Paso_SparseMatrix_MatrixMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix* B) {
504 index_t iptrA,iptrB,iptrC;
505 dim_t i,j=0,k,l;
506 Paso_Pattern* outpattern=NULL;
507 Paso_SparseMatrix *out=NULL;
508 double sum,b_lj=0;
509 double sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,b_lj1=0,b_lj2=0,b_lj3=0,b_lj4=0,b_lj5=0,b_lj6=0,b_lj7=0,b_lj8=0,b_lj9=0;
510
511 dim_t block_size=A->row_block_size;
512
513 double time0=0;
514 bool_t verbose=0;
515
516 time0=Esys_timer();
517
518 outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,B->pattern);
519 out=Paso_SparseMatrix_alloc(A->type, outpattern, block_size, block_size, FALSE);
520
521 time0=Esys_timer()-time0;
522 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Pattern creation: %e\n",time0);
523
524 time0=Esys_timer();
525
526 if(block_size==1) {
527 #pragma omp parallel for private(i,iptrC,j,sum,iptrA,k,b_lj,iptrB,l) schedule(static)
528 for(i = 0; i < out->numRows; i++) {
529 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
530 j = out->pattern->index[iptrC];
531 sum=0;
532 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
533 k=A->pattern->index[iptrA];
534 /*can be replaced by bsearch */
535 b_lj=0;
536 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
537 l=B->pattern->index[iptrB];
538 if(l==j) {
539 b_lj=B->val[iptrB];
540 break;
541 }
542 }
543 sum+=A->val[iptrA]*b_lj;
544 }
545 out->val[iptrC]=sum;
546 }
547 }
548 } else if (block_size==2) {
549 #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,iptrB,l) schedule(static)
550 for(i = 0; i < out->numRows; i++) {
551 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
552 j = out->pattern->index[iptrC];
553 sum1=0;
554 sum2=0;
555 sum3=0;
556 sum4=0;
557 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
558 k=A->pattern->index[iptrA];
559 /*can be replaced by bsearch */
560 b_lj1=0;
561 b_lj2=0;
562 b_lj3=0;
563 b_lj4=0;
564 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
565 l=B->pattern->index[iptrB];
566 if(l==j) {
567 b_lj1=B->val[iptrB*block_size*block_size];
568 b_lj2=B->val[iptrB*block_size*block_size+1];
569 b_lj3=B->val[iptrB*block_size*block_size+2];
570 b_lj4=B->val[iptrB*block_size*block_size+3];
571 break;
572 }
573 }
574 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+2]*b_lj2;
575 sum2+=A->val[iptrA*block_size*block_size]*b_lj3+A->val[iptrA*block_size*block_size+2]*b_lj4;
576 sum3+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2;
577 sum4+=A->val[iptrA*block_size*block_size+1]*b_lj3+A->val[iptrA*block_size*block_size+3]*b_lj4;
578 }
579 out->val[iptrC*block_size*block_size]=sum1;
580 out->val[iptrC*block_size*block_size+2]=sum2;
581 out->val[iptrC*block_size*block_size+1]=sum3;
582 out->val[iptrC*block_size*block_size+3]=sum4;
583 }
584 }
585 } else if (block_size==3) {
586 #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,b_lj5,b_lj6,b_lj7,b_lj8,b_lj9,iptrB,l) schedule(static)
587 for(i = 0; i < out->numRows; i++) {
588 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
589 j = out->pattern->index[iptrC];
590 sum1=0;
591 sum2=0;
592 sum3=0;
593 sum4=0;
594 sum5=0;
595 sum6=0;
596 sum7=0;
597 sum8=0;
598 sum9=0;
599 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
600 k=A->pattern->index[iptrA];
601 /*can be replaced by bsearch */
602 b_lj1=0;
603 b_lj2=0;
604 b_lj3=0;
605 b_lj4=0;
606 b_lj5=0;
607 b_lj6=0;
608 b_lj7=0;
609 b_lj8=0;
610 b_lj9=0;
611 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
612 l=B->pattern->index[iptrB];
613 if(l==j) {
614 b_lj1=B->val[iptrB*block_size*block_size];
615 b_lj2=B->val[iptrB*block_size*block_size+1];
616 b_lj3=B->val[iptrB*block_size*block_size+2];
617 b_lj4=B->val[iptrB*block_size*block_size+3];
618 b_lj5=B->val[iptrB*block_size*block_size+4];
619 b_lj6=B->val[iptrB*block_size*block_size+5];
620 b_lj7=B->val[iptrB*block_size*block_size+6];
621 b_lj8=B->val[iptrB*block_size*block_size+7];
622 b_lj9=B->val[iptrB*block_size*block_size+8];
623 break;
624 }
625 }
626 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2+A->val[iptrA*block_size*block_size+6]*b_lj3;
627 sum2+=A->val[iptrA*block_size*block_size]*b_lj4+A->val[iptrA*block_size*block_size+3]*b_lj5+A->val[iptrA*block_size*block_size+6]*b_lj6;
628 sum3+=A->val[iptrA*block_size*block_size]*b_lj7+A->val[iptrA*block_size*block_size+3]*b_lj8+A->val[iptrA*block_size*block_size+6]*b_lj9;
629 sum4+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+4]*b_lj2+A->val[iptrA*block_size*block_size+7]*b_lj3;
630 sum5+=A->val[iptrA*block_size*block_size+1]*b_lj4+A->val[iptrA*block_size*block_size+4]*b_lj5+A->val[iptrA*block_size*block_size+7]*b_lj6;
631 sum6+=A->val[iptrA*block_size*block_size+1]*b_lj7+A->val[iptrA*block_size*block_size+4]*b_lj8+A->val[iptrA*block_size*block_size+7]*b_lj9;
632 sum7+=A->val[iptrA*block_size*block_size+2]*b_lj1+A->val[iptrA*block_size*block_size+5]*b_lj2+A->val[iptrA*block_size*block_size+8]*b_lj3;
633 sum8+=A->val[iptrA*block_size*block_size+2]*b_lj4+A->val[iptrA*block_size*block_size+5]*b_lj5+A->val[iptrA*block_size*block_size+8]*b_lj6;
634 sum9+=A->val[iptrA*block_size*block_size+2]*b_lj7+A->val[iptrA*block_size*block_size+5]*b_lj8+A->val[iptrA*block_size*block_size+8]*b_lj9;
635 }
636 out->val[iptrC*block_size*block_size]=sum1;
637 out->val[iptrC*block_size*block_size+3]=sum2;
638 out->val[iptrC*block_size*block_size+6]=sum3;
639 out->val[iptrC*block_size*block_size+1]=sum4;
640 out->val[iptrC*block_size*block_size+4]=sum5;
641 out->val[iptrC*block_size*block_size+7]=sum6;
642 out->val[iptrC*block_size*block_size+2]=sum7;
643 out->val[iptrC*block_size*block_size+5]=sum8;
644 out->val[iptrC*block_size*block_size+8]=sum9;
645 }
646 }
647
648 }
649
650 time0=Esys_timer()-time0;
651 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Matrix multiplication: %e\n",time0);
652
653 Paso_Pattern_free(outpattern);
654
655 return out;
656 }
657
658 /* for block_size=1 only */
659 Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A,Paso_SparseMatrix *R,Paso_SparseMatrix *P) {
660
661 index_t iptrA_c,iptrR,iptrP,iptrA;
662 dim_t i,j,k,l,m;
663 double first_sum=0,second_sum=0,p_lj=0,a_kl=0,r_ik=0;
664 Paso_Pattern* temp=NULL;
665 Paso_Pattern* outpattern=NULL;
666 Paso_SparseMatrix *A_c=NULL;
667
668 double time0=0;
669 bool_t verbose=0;
670
671 time0=Esys_timer();
672
673 temp=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,P->pattern);
674 outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,R->pattern,temp);
675 A_c=Paso_SparseMatrix_alloc(A->type,outpattern,1,1, TRUE);
676
677 time0=Esys_timer()-time0;
678 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Pattern creation: %e\n",time0);
679
680 /*a^c_ij=sum_k^n(r_ik)sum_l^n(a_kl*P_lj)*/
681
682 time0=Esys_timer();
683
684 #pragma omp parallel for private(i,iptrA_c,j,second_sum,iptrR,k,first_sum,p_lj,iptrP,m,a_kl,r_ik) schedule(static)
685 for(i = 0; i < A_c->numRows; i++) {
686 for(iptrA_c = A_c->pattern->ptr[i]; iptrA_c < A_c->pattern->ptr[i+1]; ++iptrA_c) {
687 j = A_c->pattern->index[iptrA_c];
688 second_sum=0;
689 for(iptrR = R->pattern->ptr[i]; iptrR < R->pattern->ptr[i+1]; ++iptrR) {
690 k = R->pattern->index[iptrR];
691 first_sum=0;
692 for(iptrA = A->pattern->ptr[k]; iptrA < A->pattern->ptr[k+1]; ++iptrA) {
693 l= A->pattern->index[iptrA];
694 p_lj=0;
695 /*This loop can be replaced by bsearch */
696 for(iptrP = P->pattern->ptr[l]; iptrP < P->pattern->ptr[l+1]; ++iptrP) {
697 m=P->pattern->index[iptrP];
698 if(m==j) {
699 p_lj=P->val[iptrP];
700 break;
701 }
702 }
703 a_kl=A->val[iptrA];
704 first_sum+=a_kl*p_lj;
705 }
706 r_ik=R->val[iptrR];
707 second_sum+=r_ik*first_sum;
708 }
709 A_c->val[iptrA_c]=second_sum;
710 }
711 }
712 time0=Esys_timer()-time0;
713 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Matrix multiplication: %e\n",time0);
714
715 Paso_Pattern_free(outpattern);
716 Paso_Pattern_free(temp);
717
718 return A_c;
719 }
720
721

  ViewVC Help
Powered by ViewVC 1.1.26