/[escript]/trunk/paso/src/SparseMatrix_AMGcomponents.c
ViewVC logotype

Contents of /trunk/paso/src/SparseMatrix_AMGcomponents.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (show annotations)
Mon Oct 11 01:48:14 2010 UTC (8 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 36106 byte(s)
Merging dudley and scons updates from branches

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: SparseMatrix_AMGcomponents */
18
19 /**************************************************************/
20
21 /* Author: Artak Amirbekyan, artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "SparseMatrix.h"
27 #include "PasoUtil.h"
28
29 /**************************************************************
30
31 Methods nessecary for AMG preconditioner
32
33 */
34
35 /* Prolongation matrix is constracted by W_FC as P<- [W_FC]
36 [I_CC]
37 */
38
39 Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* mis_marker){
40
41 Paso_Pattern *outpattern=NULL;
42 Paso_SparseMatrix *out=NULL;
43 index_t iptr,wptr;
44
45 Paso_IndexList* index_list=NULL;
46 dim_t n=W->numRows+W->numCols;
47 dim_t i,j,k=0;
48 dim_t block_size=W->row_block_size;
49
50 index_list=TMPMEMALLOC(n,Paso_IndexList);
51 if (! Esys_checkPtr(index_list)) {
52 #pragma omp parallel for private(i) schedule(static)
53 for(i=0;i<n;++i) {
54 index_list[i].extension=NULL;
55 index_list[i].n=0;
56 }
57 }
58
59
60 for (i=0;i<n;++i) {
61 if (mis_marker[i]) {
62 for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) {
63 j=W->pattern->index[iptr];
64 Paso_IndexList_insertIndex(&(index_list[i]),j);
65 }
66 k++;
67 }
68 else {
69 Paso_IndexList_insertIndex(&(index_list[i]),i-k);
70 }
71 }
72
73 outpattern=Paso_IndexList_createPattern(0, n,index_list,0,W->numCols,0);
74 out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE);
75
76 k=0;
77
78 if (block_size==1) {
79 for (i=0;i<n;++i) {
80 if (mis_marker[i]) {
81 wptr=W->pattern->ptr[k];
82 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
83 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
84 wptr++;
85 }
86 k++;
87 }
88 else {
89 iptr=out->pattern->ptr[i];
90 out->val[iptr*block_size*block_size]=1;
91 }
92 }
93 } else if (block_size==2) {
94 for (i=0;i<n;++i) {
95 if (mis_marker[i]) {
96 wptr=W->pattern->ptr[k];
97 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
98 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
99 out->val[iptr*block_size*block_size+1]=0;
100 out->val[iptr*block_size*block_size+2]=0;
101 out->val[iptr*block_size*block_size+3]=W->val[wptr*block_size*block_size+3];
102 wptr++;
103 }
104 k++;
105 }
106 else {
107 iptr=out->pattern->ptr[i];
108 out->val[iptr*block_size*block_size]=1;
109 out->val[iptr*block_size*block_size+1]=0;
110 out->val[iptr*block_size*block_size+2]=0;
111 out->val[iptr*block_size*block_size+3]=1;
112 }
113 }
114 } else if (block_size==3) {
115 for (i=0;i<n;++i) {
116 if (mis_marker[i]) {
117 wptr=W->pattern->ptr[k];
118 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
119 out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
120 out->val[iptr*block_size*block_size+1]=0;
121 out->val[iptr*block_size*block_size+2]=0;
122 out->val[iptr*block_size*block_size+3]=0;
123 out->val[iptr*block_size*block_size+4]=W->val[wptr*block_size*block_size+4];
124 out->val[iptr*block_size*block_size+5]=0;
125 out->val[iptr*block_size*block_size+6]=0;
126 out->val[iptr*block_size*block_size+7]=0;
127 out->val[iptr*block_size*block_size+8]=W->val[wptr*block_size*block_size+8];
128 wptr++;
129 }
130 k++;
131 }
132 else {
133 iptr=out->pattern->ptr[i];
134 out->val[iptr*block_size*block_size]=1;
135 out->val[iptr*block_size*block_size+1]=0;
136 out->val[iptr*block_size*block_size+2]=0;
137 out->val[iptr*block_size*block_size+3]=0;
138 out->val[iptr*block_size*block_size+4]=1;
139 out->val[iptr*block_size*block_size+5]=0;
140 out->val[iptr*block_size*block_size+6]=0;
141 out->val[iptr*block_size*block_size+7]=0;
142 out->val[iptr*block_size*block_size+8]=1;
143 }
144 }
145 }
146
147 /* clean up */
148 if (index_list!=NULL) {
149 #pragma omp parallel for private(i) schedule(static)
150 for(i=0;i<n;++i) Paso_IndexList_free(index_list[i].extension);
151 }
152 TMPMEMFREE(index_list);
153 Paso_Pattern_free(outpattern);
154 return out;
155 }
156
157
158 /* Restriction matrix R=P^T */
159
160 Paso_SparseMatrix* Paso_SparseMatrix_getRestriction(Paso_SparseMatrix* P){
161
162 Paso_Pattern *outpattern=NULL;
163 Paso_SparseMatrix *out=NULL;
164
165 Paso_IndexList* index_list=NULL;
166 dim_t C=P->numCols;
167 dim_t F=P->numRows-C;
168 dim_t n=C+F;
169 dim_t block_size=P->row_block_size;
170 dim_t i,j,k=0;
171 index_t iptr,jptr;
172
173 index_list=TMPMEMALLOC(C,Paso_IndexList);
174 if (! Esys_checkPtr(index_list)) {
175 #pragma omp parallel for private(i) schedule(static)
176 for(i=0;i<C;++i) {
177 index_list[i].extension=NULL;
178 index_list[i].n=0;
179 }
180 }
181
182
183 for (i=0;i<n;++i) {
184 for (iptr=P->pattern->ptr[i];iptr<P->pattern->ptr[i+1]; ++iptr) {
185 j=P->pattern->index[iptr];
186 Paso_IndexList_insertIndex(&(index_list[j]),i);
187 }
188 }
189
190 outpattern=Paso_IndexList_createPattern(0, C,index_list,0,C+F,0);
191 out=Paso_SparseMatrix_alloc(P->type,outpattern,block_size,block_size,FALSE);
192
193
194 if (block_size==1) {
195 for (i=0;i<out->numRows;++i) {
196 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
197 j=out->pattern->index[iptr];
198 /*This can be replaced by bsearch!!*/
199 for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) {
200 k=P->pattern->index[jptr];
201 if(k==i) {
202 out->val[iptr]=P->val[jptr];
203 }
204 }
205 }
206 }
207 } else if (block_size==2) {
208 for (i=0;i<out->numRows;++i) {
209 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
210 j=out->pattern->index[iptr];
211 /*This can be replaced by bsearch!!*/
212 for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) {
213 k=P->pattern->index[jptr];
214 if(k==i) {
215 out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size];
216 out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+2];
217 out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+1];
218 out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+3];
219 }
220 }
221 }
222 }
223 } else if (block_size==3) {
224 for (i=0;i<out->numRows;++i) {
225 for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
226 j=out->pattern->index[iptr];
227 /*This can be replaced by bsearch!!*/
228 for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) {
229 k=P->pattern->index[jptr];
230 if(k==i) {
231 out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size];
232 out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+3];
233 out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+6];
234 out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+1];
235 out->val[iptr*block_size*block_size+4]=P->val[jptr*block_size*block_size+4];
236 out->val[iptr*block_size*block_size+5]=P->val[jptr*block_size*block_size+7];
237 out->val[iptr*block_size*block_size+6]=P->val[jptr*block_size*block_size+2];
238 out->val[iptr*block_size*block_size+7]=P->val[jptr*block_size*block_size+5];
239 out->val[iptr*block_size*block_size+8]=P->val[jptr*block_size*block_size+8];
240 }
241 }
242 }
243 }
244 }
245
246 /* clean up */
247 if (index_list!=NULL) {
248 #pragma omp parallel for private(i) schedule(static)
249 for(i=0;i<C;++i) Paso_IndexList_free(index_list[i].extension);
250 }
251 TMPMEMFREE(index_list);
252 Paso_Pattern_free(outpattern);
253 return out;
254 }
255
256
257 void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* mis_marker){
258
259 double *alpha;
260 double *beta;
261 double a_ii=0;
262 double sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos;
263 double sum_all_neg1,sum_all_pos1,sum_strong_neg1,sum_strong_pos1,sum_all_neg2,sum_all_pos2,sum_strong_neg2,sum_strong_pos2,sum_all_neg3,sum_all_pos3,sum_strong_neg3,sum_strong_pos3;
264 double a_ii1=0;
265 double a_ii2=0;
266 double a_ii3=0;
267
268 dim_t n=A->numRows;
269 dim_t F=W_FC->numRows;
270 dim_t i,j,k;
271
272 dim_t block_size=A->row_block_size;
273
274 index_t iPtr,dptr=0;
275 /*index_t *index, *where_p;*/
276
277 alpha=TMPMEMALLOC(F*block_size,double);
278 beta=TMPMEMALLOC(F*block_size,double);
279
280
281 if (block_size==1) {
282 k=0;
283 for (i = 0; i < n; ++i) {
284 if(mis_marker[i]) {
285 alpha[k]=0;
286 beta[k]=0;
287 sum_all_neg=0;
288 sum_all_pos=0;
289 sum_strong_neg=0;
290 sum_strong_pos=0;
291 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
292 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
293 j=A->pattern->index[iPtr];
294 if(j!=i) {
295 if(A->val[iPtr]<0) {
296 sum_all_neg+=A->val[iPtr];
297 if(!mis_marker[j]) {
298 sum_strong_neg+=A->val[iPtr];
299 }
300 }
301 else if(A->val[iPtr]>0) {
302 sum_all_pos+=A->val[iPtr];
303 if(!mis_marker[j]) {
304 sum_strong_pos+=A->val[iPtr];
305 }
306 }
307 }
308 else {
309 a_ii=A->val[iPtr];
310 dptr=iPtr;
311 }
312 }
313
314 if(sum_strong_neg!=0) {
315 alpha[k]=sum_all_neg/(sum_strong_neg);
316 }
317 if(sum_strong_pos!=0) {
318 beta[k]=sum_all_pos/(sum_strong_pos);
319 }
320 else {
321 a_ii+=sum_all_pos;
322 beta[k]=0;
323 }
324 alpha[k]=alpha[k]/(a_ii);
325 beta[k]=beta[k]/(a_ii);
326 k++;
327 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
328 }
329 }
330 } else if (block_size==2) {
331 k=0;
332 for (i = 0; i < n; ++i) {
333 if(mis_marker[i]) {
334 alpha[k*block_size]=0;
335 alpha[k*block_size+1]=0;
336 beta[k*block_size]=0;
337 beta[k*block_size+1]=0;
338 sum_all_neg1=0;
339 sum_all_pos1=0;
340 sum_strong_neg1=0;
341 sum_strong_pos1=0;
342 sum_all_neg2=0;
343 sum_all_pos2=0;
344 sum_strong_neg2=0;
345 sum_strong_pos2=0;
346 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
347 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
348 j=A->pattern->index[iPtr];
349 if(j!=i) {
350 if(A->val[iPtr*block_size*block_size]<0) {
351 sum_all_neg1+=A->val[iPtr*block_size*block_size];
352 if(!mis_marker[j]) {
353 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
354 }
355 }
356 else if(A->val[iPtr*block_size*block_size]>0) {
357 sum_all_pos1+=A->val[iPtr*block_size*block_size];
358 if(!mis_marker[j]) {
359 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
360 }
361 }
362 if(A->val[iPtr*block_size*block_size+3]<0) {
363 sum_all_neg2+=A->val[iPtr*block_size*block_size+3];
364 if(!mis_marker[j]) {
365 sum_strong_neg2+=A->val[iPtr*block_size*block_size+3];
366 }
367 } else if(A->val[iPtr*block_size*block_size+3]>0) {
368 sum_all_pos2+=A->val[iPtr*block_size*block_size+3];
369 if(!mis_marker[j]) {
370 sum_strong_pos2+=A->val[iPtr*block_size*block_size+3];
371 }
372 }
373
374 }
375 else {
376 a_ii1=A->val[iPtr*block_size*block_size];
377 a_ii2=A->val[iPtr*block_size*block_size+3];
378 dptr=iPtr;
379 }
380 }
381
382 if(sum_strong_neg1!=0) {
383 alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
384 }
385 if(sum_strong_neg2!=0) {
386 alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
387 }
388
389 if(sum_strong_pos1!=0) {
390 beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
391 }
392 else {
393 a_ii1+=sum_all_pos1;
394 beta[k*block_size]=0;
395 }
396 if(sum_strong_pos2!=0) {
397 beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
398 }
399 else {
400 a_ii2+=sum_all_pos2;
401 beta[k*block_size+1]=0;
402 }
403
404 alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
405 alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
406 beta[k*block_size]=beta[k*block_size]/(a_ii1);
407 beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
408 k++;
409 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
410 }
411 }
412 } else if (block_size==3) {
413 k=0;
414 for (i = 0; i < n; ++i) {
415 if(mis_marker[i]) {
416 alpha[k*block_size]=0;
417 alpha[k*block_size+1]=0;
418 alpha[k*block_size+2]=0;
419 beta[k*block_size]=0;
420 beta[k*block_size+1]=0;
421 beta[k*block_size+2]=0;
422 sum_all_neg1=0;
423 sum_all_pos1=0;
424 sum_strong_neg1=0;
425 sum_strong_pos1=0;
426 sum_all_neg2=0;
427 sum_all_pos2=0;
428 sum_strong_neg2=0;
429 sum_strong_pos2=0;
430 sum_all_neg3=0;
431 sum_all_pos3=0;
432 sum_strong_neg3=0;
433 sum_strong_pos3=0;
434 /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
435 for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
436 j=A->pattern->index[iPtr];
437 if(j!=i) {
438 if(A->val[iPtr*block_size*block_size]<0) {
439 sum_all_neg1+=A->val[iPtr*block_size*block_size];
440 if(!mis_marker[j]) {
441 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
442 }
443 }
444 else if(A->val[iPtr*block_size*block_size]>0) {
445 sum_all_pos1+=A->val[iPtr*block_size*block_size];
446 if(!mis_marker[j]) {
447 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
448 }
449 }
450 if(A->val[iPtr*block_size*block_size+4]<0) {
451 sum_all_neg2+=A->val[iPtr*block_size*block_size+4];
452 if(!mis_marker[j]) {
453 sum_strong_neg2+=A->val[iPtr*block_size*block_size+4];
454 }
455 } else if(A->val[iPtr*block_size*block_size+4]>0) {
456 sum_all_pos2+=A->val[iPtr*block_size*block_size+4];
457 if(!mis_marker[j]) {
458 sum_strong_pos2+=A->val[iPtr*block_size*block_size+4];
459 }
460 }
461 if(A->val[iPtr*block_size*block_size+8]<0) {
462 sum_all_neg3+=A->val[iPtr*block_size*block_size+8];
463 if(!mis_marker[j]) {
464 sum_strong_neg3+=A->val[iPtr*block_size*block_size+8];
465 }
466 } else if(A->val[iPtr*block_size*block_size+8]>0) {
467 sum_all_pos3+=A->val[iPtr*block_size*block_size+8];
468 if(!mis_marker[j]) {
469 sum_strong_pos3+=A->val[iPtr*block_size*block_size+8];
470 }
471 }
472
473
474 }
475 else {
476 a_ii1=A->val[iPtr*block_size*block_size];
477 a_ii2=A->val[iPtr*block_size*block_size+4];
478 a_ii3=A->val[iPtr*block_size*block_size+8];
479 dptr=iPtr;
480 }
481 }
482
483 if(sum_strong_neg1!=0) {
484 alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
485 }
486 if(sum_strong_neg2!=0) {
487 alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
488 }
489 if(sum_strong_neg3!=0) {
490 alpha[k*block_size+2]=sum_all_neg3/(sum_strong_neg3);
491 }
492
493 if(sum_strong_pos1!=0) {
494 beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
495 }
496 else {
497 a_ii1+=sum_all_pos1;
498 beta[k*block_size]=0;
499 }
500 if(sum_strong_pos2!=0) {
501 beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
502 }
503 else {
504 a_ii2+=sum_all_pos2;
505 beta[k*block_size+1]=0;
506 }
507 if(sum_strong_pos3!=0) {
508 beta[k*block_size+2]=sum_all_pos3/(sum_strong_pos3);
509 }
510 else {
511 a_ii3+=sum_all_pos3;
512 beta[k*block_size+2]=0;
513 }
514
515 alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
516 alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
517 alpha[k*block_size+2]=alpha[k*block_size+2]/(a_ii3);
518 beta[k*block_size]=beta[k*block_size]/(a_ii1);
519 beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
520 beta[k*block_size+2]=beta[k*block_size+2]/(a_ii3);
521 k++;
522 /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
523 }
524 }
525 }
526
527 if (block_size==1) {
528 #pragma omp parallel for private(i,iPtr) schedule(static)
529 for (i = 0; i < W_FC->numRows; ++i) {
530 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
531 if(W_FC->val[iPtr]<0) {
532 W_FC->val[iPtr]=-alpha[i]*W_FC->val[iPtr];
533 }
534 else if(W_FC->val[iPtr]>0) {
535 W_FC->val[iPtr]=-beta[i]*W_FC->val[iPtr];
536 }
537 }
538 }
539 } else if (block_size==2) {
540 #pragma omp parallel for private(i,iPtr) schedule(static)
541 for (i = 0; i < W_FC->numRows; ++i) {
542 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
543 if(W_FC->val[iPtr*block_size*block_size]<0) {
544 W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
545 }
546 else if(W_FC->val[iPtr*block_size*block_size]>0) {
547 W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
548 }
549 if(W_FC->val[iPtr*block_size*block_size+3]<0) {
550 W_FC->val[iPtr*block_size*block_size+3]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
551 }
552 else if(W_FC->val[iPtr*block_size*block_size+3]>0) {
553 W_FC->val[iPtr*block_size*block_size+3]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
554 }
555 W_FC->val[iPtr*block_size*block_size+1]=0;
556 W_FC->val[iPtr*block_size*block_size+2]=0;
557 }
558 }
559 } else if (block_size==3) {
560 #pragma omp parallel for private(i,iPtr) schedule(static)
561 for (i = 0; i < W_FC->numRows; ++i) {
562 for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
563 if(W_FC->val[iPtr*block_size*block_size]<0) {
564 W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
565 }
566 else if(W_FC->val[iPtr*block_size*block_size]>0) {
567 W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
568 }
569 if(W_FC->val[iPtr*block_size*block_size+4]<0) {
570 W_FC->val[iPtr*block_size*block_size+4]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
571 }
572 else if(W_FC->val[iPtr*block_size*block_size+4]>0) {
573 W_FC->val[iPtr*block_size*block_size+4]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
574 }
575 if(W_FC->val[iPtr*block_size*block_size+8]<0) {
576 W_FC->val[iPtr*block_size*block_size+8]=-alpha[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
577 }
578 else if(W_FC->val[iPtr*block_size*block_size+8]>0) {
579 W_FC->val[iPtr*block_size*block_size+8]=-beta[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
580 }
581 W_FC->val[iPtr*block_size*block_size+1]=0;
582 W_FC->val[iPtr*block_size*block_size+2]=0;
583 W_FC->val[iPtr*block_size*block_size+3]=0;
584 W_FC->val[iPtr*block_size*block_size+5]=0;
585 W_FC->val[iPtr*block_size*block_size+6]=0;
586 W_FC->val[iPtr*block_size*block_size+7]=0;
587 }
588 }
589
590 }
591
592
593 TMPMEMFREE(alpha);
594 TMPMEMFREE(beta);
595 }
596
597
598 /*A_c=R*A*P taking into account coarseneing */
599 /*Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix *R, Paso_SparseMatrix *P) {
600
601 Paso_SparseMatrix *temp=NULL;
602 Paso_SparseMatrix *out=NULL;
603
604 temp=Paso_SparseMatrix_MatrixMatrix(A,P);
605 out=Paso_SparseMatrix_MatrixMatrix(R,temp);
606
607 Paso_SparseMatrix_free(temp);
608
609 return out;
610 }
611 */
612
613 Paso_SparseMatrix* Paso_SparseMatrix_MatrixMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix* B) {
614 index_t iptrA,iptrB,iptrC;
615 dim_t i,j=0,k,l;
616 Paso_Pattern* outpattern=NULL;
617 Paso_SparseMatrix *out=NULL;
618 double sum,b_lj=0;
619 double sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,b_lj1=0,b_lj2=0,b_lj3=0,b_lj4=0,b_lj5=0,b_lj6=0,b_lj7=0,b_lj8=0,b_lj9=0;
620
621 dim_t block_size=A->row_block_size;
622
623 double time0=0;
624 bool_t verbose=0;
625
626 time0=Esys_timer();
627
628 outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,B->pattern);
629 out=Paso_SparseMatrix_alloc(A->type, outpattern, block_size, block_size, FALSE);
630
631 time0=Esys_timer()-time0;
632 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Pattern creation: %e\n",time0);
633
634 time0=Esys_timer();
635
636 if(block_size==1) {
637 #pragma omp parallel for private(i,iptrC,j,sum,iptrA,k,b_lj,iptrB,l) schedule(static)
638 for(i = 0; i < out->numRows; i++) {
639 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
640 j = out->pattern->index[iptrC];
641 sum=0;
642 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
643 k=A->pattern->index[iptrA];
644 /*can be replaced by bsearch */
645 b_lj=0;
646 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
647 l=B->pattern->index[iptrB];
648 if(l==j) {
649 b_lj=B->val[iptrB];
650 break;
651 }
652 }
653 sum+=A->val[iptrA]*b_lj;
654 }
655 out->val[iptrC]=sum;
656 }
657 }
658 } else if (block_size==2) {
659 #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,iptrB,l) schedule(static)
660 for(i = 0; i < out->numRows; i++) {
661 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
662 j = out->pattern->index[iptrC];
663 sum1=0;
664 sum2=0;
665 sum3=0;
666 sum4=0;
667 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
668 k=A->pattern->index[iptrA];
669 /*can be replaced by bsearch */
670 b_lj1=0;
671 b_lj2=0;
672 b_lj3=0;
673 b_lj4=0;
674 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
675 l=B->pattern->index[iptrB];
676 if(l==j) {
677 b_lj1=B->val[iptrB*block_size*block_size];
678 b_lj2=B->val[iptrB*block_size*block_size+1];
679 b_lj3=B->val[iptrB*block_size*block_size+2];
680 b_lj4=B->val[iptrB*block_size*block_size+3];
681 break;
682 }
683 }
684 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+2]*b_lj2;
685 sum2+=A->val[iptrA*block_size*block_size]*b_lj3+A->val[iptrA*block_size*block_size+2]*b_lj4;
686 sum3+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2;
687 sum4+=A->val[iptrA*block_size*block_size+1]*b_lj3+A->val[iptrA*block_size*block_size+3]*b_lj4;
688 }
689 out->val[iptrC*block_size*block_size]=sum1;
690 out->val[iptrC*block_size*block_size+2]=sum2;
691 out->val[iptrC*block_size*block_size+1]=sum3;
692 out->val[iptrC*block_size*block_size+3]=sum4;
693 }
694 }
695 } else if (block_size==3) {
696 #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,b_lj5,b_lj6,b_lj7,b_lj8,b_lj9,iptrB,l) schedule(static)
697 for(i = 0; i < out->numRows; i++) {
698 for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
699 j = out->pattern->index[iptrC];
700 sum1=0;
701 sum2=0;
702 sum3=0;
703 sum4=0;
704 sum5=0;
705 sum6=0;
706 sum7=0;
707 sum8=0;
708 sum9=0;
709 for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
710 k=A->pattern->index[iptrA];
711 /*can be replaced by bsearch */
712 b_lj1=0;
713 b_lj2=0;
714 b_lj3=0;
715 b_lj4=0;
716 b_lj5=0;
717 b_lj6=0;
718 b_lj7=0;
719 b_lj8=0;
720 b_lj9=0;
721 for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
722 l=B->pattern->index[iptrB];
723 if(l==j) {
724 b_lj1=B->val[iptrB*block_size*block_size];
725 b_lj2=B->val[iptrB*block_size*block_size+1];
726 b_lj3=B->val[iptrB*block_size*block_size+2];
727 b_lj4=B->val[iptrB*block_size*block_size+3];
728 b_lj5=B->val[iptrB*block_size*block_size+4];
729 b_lj6=B->val[iptrB*block_size*block_size+5];
730 b_lj7=B->val[iptrB*block_size*block_size+6];
731 b_lj8=B->val[iptrB*block_size*block_size+7];
732 b_lj9=B->val[iptrB*block_size*block_size+8];
733 break;
734 }
735 }
736 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2+A->val[iptrA*block_size*block_size+6]*b_lj3;
737 sum2+=A->val[iptrA*block_size*block_size]*b_lj4+A->val[iptrA*block_size*block_size+3]*b_lj5+A->val[iptrA*block_size*block_size+6]*b_lj6;
738 sum3+=A->val[iptrA*block_size*block_size]*b_lj7+A->val[iptrA*block_size*block_size+3]*b_lj8+A->val[iptrA*block_size*block_size+6]*b_lj9;
739 sum4+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+4]*b_lj2+A->val[iptrA*block_size*block_size+7]*b_lj3;
740 sum5+=A->val[iptrA*block_size*block_size+1]*b_lj4+A->val[iptrA*block_size*block_size+4]*b_lj5+A->val[iptrA*block_size*block_size+7]*b_lj6;
741 sum6+=A->val[iptrA*block_size*block_size+1]*b_lj7+A->val[iptrA*block_size*block_size+4]*b_lj8+A->val[iptrA*block_size*block_size+7]*b_lj9;
742 sum7+=A->val[iptrA*block_size*block_size+2]*b_lj1+A->val[iptrA*block_size*block_size+5]*b_lj2+A->val[iptrA*block_size*block_size+8]*b_lj3;
743 sum8+=A->val[iptrA*block_size*block_size+2]*b_lj4+A->val[iptrA*block_size*block_size+5]*b_lj5+A->val[iptrA*block_size*block_size+8]*b_lj6;
744 sum9+=A->val[iptrA*block_size*block_size+2]*b_lj7+A->val[iptrA*block_size*block_size+5]*b_lj8+A->val[iptrA*block_size*block_size+8]*b_lj9;
745 }
746 out->val[iptrC*block_size*block_size]=sum1;
747 out->val[iptrC*block_size*block_size+3]=sum2;
748 out->val[iptrC*block_size*block_size+6]=sum3;
749 out->val[iptrC*block_size*block_size+1]=sum4;
750 out->val[iptrC*block_size*block_size+4]=sum5;
751 out->val[iptrC*block_size*block_size+7]=sum6;
752 out->val[iptrC*block_size*block_size+2]=sum7;
753 out->val[iptrC*block_size*block_size+5]=sum8;
754 out->val[iptrC*block_size*block_size+8]=sum9;
755 }
756 }
757
758 }
759
760 time0=Esys_timer()-time0;
761 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Matrix multiplication: %e\n",time0);
762
763 Paso_Pattern_free(outpattern);
764
765 return out;
766 }
767
768 /* for block_size=1 only */
769 Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A,Paso_SparseMatrix *R,Paso_SparseMatrix *P) {
770
771 index_t iptrA_c,iptrR,iptrP,iptrA;
772 dim_t i,j,k,l,m;
773 double first_sum=0,second_sum=0,p_lj=0,a_kl=0,r_ik=0;
774 Paso_Pattern* temp=NULL;
775 Paso_Pattern* outpattern=NULL;
776 Paso_SparseMatrix *A_c=NULL;
777
778 double time0=0;
779 bool_t verbose=0;
780
781 time0=Esys_timer();
782
783 temp=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,P->pattern);
784 outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,R->pattern,temp);
785 A_c=Paso_SparseMatrix_alloc(A->type,outpattern,1,1, TRUE);
786
787 time0=Esys_timer()-time0;
788 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Pattern creation: %e\n",time0);
789
790 /*a^c_ij=sum_k^n(r_ik)sum_l^n(a_kl*P_lj)*/
791
792 time0=Esys_timer();
793
794 #pragma omp parallel for private(i,iptrA_c,j,second_sum,iptrR,k,first_sum,p_lj,iptrP,m,a_kl,r_ik) schedule(static)
795 for(i = 0; i < A_c->numRows; i++) {
796 for(iptrA_c = A_c->pattern->ptr[i]; iptrA_c < A_c->pattern->ptr[i+1]; ++iptrA_c) {
797 j = A_c->pattern->index[iptrA_c];
798 second_sum=0;
799 for(iptrR = R->pattern->ptr[i]; iptrR < R->pattern->ptr[i+1]; ++iptrR) {
800 k = R->pattern->index[iptrR];
801 first_sum=0;
802 for(iptrA = A->pattern->ptr[k]; iptrA < A->pattern->ptr[k+1]; ++iptrA) {
803 l= A->pattern->index[iptrA];
804 p_lj=0;
805 /*This loop can be replaced by bsearch */
806 for(iptrP = P->pattern->ptr[l]; iptrP < P->pattern->ptr[l+1]; ++iptrP) {
807 m=P->pattern->index[iptrP];
808 if(m==j) {
809 p_lj=P->val[iptrP];
810 break;
811 }
812 }
813 a_kl=A->val[iptrA];
814 first_sum+=a_kl*p_lj;
815 }
816 r_ik=R->val[iptrR];
817 second_sum+=r_ik*first_sum;
818 }
819 A_c->val[iptrA_c]=second_sum;
820 }
821 }
822 time0=Esys_timer()-time0;
823 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Matrix multiplication: %e\n",time0);
824
825 Paso_Pattern_free(outpattern);
826 Paso_Pattern_free(temp);
827
828 return A_c;
829 }
830
831

  ViewVC Help
Powered by ViewVC 1.1.26