/[escript]/trunk/paso/src/SparseMatrix_AMGcomponents.c
ViewVC logotype

Annotation of /trunk/paso/src/SparseMatrix_AMGcomponents.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3283 - (hide annotations)
Mon Oct 18 22:39:28 2010 UTC (10 years, 6 months ago) by gross
File MIME type: text/plain
File size: 31265 byte(s)
AMG reengineered, BUG is Smoother fixed.


1 artak 2759
2     /*******************************************************
3     *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 artak 2759 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: SparseMatrix_AMGcomponents */
18    
19     /**************************************************************/
20    
21     /* Author: Artak Amirbekyan, artak@uq.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "SparseMatrix.h"
27     #include "PasoUtil.h"
28    
29     /**************************************************************
30    
31     Methods nessecary for AMG preconditioner
32    
33     */
34    
35     /* Prolongation matrix is constracted by W_FC as P<- [W_FC]
36     [I_CC]
37     */
38    
39 gross 3283 Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* marker_F){
40 artak 2759
41     Paso_Pattern *outpattern=NULL;
42     Paso_SparseMatrix *out=NULL;
43     index_t iptr,wptr;
44    
45 gross 3283 const dim_t n=W->numRows+W->numCols;
46 artak 2759 dim_t i,j,k=0;
47 artak 3010 dim_t block_size=W->row_block_size;
48 gross 3283 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n);
49    
50 artak 2759
51     for (i=0;i<n;++i) {
52 gross 3283 if (marker_F[i]) {
53 artak 2759 for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) {
54 artak 2802 j=W->pattern->index[iptr];
55 gross 3283 Paso_IndexListArray_insertIndex(index_list,i,j);
56 artak 2802 }
57     k++;
58 artak 2759 }
59     else {
60 gross 3283 Paso_IndexListArray_insertIndex(index_list,i,i-k);
61 artak 2759 }
62     }
63    
64 gross 3283 outpattern=Paso_Pattern_fromIndexListArray(0,index_list,0,W->numCols,0);
65 artak 3010 out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE);
66 artak 2759
67     k=0;
68 artak 3010
69     if (block_size==1) {
70     for (i=0;i<n;++i) {
71 gross 3283 if (marker_F[i]) {
72 artak 3010 wptr=W->pattern->ptr[k];
73     for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
74     out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
75     wptr++;
76     }
77     k++;
78     }
79     else {
80     iptr=out->pattern->ptr[i];
81     out->val[iptr*block_size*block_size]=1;
82     }
83     }
84     } else if (block_size==2) {
85     for (i=0;i<n;++i) {
86 gross 3283 if (marker_F[i]) {
87 artak 3010 wptr=W->pattern->ptr[k];
88     for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
89     out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
90     out->val[iptr*block_size*block_size+1]=0;
91     out->val[iptr*block_size*block_size+2]=0;
92     out->val[iptr*block_size*block_size+3]=W->val[wptr*block_size*block_size+3];
93     wptr++;
94     }
95     k++;
96     }
97     else {
98     iptr=out->pattern->ptr[i];
99     out->val[iptr*block_size*block_size]=1;
100     out->val[iptr*block_size*block_size+1]=0;
101     out->val[iptr*block_size*block_size+2]=0;
102     out->val[iptr*block_size*block_size+3]=1;
103     }
104     }
105     } else if (block_size==3) {
106     for (i=0;i<n;++i) {
107 gross 3283 if (marker_F[i]) {
108 artak 3010 wptr=W->pattern->ptr[k];
109     for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) {
110     out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size];
111     out->val[iptr*block_size*block_size+1]=0;
112     out->val[iptr*block_size*block_size+2]=0;
113     out->val[iptr*block_size*block_size+3]=0;
114     out->val[iptr*block_size*block_size+4]=W->val[wptr*block_size*block_size+4];
115     out->val[iptr*block_size*block_size+5]=0;
116     out->val[iptr*block_size*block_size+6]=0;
117     out->val[iptr*block_size*block_size+7]=0;
118     out->val[iptr*block_size*block_size+8]=W->val[wptr*block_size*block_size+8];
119     wptr++;
120     }
121     k++;
122     }
123     else {
124     iptr=out->pattern->ptr[i];
125     out->val[iptr*block_size*block_size]=1;
126     out->val[iptr*block_size*block_size+1]=0;
127     out->val[iptr*block_size*block_size+2]=0;
128     out->val[iptr*block_size*block_size+3]=0;
129     out->val[iptr*block_size*block_size+4]=1;
130     out->val[iptr*block_size*block_size+5]=0;
131     out->val[iptr*block_size*block_size+6]=0;
132     out->val[iptr*block_size*block_size+7]=0;
133     out->val[iptr*block_size*block_size+8]=1;
134     }
135     }
136 artak 2759 }
137    
138 gross 3283 /* clean up */
139     Paso_IndexListArray_free(index_list);
140 artak 2759 Paso_Pattern_free(outpattern);
141     return out;
142     }
143     /* Restriction matrix R=P^T */
144    
145    
146    
147 gross 3283 void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* marker_F){
148 artak 2759
149     double *alpha;
150     double *beta;
151     double a_ii=0;
152 artak 2804 double sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos;
153 artak 3010 double sum_all_neg1,sum_all_pos1,sum_strong_neg1,sum_strong_pos1,sum_all_neg2,sum_all_pos2,sum_strong_neg2,sum_strong_pos2,sum_all_neg3,sum_all_pos3,sum_strong_neg3,sum_strong_pos3;
154     double a_ii1=0;
155     double a_ii2=0;
156     double a_ii3=0;
157 artak 2759
158     dim_t n=A->numRows;
159     dim_t F=W_FC->numRows;
160     dim_t i,j,k;
161    
162 artak 3010 dim_t block_size=A->row_block_size;
163    
164 artak 2804 index_t iPtr,dptr=0;
165     /*index_t *index, *where_p;*/
166 artak 2759
167 artak 3010 alpha=TMPMEMALLOC(F*block_size,double);
168     beta=TMPMEMALLOC(F*block_size,double);
169 artak 2759
170 artak 2804
171 artak 3010 if (block_size==1) {
172     k=0;
173     for (i = 0; i < n; ++i) {
174 gross 3283 if(marker_F[i]) {
175 artak 3010 alpha[k]=0;
176     beta[k]=0;
177     sum_all_neg=0;
178     sum_all_pos=0;
179     sum_strong_neg=0;
180     sum_strong_pos=0;
181     /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
182     for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
183     j=A->pattern->index[iPtr];
184     if(j!=i) {
185     if(A->val[iPtr]<0) {
186     sum_all_neg+=A->val[iPtr];
187 gross 3283 if(!marker_F[j]) {
188 artak 3010 sum_strong_neg+=A->val[iPtr];
189     }
190     }
191     else if(A->val[iPtr]>0) {
192     sum_all_pos+=A->val[iPtr];
193 gross 3283 if(!marker_F[j]) {
194 artak 3010 sum_strong_pos+=A->val[iPtr];
195     }
196     }
197     }
198     else {
199     a_ii=A->val[iPtr];
200     dptr=iPtr;
201     }
202     }
203    
204     if(sum_strong_neg!=0) {
205     alpha[k]=sum_all_neg/(sum_strong_neg);
206     }
207     if(sum_strong_pos!=0) {
208     beta[k]=sum_all_pos/(sum_strong_pos);
209     }
210     else {
211     a_ii+=sum_all_pos;
212     beta[k]=0;
213     }
214     alpha[k]=alpha[k]/(a_ii);
215     beta[k]=beta[k]/(a_ii);
216     k++;
217     /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
218 artak 2759 }
219 artak 3010 }
220     } else if (block_size==2) {
221     k=0;
222     for (i = 0; i < n; ++i) {
223 gross 3283 if(marker_F[i]) {
224 artak 3010 alpha[k*block_size]=0;
225     alpha[k*block_size+1]=0;
226     beta[k*block_size]=0;
227     beta[k*block_size+1]=0;
228     sum_all_neg1=0;
229     sum_all_pos1=0;
230     sum_strong_neg1=0;
231     sum_strong_pos1=0;
232     sum_all_neg2=0;
233     sum_all_pos2=0;
234     sum_strong_neg2=0;
235     sum_strong_pos2=0;
236     /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
237     for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
238     j=A->pattern->index[iPtr];
239     if(j!=i) {
240     if(A->val[iPtr*block_size*block_size]<0) {
241     sum_all_neg1+=A->val[iPtr*block_size*block_size];
242 gross 3283 if(!marker_F[j]) {
243 artak 3010 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
244     }
245     }
246     else if(A->val[iPtr*block_size*block_size]>0) {
247     sum_all_pos1+=A->val[iPtr*block_size*block_size];
248 gross 3283 if(!marker_F[j]) {
249 artak 3010 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
250     }
251     }
252     if(A->val[iPtr*block_size*block_size+3]<0) {
253     sum_all_neg2+=A->val[iPtr*block_size*block_size+3];
254 gross 3283 if(!marker_F[j]) {
255 artak 3010 sum_strong_neg2+=A->val[iPtr*block_size*block_size+3];
256     }
257     } else if(A->val[iPtr*block_size*block_size+3]>0) {
258     sum_all_pos2+=A->val[iPtr*block_size*block_size+3];
259 gross 3283 if(!marker_F[j]) {
260 artak 3010 sum_strong_pos2+=A->val[iPtr*block_size*block_size+3];
261     }
262     }
263    
264     }
265     else {
266     a_ii1=A->val[iPtr*block_size*block_size];
267     a_ii2=A->val[iPtr*block_size*block_size+3];
268     dptr=iPtr;
269     }
270     }
271    
272     if(sum_strong_neg1!=0) {
273     alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
274     }
275     if(sum_strong_neg2!=0) {
276     alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
277     }
278    
279     if(sum_strong_pos1!=0) {
280     beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
281     }
282     else {
283     a_ii1+=sum_all_pos1;
284     beta[k*block_size]=0;
285     }
286     if(sum_strong_pos2!=0) {
287     beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
288     }
289     else {
290     a_ii2+=sum_all_pos2;
291     beta[k*block_size+1]=0;
292     }
293    
294     alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
295     alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
296     beta[k*block_size]=beta[k*block_size]/(a_ii1);
297     beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
298     k++;
299     /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
300 artak 2759 }
301 artak 3010 }
302     } else if (block_size==3) {
303     k=0;
304     for (i = 0; i < n; ++i) {
305 gross 3283 if(marker_F[i]) {
306 artak 3010 alpha[k*block_size]=0;
307     alpha[k*block_size+1]=0;
308     alpha[k*block_size+2]=0;
309     beta[k*block_size]=0;
310     beta[k*block_size+1]=0;
311     beta[k*block_size+2]=0;
312     sum_all_neg1=0;
313     sum_all_pos1=0;
314     sum_strong_neg1=0;
315     sum_strong_pos1=0;
316     sum_all_neg2=0;
317     sum_all_pos2=0;
318     sum_strong_neg2=0;
319     sum_strong_pos2=0;
320     sum_all_neg3=0;
321     sum_all_pos3=0;
322     sum_strong_neg3=0;
323     sum_strong_pos3=0;
324     /*compute beta_i=sum_{j\inN_i}a^{+}_ij and alpha_i=sum_{j\inN_i}a^{-}_ij and denominators for both alpha_i and beta_i*/
325     for (iPtr=A->pattern->ptr[i];iPtr<A->pattern->ptr[i + 1]; ++iPtr) {
326     j=A->pattern->index[iPtr];
327     if(j!=i) {
328     if(A->val[iPtr*block_size*block_size]<0) {
329     sum_all_neg1+=A->val[iPtr*block_size*block_size];
330 gross 3283 if(!marker_F[j]) {
331 artak 3010 sum_strong_neg1+=A->val[iPtr*block_size*block_size];
332     }
333     }
334     else if(A->val[iPtr*block_size*block_size]>0) {
335     sum_all_pos1+=A->val[iPtr*block_size*block_size];
336 gross 3283 if(!marker_F[j]) {
337 artak 3010 sum_strong_pos1+=A->val[iPtr*block_size*block_size];
338     }
339     }
340     if(A->val[iPtr*block_size*block_size+4]<0) {
341     sum_all_neg2+=A->val[iPtr*block_size*block_size+4];
342 gross 3283 if(!marker_F[j]) {
343 artak 3010 sum_strong_neg2+=A->val[iPtr*block_size*block_size+4];
344     }
345     } else if(A->val[iPtr*block_size*block_size+4]>0) {
346     sum_all_pos2+=A->val[iPtr*block_size*block_size+4];
347 gross 3283 if(!marker_F[j]) {
348 artak 3010 sum_strong_pos2+=A->val[iPtr*block_size*block_size+4];
349     }
350     }
351     if(A->val[iPtr*block_size*block_size+8]<0) {
352     sum_all_neg3+=A->val[iPtr*block_size*block_size+8];
353 gross 3283 if(!marker_F[j]) {
354 artak 3010 sum_strong_neg3+=A->val[iPtr*block_size*block_size+8];
355     }
356     } else if(A->val[iPtr*block_size*block_size+8]>0) {
357     sum_all_pos3+=A->val[iPtr*block_size*block_size+8];
358 gross 3283 if(!marker_F[j]) {
359 artak 3010 sum_strong_pos3+=A->val[iPtr*block_size*block_size+8];
360     }
361     }
362    
363    
364     }
365     else {
366     a_ii1=A->val[iPtr*block_size*block_size];
367     a_ii2=A->val[iPtr*block_size*block_size+4];
368     a_ii3=A->val[iPtr*block_size*block_size+8];
369     dptr=iPtr;
370     }
371 artak 2759 }
372 artak 3010
373     if(sum_strong_neg1!=0) {
374     alpha[k*block_size]=sum_all_neg1/(sum_strong_neg1);
375     }
376     if(sum_strong_neg2!=0) {
377     alpha[k*block_size+1]=sum_all_neg2/(sum_strong_neg2);
378     }
379     if(sum_strong_neg3!=0) {
380     alpha[k*block_size+2]=sum_all_neg3/(sum_strong_neg3);
381     }
382    
383     if(sum_strong_pos1!=0) {
384     beta[k*block_size]=sum_all_pos1/(sum_strong_pos1);
385     }
386     else {
387     a_ii1+=sum_all_pos1;
388     beta[k*block_size]=0;
389     }
390     if(sum_strong_pos2!=0) {
391     beta[k*block_size+1]=sum_all_pos2/(sum_strong_pos2);
392     }
393     else {
394     a_ii2+=sum_all_pos2;
395     beta[k*block_size+1]=0;
396     }
397     if(sum_strong_pos3!=0) {
398     beta[k*block_size+2]=sum_all_pos3/(sum_strong_pos3);
399     }
400     else {
401     a_ii3+=sum_all_pos3;
402     beta[k*block_size+2]=0;
403     }
404    
405     alpha[k*block_size]=alpha[k*block_size]/(a_ii1);
406     alpha[k*block_size+1]=alpha[k*block_size+1]/(a_ii2);
407     alpha[k*block_size+2]=alpha[k*block_size+2]/(a_ii3);
408     beta[k*block_size]=beta[k*block_size]/(a_ii1);
409     beta[k*block_size+1]=beta[k*block_size+1]/(a_ii2);
410     beta[k*block_size+2]=beta[k*block_size+2]/(a_ii3);
411     k++;
412     /*printf("Got in row=%d, alpha[%d]=%e, beta[%d]=%e, a_den=%e, b_den=%e \n",i,k-1,alpha[k-1],k-1,beta[k-1],alpha_den[k-1],beta_den[k-1]);*/
413 artak 2759 }
414     }
415 artak 3010 }
416    
417     if (block_size==1) {
418     #pragma omp parallel for private(i,iPtr) schedule(static)
419     for (i = 0; i < W_FC->numRows; ++i) {
420     for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
421     if(W_FC->val[iPtr]<0) {
422     W_FC->val[iPtr]=-alpha[i]*W_FC->val[iPtr];
423     }
424     else if(W_FC->val[iPtr]>0) {
425     W_FC->val[iPtr]=-beta[i]*W_FC->val[iPtr];
426     }
427     }
428     }
429     } else if (block_size==2) {
430     #pragma omp parallel for private(i,iPtr) schedule(static)
431     for (i = 0; i < W_FC->numRows; ++i) {
432     for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
433     if(W_FC->val[iPtr*block_size*block_size]<0) {
434     W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
435     }
436     else if(W_FC->val[iPtr*block_size*block_size]>0) {
437     W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
438     }
439     if(W_FC->val[iPtr*block_size*block_size+3]<0) {
440     W_FC->val[iPtr*block_size*block_size+3]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
441     }
442     else if(W_FC->val[iPtr*block_size*block_size+3]>0) {
443     W_FC->val[iPtr*block_size*block_size+3]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+3];
444     }
445     W_FC->val[iPtr*block_size*block_size+1]=0;
446     W_FC->val[iPtr*block_size*block_size+2]=0;
447     }
448     }
449     } else if (block_size==3) {
450     #pragma omp parallel for private(i,iPtr) schedule(static)
451     for (i = 0; i < W_FC->numRows; ++i) {
452     for (iPtr=W_FC->pattern->ptr[i];iPtr<W_FC->pattern->ptr[i + 1]; ++iPtr) {
453     if(W_FC->val[iPtr*block_size*block_size]<0) {
454     W_FC->val[iPtr*block_size*block_size]=-alpha[i*block_size]*W_FC->val[iPtr*block_size*block_size];
455     }
456     else if(W_FC->val[iPtr*block_size*block_size]>0) {
457     W_FC->val[iPtr*block_size*block_size]=-beta[i*block_size]*W_FC->val[iPtr*block_size*block_size];
458     }
459     if(W_FC->val[iPtr*block_size*block_size+4]<0) {
460     W_FC->val[iPtr*block_size*block_size+4]=-alpha[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
461     }
462     else if(W_FC->val[iPtr*block_size*block_size+4]>0) {
463     W_FC->val[iPtr*block_size*block_size+4]=-beta[i*block_size+1]*W_FC->val[iPtr*block_size*block_size+4];
464     }
465     if(W_FC->val[iPtr*block_size*block_size+8]<0) {
466     W_FC->val[iPtr*block_size*block_size+8]=-alpha[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
467     }
468     else if(W_FC->val[iPtr*block_size*block_size+8]>0) {
469     W_FC->val[iPtr*block_size*block_size+8]=-beta[i*block_size+2]*W_FC->val[iPtr*block_size*block_size+8];
470     }
471     W_FC->val[iPtr*block_size*block_size+1]=0;
472     W_FC->val[iPtr*block_size*block_size+2]=0;
473     W_FC->val[iPtr*block_size*block_size+3]=0;
474     W_FC->val[iPtr*block_size*block_size+5]=0;
475     W_FC->val[iPtr*block_size*block_size+6]=0;
476     W_FC->val[iPtr*block_size*block_size+7]=0;
477     }
478     }
479    
480     }
481    
482    
483 artak 2759 TMPMEMFREE(alpha);
484     TMPMEMFREE(beta);
485     }
486    
487    
488     /*A_c=R*A*P taking into account coarseneing */
489     /*Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix *R, Paso_SparseMatrix *P) {
490    
491     Paso_SparseMatrix *temp=NULL;
492     Paso_SparseMatrix *out=NULL;
493    
494     temp=Paso_SparseMatrix_MatrixMatrix(A,P);
495     out=Paso_SparseMatrix_MatrixMatrix(R,temp);
496    
497     Paso_SparseMatrix_free(temp);
498    
499     return out;
500     }
501     */
502    
503     Paso_SparseMatrix* Paso_SparseMatrix_MatrixMatrix(Paso_SparseMatrix* A, Paso_SparseMatrix* B) {
504     index_t iptrA,iptrB,iptrC;
505     dim_t i,j=0,k,l;
506     Paso_Pattern* outpattern=NULL;
507     Paso_SparseMatrix *out=NULL;
508     double sum,b_lj=0;
509 artak 3010 double sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,b_lj1=0,b_lj2=0,b_lj3=0,b_lj4=0,b_lj5=0,b_lj6=0,b_lj7=0,b_lj8=0,b_lj9=0;
510 artak 2759
511 artak 3010 dim_t block_size=A->row_block_size;
512    
513 artak 2784 double time0=0;
514     bool_t verbose=0;
515    
516 jfenwick 3259 time0=Esys_timer();
517 artak 2784
518 artak 2759 outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,B->pattern);
519 artak 3010 out=Paso_SparseMatrix_alloc(A->type, outpattern, block_size, block_size, FALSE);
520 artak 2759
521 jfenwick 3259 time0=Esys_timer()-time0;
522 artak 2784 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Pattern creation: %e\n",time0);
523 artak 2759
524 jfenwick 3259 time0=Esys_timer();
525 artak 2784
526 artak 3010 if(block_size==1) {
527     #pragma omp parallel for private(i,iptrC,j,sum,iptrA,k,b_lj,iptrB,l) schedule(static)
528     for(i = 0; i < out->numRows; i++) {
529     for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
530     j = out->pattern->index[iptrC];
531     sum=0;
532     for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
533     k=A->pattern->index[iptrA];
534     /*can be replaced by bsearch */
535     b_lj=0;
536     for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
537     l=B->pattern->index[iptrB];
538     if(l==j) {
539     b_lj=B->val[iptrB];
540     break;
541     }
542     }
543     sum+=A->val[iptrA]*b_lj;
544 artak 2759 }
545 artak 3010 out->val[iptrC]=sum;
546     }
547     }
548     } else if (block_size==2) {
549     #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,iptrB,l) schedule(static)
550     for(i = 0; i < out->numRows; i++) {
551     for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
552     j = out->pattern->index[iptrC];
553     sum1=0;
554     sum2=0;
555     sum3=0;
556     sum4=0;
557     for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
558     k=A->pattern->index[iptrA];
559     /*can be replaced by bsearch */
560     b_lj1=0;
561     b_lj2=0;
562     b_lj3=0;
563     b_lj4=0;
564     for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
565     l=B->pattern->index[iptrB];
566     if(l==j) {
567     b_lj1=B->val[iptrB*block_size*block_size];
568     b_lj2=B->val[iptrB*block_size*block_size+1];
569     b_lj3=B->val[iptrB*block_size*block_size+2];
570     b_lj4=B->val[iptrB*block_size*block_size+3];
571     break;
572     }
573     }
574 artak 3041 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+2]*b_lj2;
575     sum2+=A->val[iptrA*block_size*block_size]*b_lj3+A->val[iptrA*block_size*block_size+2]*b_lj4;
576     sum3+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2;
577     sum4+=A->val[iptrA*block_size*block_size+1]*b_lj3+A->val[iptrA*block_size*block_size+3]*b_lj4;
578 artak 3010 }
579     out->val[iptrC*block_size*block_size]=sum1;
580 artak 3041 out->val[iptrC*block_size*block_size+2]=sum2;
581     out->val[iptrC*block_size*block_size+1]=sum3;
582 artak 3010 out->val[iptrC*block_size*block_size+3]=sum4;
583     }
584     }
585     } else if (block_size==3) {
586     #pragma omp parallel for private(i,iptrC,j,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,iptrA,k,b_lj1,b_lj2,b_lj3,b_lj4,b_lj5,b_lj6,b_lj7,b_lj8,b_lj9,iptrB,l) schedule(static)
587     for(i = 0; i < out->numRows; i++) {
588     for(iptrC = out->pattern->ptr[i]; iptrC < out->pattern->ptr[i+1]; ++iptrC) {
589     j = out->pattern->index[iptrC];
590     sum1=0;
591     sum2=0;
592     sum3=0;
593     sum4=0;
594     sum5=0;
595     sum6=0;
596     sum7=0;
597     sum8=0;
598     sum9=0;
599     for(iptrA = A->pattern->ptr[i]; iptrA < A->pattern->ptr[i+1]; ++iptrA) {
600     k=A->pattern->index[iptrA];
601     /*can be replaced by bsearch */
602     b_lj1=0;
603     b_lj2=0;
604     b_lj3=0;
605     b_lj4=0;
606     b_lj5=0;
607     b_lj6=0;
608     b_lj7=0;
609     b_lj8=0;
610     b_lj9=0;
611     for(iptrB = B->pattern->ptr[k]; iptrB < B->pattern->ptr[k+1]; ++iptrB) {
612     l=B->pattern->index[iptrB];
613     if(l==j) {
614     b_lj1=B->val[iptrB*block_size*block_size];
615     b_lj2=B->val[iptrB*block_size*block_size+1];
616     b_lj3=B->val[iptrB*block_size*block_size+2];
617     b_lj4=B->val[iptrB*block_size*block_size+3];
618     b_lj5=B->val[iptrB*block_size*block_size+4];
619     b_lj6=B->val[iptrB*block_size*block_size+5];
620     b_lj7=B->val[iptrB*block_size*block_size+6];
621     b_lj8=B->val[iptrB*block_size*block_size+7];
622     b_lj9=B->val[iptrB*block_size*block_size+8];
623     break;
624     }
625     }
626 artak 3041 sum1+=A->val[iptrA*block_size*block_size]*b_lj1+A->val[iptrA*block_size*block_size+3]*b_lj2+A->val[iptrA*block_size*block_size+6]*b_lj3;
627     sum2+=A->val[iptrA*block_size*block_size]*b_lj4+A->val[iptrA*block_size*block_size+3]*b_lj5+A->val[iptrA*block_size*block_size+6]*b_lj6;
628     sum3+=A->val[iptrA*block_size*block_size]*b_lj7+A->val[iptrA*block_size*block_size+3]*b_lj8+A->val[iptrA*block_size*block_size+6]*b_lj9;
629     sum4+=A->val[iptrA*block_size*block_size+1]*b_lj1+A->val[iptrA*block_size*block_size+4]*b_lj2+A->val[iptrA*block_size*block_size+7]*b_lj3;
630     sum5+=A->val[iptrA*block_size*block_size+1]*b_lj4+A->val[iptrA*block_size*block_size+4]*b_lj5+A->val[iptrA*block_size*block_size+7]*b_lj6;
631     sum6+=A->val[iptrA*block_size*block_size+1]*b_lj7+A->val[iptrA*block_size*block_size+4]*b_lj8+A->val[iptrA*block_size*block_size+7]*b_lj9;
632     sum7+=A->val[iptrA*block_size*block_size+2]*b_lj1+A->val[iptrA*block_size*block_size+5]*b_lj2+A->val[iptrA*block_size*block_size+8]*b_lj3;
633     sum8+=A->val[iptrA*block_size*block_size+2]*b_lj4+A->val[iptrA*block_size*block_size+5]*b_lj5+A->val[iptrA*block_size*block_size+8]*b_lj6;
634     sum9+=A->val[iptrA*block_size*block_size+2]*b_lj7+A->val[iptrA*block_size*block_size+5]*b_lj8+A->val[iptrA*block_size*block_size+8]*b_lj9;
635 artak 3010 }
636     out->val[iptrC*block_size*block_size]=sum1;
637 artak 3041 out->val[iptrC*block_size*block_size+3]=sum2;
638     out->val[iptrC*block_size*block_size+6]=sum3;
639     out->val[iptrC*block_size*block_size+1]=sum4;
640 artak 3010 out->val[iptrC*block_size*block_size+4]=sum5;
641 artak 3041 out->val[iptrC*block_size*block_size+7]=sum6;
642     out->val[iptrC*block_size*block_size+2]=sum7;
643     out->val[iptrC*block_size*block_size+5]=sum8;
644 artak 3010 out->val[iptrC*block_size*block_size+8]=sum9;
645     }
646     }
647    
648 artak 2759 }
649 artak 2784
650 jfenwick 3259 time0=Esys_timer()-time0;
651 artak 2784 if (verbose) fprintf(stdout,"timing: Paso_SparseMatrix_MatrixMatrix: Matrix multiplication: %e\n",time0);
652 artak 2759
653     Paso_Pattern_free(outpattern);
654    
655     return out;
656     }
657    
658 artak 3041 /* for block_size=1 only */
659 artak 2759 Paso_SparseMatrix* Paso_Solver_getCoarseMatrix(Paso_SparseMatrix* A,Paso_SparseMatrix *R,Paso_SparseMatrix *P) {
660    
661     index_t iptrA_c,iptrR,iptrP,iptrA;
662     dim_t i,j,k,l,m;
663     double first_sum=0,second_sum=0,p_lj=0,a_kl=0,r_ik=0;
664     Paso_Pattern* temp=NULL;
665     Paso_Pattern* outpattern=NULL;
666     Paso_SparseMatrix *A_c=NULL;
667    
668 artak 2784 double time0=0;
669     bool_t verbose=0;
670    
671 jfenwick 3259 time0=Esys_timer();
672 artak 2784
673 artak 2759 temp=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,A->pattern,P->pattern);
674     outpattern=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,R->pattern,temp);
675     A_c=Paso_SparseMatrix_alloc(A->type,outpattern,1,1, TRUE);
676    
677 jfenwick 3259 time0=Esys_timer()-time0;
678 artak 2784 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Pattern creation: %e\n",time0);
679 artak 2759
680     /*a^c_ij=sum_k^n(r_ik)sum_l^n(a_kl*P_lj)*/
681    
682 jfenwick 3259 time0=Esys_timer();
683 artak 2784
684     #pragma omp parallel for private(i,iptrA_c,j,second_sum,iptrR,k,first_sum,p_lj,iptrP,m,a_kl,r_ik) schedule(static)
685 artak 2759 for(i = 0; i < A_c->numRows; i++) {
686     for(iptrA_c = A_c->pattern->ptr[i]; iptrA_c < A_c->pattern->ptr[i+1]; ++iptrA_c) {
687     j = A_c->pattern->index[iptrA_c];
688     second_sum=0;
689     for(iptrR = R->pattern->ptr[i]; iptrR < R->pattern->ptr[i+1]; ++iptrR) {
690     k = R->pattern->index[iptrR];
691     first_sum=0;
692     for(iptrA = A->pattern->ptr[k]; iptrA < A->pattern->ptr[k+1]; ++iptrA) {
693     l= A->pattern->index[iptrA];
694     p_lj=0;
695 artak 2767 /*This loop can be replaced by bsearch */
696 artak 2759 for(iptrP = P->pattern->ptr[l]; iptrP < P->pattern->ptr[l+1]; ++iptrP) {
697     m=P->pattern->index[iptrP];
698     if(m==j) {
699     p_lj=P->val[iptrP];
700     break;
701     }
702     }
703     a_kl=A->val[iptrA];
704     first_sum+=a_kl*p_lj;
705     }
706     r_ik=R->val[iptrR];
707     second_sum+=r_ik*first_sum;
708     }
709     A_c->val[iptrA_c]=second_sum;
710     }
711 artak 2784 }
712 jfenwick 3259 time0=Esys_timer()-time0;
713 artak 2784 if (verbose) fprintf(stdout,"timing: Paso_Solver_getCoarseMatrix: Matrix multiplication: %e\n",time0);
714 artak 2759
715 artak 2784 Paso_Pattern_free(outpattern);
716     Paso_Pattern_free(temp);
717 artak 2759
718     return A_c;
719     }
720    
721 artak 3010

  ViewVC Help
Powered by ViewVC 1.1.26