/[escript]/trunk/paso/src/SchurComplement.cpp
ViewVC logotype

Annotation of /trunk/paso/src/SchurComplement.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4657 - (hide annotations)
Thu Feb 6 06:12:20 2014 UTC (6 years ago) by jfenwick
File size: 10956 byte(s)
I changed some files.
Updated copyright notices, added GeoComp.



1 ksteube 1312
2 jfenwick 3981 /*****************************************************************************
3 ksteube 1811 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 ksteube 1811 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 dhawcroft 631
17 ksteube 1811
18 jfenwick 3981 /************************************************************************************/
19 jgs 150
20     /* Paso: updates A_CC <- ACC-ACF AFF^{-1} AFC */
21    
22     /* no check of consistency of matrices !!!! */
23    
24 jfenwick 3981 /************************************************************************************/
25 jgs 150
26     /* Copyrights by ACcESS Australia 2003,2004,2005 */
27 caltinay 3642 /* Author: Lutz Gross, l.gross@uq.edu.au */
28 jgs 150
29 jfenwick 3981 /************************************************************************************/
30 jgs 150
31 gross 700 #include "Paso.h"
32 ksteube 1312 #include "SparseMatrix.h"
33 jgs 150 #include "Solver.h"
34    
35 jfenwick 3981 /************************************************************************************/
36 jgs 150
37    
38    
39 ksteube 1312 void Paso_Solver_updateIncompleteSchurComplement(Paso_SparseMatrix* A_CC,Paso_SparseMatrix *A_CF,double* invA_FF,index_t* A_FF_pivot,Paso_SparseMatrix *A_FC) {
40    
41 jgs 150 index_t iPtr_CC,*index_CC,col_CF,col_FC, *where_p,iPtr_CC_2,i,iPtr_CF,iPtr_FC;
42     dim_t index_CC_len;
43 jfenwick 4521 bool set_A;
44 ksteube 1312 dim_t n_loc_rows=A_CC->numRows;
45 jgs 150 dim_t n_block=A_CC->row_block_size;
46     register double A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33,
47     invA_FF_11,invA_FF_21,invA_FF_31,invA_FF_12,invA_FF_22,invA_FF_32,invA_FF_13,invA_FF_23,invA_FF_33,
48 jfenwick 1981 A11=0,A21=0,A31=0,A12=0,A22=0,A32=0,A13=0,A23=0,A33=0,A_FC_11,A_FC_21,A_FC_31,A_FC_12,A_FC_22,A_FC_32,A_FC_13,A_FC_23,A_FC_33;
49 jgs 150 if (n_block==1) {
50 caltinay 2080 #pragma omp parallel for firstprivate(A11) private(i,iPtr_CC,index_CC,index_CC_len,col_CF,set_A,iPtr_CF,iPtr_FC,col_FC,where_p) schedule(static)
51 ksteube 1312 for (i = 0; i < n_loc_rows;++i) {
52 jgs 150 iPtr_CC=A_CC->pattern->ptr[i];
53     index_CC=&(A_CC->pattern->index[iPtr_CC]);
54     index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]);
55     /* now we run through the columns of A_CF in row i */
56     for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) {
57     col_CF=A_CF->pattern->index[iPtr_CF];
58     set_A=TRUE;
59     for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) {
60     col_FC=A_FC->pattern->index[iPtr_FC];
61     /* is (i,col_FC) in the shape of A_CC ? */
62 artak 1902 where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex);
63     if (where_p!=NULL) {
64 jgs 150 if (set_A) {
65     A11=A_CF->val[iPtr_CF]*invA_FF[col_CF];
66     set_A=FALSE;
67     }
68     A_CC->val[iPtr_CC+(index_t)(where_p-index_CC)]-=A11*A_FC->val[iPtr_FC];
69     }
70     } /* end of iPtr_FC loop */
71     } /* end of iPtr_CF loop */
72     } /* end of irow loop */
73     } else if (n_block==2) {
74 caltinay 2080 #pragma omp parallel for firstprivate(A11,A21,A12,A22) private(i,iPtr_CC,index_CC,index_CC_len,iPtr_CF,col_CF,iPtr_FC,col_FC,where_p,iPtr_CC_2,set_A,A_CF_11,A_CF_21,A_CF_12,A_CF_22,invA_FF_11,invA_FF_21,invA_FF_12,invA_FF_22,A_FC_11,A_FC_21,A_FC_12,A_FC_22) schedule(static)
75 ksteube 1312 for (i = 0; i < n_loc_rows;++i) {
76 jgs 150 iPtr_CC=A_CC->pattern->ptr[i];
77     index_CC=&(A_CC->pattern->index[iPtr_CC]);
78     index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]);
79     /* now we run through the columns of A_CF in row i */
80     for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) {
81     col_CF=A_CF->pattern->index[iPtr_CF];
82     set_A=TRUE;
83     for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) {
84     col_FC=A_FC->pattern->index[iPtr_FC];
85     /* is (i,col_FC) in the shape of A_CC ? */
86     where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex);
87     if (where_p!=NULL) {
88     iPtr_CC_2=iPtr_CC+(index_t)(where_p-index_CC);
89 jfenwick 4345 /* this calculates A_CF*invA_FF(i,col_CF) */
90 jgs 150 if (set_A) {
91     A_CF_11=A_CF->val[4*iPtr_CF ];
92     A_CF_21=A_CF->val[4*iPtr_CF+1];
93     A_CF_12=A_CF->val[4*iPtr_CF+2];
94     A_CF_22=A_CF->val[4*iPtr_CF+3];
95    
96     invA_FF_11=invA_FF[4*col_CF ];
97     invA_FF_21=invA_FF[4*col_CF+1];
98     invA_FF_12=invA_FF[4*col_CF+2];
99     invA_FF_22=invA_FF[4*col_CF+3];
100    
101     A11=A_CF_11*invA_FF_11+A_CF_12*invA_FF_21;
102     A21=A_CF_21*invA_FF_11+A_CF_22*invA_FF_21;
103     A12=A_CF_11*invA_FF_12+A_CF_12*invA_FF_22;
104     A22=A_CF_21*invA_FF_12+A_CF_22*invA_FF_22;
105    
106     set_A=FALSE;
107     }
108    
109     A_FC_11=A_FC->val[4*iPtr_FC ];
110     A_FC_21=A_FC->val[4*iPtr_FC+1];
111     A_FC_12=A_FC->val[4*iPtr_FC+2];
112     A_FC_22=A_FC->val[4*iPtr_FC+3];
113    
114     A_CC->val[4*iPtr_CC_2 ]-=A11*A_FC_11+A12*A_FC_21;
115     A_CC->val[4*iPtr_CC_2+1]-=A21*A_FC_11+A22*A_FC_21;
116     A_CC->val[4*iPtr_CC_2+2]-=A11*A_FC_12+A12*A_FC_22;
117     A_CC->val[4*iPtr_CC_2+3]-=A21*A_FC_12+A22*A_FC_22;
118    
119     }
120     } /* end of iPtr_FC loop */
121     } /* end of iPtr_CF loop */
122     } /* end of irow loop */
123     } else if (n_block==3) {
124 caltinay 2080 #pragma omp parallel for firstprivate(A11,A21,A31,A12,A22,A32,A13,A23,A33) private(i,iPtr_CC,index_CC,index_CC_len,iPtr_CF,col_CF,iPtr_FC,col_FC,where_p,iPtr_CC_2,set_A,A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33,invA_FF_11,invA_FF_21,invA_FF_31,invA_FF_12,invA_FF_22,invA_FF_32,invA_FF_13,invA_FF_23,invA_FF_33,A_FC_11,A_FC_21,A_FC_31,A_FC_12,A_FC_22,A_FC_32,A_FC_13,A_FC_23,A_FC_33) schedule(static)
125 ksteube 1312 for (i = 0; i < n_loc_rows;++i) {
126 jgs 150 iPtr_CC=A_CC->pattern->ptr[i];
127     index_CC=&(A_CC->pattern->index[iPtr_CC]);
128     index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]);
129     /* now we run through the columns of A_CF in row i */
130     for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) {
131     col_CF=A_CF->pattern->index[iPtr_CF];
132     set_A=TRUE;
133     for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) {
134     col_FC=A_FC->pattern->index[iPtr_FC];
135     /* is (i,col_FC) in the shape of A_CC ? */
136     where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex);
137     if (where_p!=NULL) {
138     iPtr_CC_2=iPtr_CC+(index_t)(where_p-index_CC);
139 jfenwick 4345 /* this calculates A_CF*invA_FF(i,col_CF) */
140 jgs 150 if (set_A) {
141     A_CF_11=A_CF->val[9*iPtr_CF ];
142     A_CF_21=A_CF->val[9*iPtr_CF+1];
143     A_CF_31=A_CF->val[9*iPtr_CF+2];
144     A_CF_12=A_CF->val[9*iPtr_CF+3];
145     A_CF_22=A_CF->val[9*iPtr_CF+4];
146     A_CF_32=A_CF->val[9*iPtr_CF+5];
147     A_CF_13=A_CF->val[9*iPtr_CF+6];
148     A_CF_23=A_CF->val[9*iPtr_CF+7];
149     A_CF_33=A_CF->val[9*iPtr_CF+8];
150    
151     invA_FF_11=invA_FF[9*col_CF ];
152     invA_FF_21=invA_FF[9*col_CF+1];
153     invA_FF_31=invA_FF[9*col_CF+2];
154     invA_FF_12=invA_FF[9*col_CF+3];
155     invA_FF_22=invA_FF[9*col_CF+4];
156     invA_FF_32=invA_FF[9*col_CF+5];
157     invA_FF_13=invA_FF[9*col_CF+6];
158     invA_FF_23=invA_FF[9*col_CF+7];
159     invA_FF_33=invA_FF[9*col_CF+8];
160    
161     A11=A_CF_11*invA_FF_11+A_CF_12*invA_FF_21+A_CF_13*invA_FF_31;
162     A21=A_CF_21*invA_FF_11+A_CF_22*invA_FF_21+A_CF_23*invA_FF_31;
163     A31=A_CF_31*invA_FF_11+A_CF_32*invA_FF_21+A_CF_33*invA_FF_31;
164     A12=A_CF_11*invA_FF_12+A_CF_12*invA_FF_22+A_CF_13*invA_FF_32;
165     A22=A_CF_21*invA_FF_12+A_CF_22*invA_FF_22+A_CF_23*invA_FF_32;
166     A32=A_CF_31*invA_FF_12+A_CF_32*invA_FF_22+A_CF_33*invA_FF_32;
167     A13=A_CF_11*invA_FF_13+A_CF_12*invA_FF_23+A_CF_13*invA_FF_33;
168     A23=A_CF_21*invA_FF_13+A_CF_22*invA_FF_23+A_CF_23*invA_FF_33;
169     A33=A_CF_31*invA_FF_13+A_CF_32*invA_FF_23+A_CF_33*invA_FF_33;
170    
171     set_A=FALSE;
172     }
173    
174     A_FC_11=A_FC->val[9*iPtr_FC ];
175     A_FC_21=A_FC->val[9*iPtr_FC+1];
176     A_FC_31=A_FC->val[9*iPtr_FC+2];
177     A_FC_12=A_FC->val[9*iPtr_FC+3];
178     A_FC_22=A_FC->val[9*iPtr_FC+4];
179     A_FC_32=A_FC->val[9*iPtr_FC+5];
180     A_FC_13=A_FC->val[9*iPtr_FC+6];
181     A_FC_23=A_FC->val[9*iPtr_FC+7];
182     A_FC_33=A_FC->val[9*iPtr_FC+8];
183    
184     A_CC->val[9*iPtr_CC_2 ]-=A11*A_FC_11+A12*A_FC_21+A13*A_FC_31;
185     A_CC->val[9*iPtr_CC_2+1]-=A21*A_FC_11+A22*A_FC_21+A23*A_FC_31;
186     A_CC->val[9*iPtr_CC_2+2]-=A31*A_FC_11+A32*A_FC_21+A33*A_FC_31;
187     A_CC->val[9*iPtr_CC_2+3]-=A11*A_FC_12+A12*A_FC_22+A13*A_FC_32;
188     A_CC->val[9*iPtr_CC_2+4]-=A21*A_FC_12+A22*A_FC_22+A23*A_FC_32;
189     A_CC->val[9*iPtr_CC_2+5]-=A31*A_FC_12+A32*A_FC_22+A33*A_FC_32;
190     A_CC->val[9*iPtr_CC_2+6]-=A11*A_FC_13+A12*A_FC_23+A13*A_FC_33;
191     A_CC->val[9*iPtr_CC_2+7]-=A21*A_FC_13+A22*A_FC_23+A23*A_FC_33;
192     A_CC->val[9*iPtr_CC_2+8]-=A31*A_FC_13+A32*A_FC_23+A33*A_FC_33;
193    
194     }
195     } /* end of iPtr_FC loop */
196     } /* end of iPtr_CF loop */
197     } /* end of irow loop */
198     }
199     }
200     /*
201     * $Log$
202     * Revision 1.2 2005/09/15 03:44:40 jgs
203     * Merge of development branch dev-02 back to main trunk on 2005-09-15
204     *
205     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
206 jfenwick 4345 * These files have been extracted from finley to define a stand alone library for iterative
207 jgs 150 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
208     * has not been tested yet.
209     *
210     *
211 phornby 1906 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
svn:mergeinfo /branches/amg_from_3530/paso/src/SchurComplement.cpp:3531-3826 /branches/lapack2681/paso/src/SchurComplement.cpp:2682-2741 /branches/pasowrap/paso/src/SchurComplement.cpp:3661-3674 /branches/py3_attempt2/paso/src/SchurComplement.cpp:3871-3891 /branches/restext/paso/src/SchurComplement.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/SchurComplement.cpp:3669-3791 /branches/stage3.0/paso/src/SchurComplement.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/SchurComplement.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/SchurComplement.cpp:3517-3974 /release/3.0/paso/src/SchurComplement.cpp:2591-2601 /trunk/paso/src/SchurComplement.cpp:4257-4344 /trunk/ripley/test/python/paso/src/SchurComplement.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26