1 |
/* $Id$ */ |
2 |
|
3 |
/* |
4 |
******************************************************************************** |
5 |
* Copyright 2006 by ACcESS MNRF * |
6 |
* * |
7 |
* http://www.access.edu.au * |
8 |
* Primary Business: Queensland, Australia * |
9 |
* Licensed under the Open Software License version 3.0 * |
10 |
* http://www.opensource.org/licenses/osl-3.0.php * |
11 |
******************************************************************************** |
12 |
*/ |
13 |
|
14 |
/**************************************************************/ |
15 |
|
16 |
/* Paso: updates A_CC <- ACC-ACF AFF^{-1} AFC */ |
17 |
|
18 |
/* no check of consistency of matrices !!!! */ |
19 |
|
20 |
/**************************************************************/ |
21 |
|
22 |
/* Copyrights by ACcESS Australia 2003,2004,2005 */ |
23 |
/* Author: gross@access.edu.au */ |
24 |
|
25 |
/**************************************************************/ |
26 |
|
27 |
#include "../Paso.h" |
28 |
#include "../SystemMatrix.h" |
29 |
#include "Solver.h" |
30 |
|
31 |
/**************************************************************/ |
32 |
|
33 |
|
34 |
|
35 |
void Paso_Solver_updateIncompleteSchurComplement(Paso_SystemMatrix* A_CC,Paso_SystemMatrix *A_CF,double* invA_FF,index_t* A_FF_pivot,Paso_SystemMatrix *A_FC) { |
36 |
index_t iPtr_CC,*index_CC,col_CF,col_FC, *where_p,iPtr_CC_2,i,iPtr_CF,iPtr_FC; |
37 |
dim_t index_CC_len; |
38 |
bool_t set_A; |
39 |
dim_t n_rows=A_CC->num_rows; |
40 |
dim_t n_block=A_CC->row_block_size; |
41 |
register double A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33, |
42 |
invA_FF_11,invA_FF_21,invA_FF_31,invA_FF_12,invA_FF_22,invA_FF_32,invA_FF_13,invA_FF_23,invA_FF_33, |
43 |
A11,A21,A31,A12,A22,A32,A13,A23,A33,A_FC_11,A_FC_21,A_FC_31,A_FC_12,A_FC_22,A_FC_32,A_FC_13,A_FC_23,A_FC_33; |
44 |
if (n_block==1) { |
45 |
#pragma omp parallel for private(i,iPtr_CC,index_CC,index_CC_len,col_CF,set_A,iPtr_CF,iPtr_FC,col_FC,where_p,A11) schedule(static) |
46 |
for (i = 0; i < n_rows;++i) { |
47 |
iPtr_CC=A_CC->pattern->ptr[i]; |
48 |
index_CC=&(A_CC->pattern->index[iPtr_CC]); |
49 |
index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]); |
50 |
/* now we run through the columns of A_CF in row i */ |
51 |
for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) { |
52 |
col_CF=A_CF->pattern->index[iPtr_CF]; |
53 |
set_A=TRUE; |
54 |
for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) { |
55 |
col_FC=A_FC->pattern->index[iPtr_FC]; |
56 |
/* is (i,col_FC) in the shape of A_CC ? */ |
57 |
where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex); |
58 |
if (where_p!=NULL) { |
59 |
if (set_A) { |
60 |
A11=A_CF->val[iPtr_CF]*invA_FF[col_CF]; |
61 |
set_A=FALSE; |
62 |
} |
63 |
A_CC->val[iPtr_CC+(index_t)(where_p-index_CC)]-=A11*A_FC->val[iPtr_FC]; |
64 |
} |
65 |
} /* end of iPtr_FC loop */ |
66 |
} /* end of iPtr_CF loop */ |
67 |
} /* end of irow loop */ |
68 |
} else if (n_block==2) { |
69 |
#pragma omp parallel for private(i,iPtr_CC,index_CC,index_CC_len,iPtr_CF,col_CF,iPtr_FC,col_FC,where_p,iPtr_CC_2,set_A,A_CF_11,A_CF_21,A_CF_12,A_CF_22,invA_FF_11,invA_FF_21,invA_FF_12,invA_FF_22,A11,A21,A12,A22,A_FC_11,A_FC_21,A_FC_12,A_FC_22) schedule(static) |
70 |
for (i = 0; i < n_rows;++i) { |
71 |
iPtr_CC=A_CC->pattern->ptr[i]; |
72 |
index_CC=&(A_CC->pattern->index[iPtr_CC]); |
73 |
index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]); |
74 |
/* now we run through the columns of A_CF in row i */ |
75 |
for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) { |
76 |
col_CF=A_CF->pattern->index[iPtr_CF]; |
77 |
set_A=TRUE; |
78 |
for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) { |
79 |
col_FC=A_FC->pattern->index[iPtr_FC]; |
80 |
/* is (i,col_FC) in the shape of A_CC ? */ |
81 |
where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex); |
82 |
if (where_p!=NULL) { |
83 |
iPtr_CC_2=iPtr_CC+(index_t)(where_p-index_CC); |
84 |
/* this calculutes A_CF*invA_FF(i,col_CF) */ |
85 |
if (set_A) { |
86 |
A_CF_11=A_CF->val[4*iPtr_CF ]; |
87 |
A_CF_21=A_CF->val[4*iPtr_CF+1]; |
88 |
A_CF_12=A_CF->val[4*iPtr_CF+2]; |
89 |
A_CF_22=A_CF->val[4*iPtr_CF+3]; |
90 |
|
91 |
invA_FF_11=invA_FF[4*col_CF ]; |
92 |
invA_FF_21=invA_FF[4*col_CF+1]; |
93 |
invA_FF_12=invA_FF[4*col_CF+2]; |
94 |
invA_FF_22=invA_FF[4*col_CF+3]; |
95 |
|
96 |
A11=A_CF_11*invA_FF_11+A_CF_12*invA_FF_21; |
97 |
A21=A_CF_21*invA_FF_11+A_CF_22*invA_FF_21; |
98 |
A12=A_CF_11*invA_FF_12+A_CF_12*invA_FF_22; |
99 |
A22=A_CF_21*invA_FF_12+A_CF_22*invA_FF_22; |
100 |
|
101 |
set_A=FALSE; |
102 |
} |
103 |
|
104 |
A_FC_11=A_FC->val[4*iPtr_FC ]; |
105 |
A_FC_21=A_FC->val[4*iPtr_FC+1]; |
106 |
A_FC_12=A_FC->val[4*iPtr_FC+2]; |
107 |
A_FC_22=A_FC->val[4*iPtr_FC+3]; |
108 |
|
109 |
A_CC->val[4*iPtr_CC_2 ]-=A11*A_FC_11+A12*A_FC_21; |
110 |
A_CC->val[4*iPtr_CC_2+1]-=A21*A_FC_11+A22*A_FC_21; |
111 |
A_CC->val[4*iPtr_CC_2+2]-=A11*A_FC_12+A12*A_FC_22; |
112 |
A_CC->val[4*iPtr_CC_2+3]-=A21*A_FC_12+A22*A_FC_22; |
113 |
|
114 |
} |
115 |
} /* end of iPtr_FC loop */ |
116 |
} /* end of iPtr_CF loop */ |
117 |
} /* end of irow loop */ |
118 |
} else if (n_block==3) { |
119 |
#pragma omp parallel for private(i,iPtr_CC,index_CC,index_CC_len,iPtr_CF,col_CF,iPtr_FC,col_FC,where_p,iPtr_CC_2,set_A,A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33,invA_FF_11,invA_FF_21,invA_FF_31,invA_FF_12,invA_FF_22,invA_FF_32,invA_FF_13,invA_FF_23,invA_FF_33,A11,A21,A31,A12,A22,A32,A13,A23,A33,A_FC_11,A_FC_21,A_FC_31,A_FC_12,A_FC_22,A_FC_32,A_FC_13,A_FC_23,A_FC_33) schedule(static) |
120 |
for (i = 0; i < n_rows;++i) { |
121 |
iPtr_CC=A_CC->pattern->ptr[i]; |
122 |
index_CC=&(A_CC->pattern->index[iPtr_CC]); |
123 |
index_CC_len=(size_t)(A_CC->pattern->ptr[i+1]-A_CC->pattern->ptr[i]); |
124 |
/* now we run through the columns of A_CF in row i */ |
125 |
for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) { |
126 |
col_CF=A_CF->pattern->index[iPtr_CF]; |
127 |
set_A=TRUE; |
128 |
for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) { |
129 |
col_FC=A_FC->pattern->index[iPtr_FC]; |
130 |
/* is (i,col_FC) in the shape of A_CC ? */ |
131 |
where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),Paso_comparIndex); |
132 |
if (where_p!=NULL) { |
133 |
iPtr_CC_2=iPtr_CC+(index_t)(where_p-index_CC); |
134 |
/* this calculutes A_CF*invA_FF(i,col_CF) */ |
135 |
if (set_A) { |
136 |
A_CF_11=A_CF->val[9*iPtr_CF ]; |
137 |
A_CF_21=A_CF->val[9*iPtr_CF+1]; |
138 |
A_CF_31=A_CF->val[9*iPtr_CF+2]; |
139 |
A_CF_12=A_CF->val[9*iPtr_CF+3]; |
140 |
A_CF_22=A_CF->val[9*iPtr_CF+4]; |
141 |
A_CF_32=A_CF->val[9*iPtr_CF+5]; |
142 |
A_CF_13=A_CF->val[9*iPtr_CF+6]; |
143 |
A_CF_23=A_CF->val[9*iPtr_CF+7]; |
144 |
A_CF_33=A_CF->val[9*iPtr_CF+8]; |
145 |
|
146 |
invA_FF_11=invA_FF[9*col_CF ]; |
147 |
invA_FF_21=invA_FF[9*col_CF+1]; |
148 |
invA_FF_31=invA_FF[9*col_CF+2]; |
149 |
invA_FF_12=invA_FF[9*col_CF+3]; |
150 |
invA_FF_22=invA_FF[9*col_CF+4]; |
151 |
invA_FF_32=invA_FF[9*col_CF+5]; |
152 |
invA_FF_13=invA_FF[9*col_CF+6]; |
153 |
invA_FF_23=invA_FF[9*col_CF+7]; |
154 |
invA_FF_33=invA_FF[9*col_CF+8]; |
155 |
|
156 |
A11=A_CF_11*invA_FF_11+A_CF_12*invA_FF_21+A_CF_13*invA_FF_31; |
157 |
A21=A_CF_21*invA_FF_11+A_CF_22*invA_FF_21+A_CF_23*invA_FF_31; |
158 |
A31=A_CF_31*invA_FF_11+A_CF_32*invA_FF_21+A_CF_33*invA_FF_31; |
159 |
A12=A_CF_11*invA_FF_12+A_CF_12*invA_FF_22+A_CF_13*invA_FF_32; |
160 |
A22=A_CF_21*invA_FF_12+A_CF_22*invA_FF_22+A_CF_23*invA_FF_32; |
161 |
A32=A_CF_31*invA_FF_12+A_CF_32*invA_FF_22+A_CF_33*invA_FF_32; |
162 |
A13=A_CF_11*invA_FF_13+A_CF_12*invA_FF_23+A_CF_13*invA_FF_33; |
163 |
A23=A_CF_21*invA_FF_13+A_CF_22*invA_FF_23+A_CF_23*invA_FF_33; |
164 |
A33=A_CF_31*invA_FF_13+A_CF_32*invA_FF_23+A_CF_33*invA_FF_33; |
165 |
|
166 |
set_A=FALSE; |
167 |
} |
168 |
|
169 |
A_FC_11=A_FC->val[9*iPtr_FC ]; |
170 |
A_FC_21=A_FC->val[9*iPtr_FC+1]; |
171 |
A_FC_31=A_FC->val[9*iPtr_FC+2]; |
172 |
A_FC_12=A_FC->val[9*iPtr_FC+3]; |
173 |
A_FC_22=A_FC->val[9*iPtr_FC+4]; |
174 |
A_FC_32=A_FC->val[9*iPtr_FC+5]; |
175 |
A_FC_13=A_FC->val[9*iPtr_FC+6]; |
176 |
A_FC_23=A_FC->val[9*iPtr_FC+7]; |
177 |
A_FC_33=A_FC->val[9*iPtr_FC+8]; |
178 |
|
179 |
A_CC->val[9*iPtr_CC_2 ]-=A11*A_FC_11+A12*A_FC_21+A13*A_FC_31; |
180 |
A_CC->val[9*iPtr_CC_2+1]-=A21*A_FC_11+A22*A_FC_21+A23*A_FC_31; |
181 |
A_CC->val[9*iPtr_CC_2+2]-=A31*A_FC_11+A32*A_FC_21+A33*A_FC_31; |
182 |
A_CC->val[9*iPtr_CC_2+3]-=A11*A_FC_12+A12*A_FC_22+A13*A_FC_32; |
183 |
A_CC->val[9*iPtr_CC_2+4]-=A21*A_FC_12+A22*A_FC_22+A23*A_FC_32; |
184 |
A_CC->val[9*iPtr_CC_2+5]-=A31*A_FC_12+A32*A_FC_22+A33*A_FC_32; |
185 |
A_CC->val[9*iPtr_CC_2+6]-=A11*A_FC_13+A12*A_FC_23+A13*A_FC_33; |
186 |
A_CC->val[9*iPtr_CC_2+7]-=A21*A_FC_13+A22*A_FC_23+A23*A_FC_33; |
187 |
A_CC->val[9*iPtr_CC_2+8]-=A31*A_FC_13+A32*A_FC_23+A33*A_FC_33; |
188 |
|
189 |
} |
190 |
} /* end of iPtr_FC loop */ |
191 |
} /* end of iPtr_CF loop */ |
192 |
} /* end of irow loop */ |
193 |
} |
194 |
} |
195 |
/* |
196 |
* $Log$ |
197 |
* Revision 1.2 2005/09/15 03:44:40 jgs |
198 |
* Merge of development branch dev-02 back to main trunk on 2005-09-15 |
199 |
* |
200 |
* Revision 1.1.2.1 2005/09/05 06:29:50 gross |
201 |
* These files have been extracted from finley to define a stand alone libray for iterative |
202 |
* linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but |
203 |
* has not been tested yet. |
204 |
* |
205 |
* |
206 |
*/ |