1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2008 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: GS preconditioner with reordering */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */ |
22 |
/* Author: artak@uq.edu.au */ |
23 |
|
24 |
/**************************************************************/ |
25 |
|
26 |
#include "Paso.h" |
27 |
#include "Solver.h" |
28 |
#include "PasoUtil.h" |
29 |
|
30 |
#include <stdio.h> |
31 |
|
32 |
/**************************************************************/ |
33 |
|
34 |
/* free all memory used by GS */ |
35 |
|
36 |
void Paso_Solver_GS_free(Paso_Solver_GS * in) { |
37 |
if (in!=NULL) { |
38 |
MEMFREE(in->colorOf); |
39 |
Paso_SparseMatrix_free(in->factors); |
40 |
MEMFREE(in->diag); |
41 |
MEMFREE(in->main_iptr); |
42 |
Paso_Pattern_free(in->pattern); |
43 |
MEMFREE(in); |
44 |
} |
45 |
} |
46 |
|
47 |
/**************************************************************/ |
48 |
|
49 |
/* constructs the incomplete block factorization of |
50 |
|
51 |
*/ |
52 |
Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) { |
53 |
dim_t n=A->numRows; |
54 |
dim_t n_block=A->row_block_size; |
55 |
dim_t block_size=A->block_size; |
56 |
index_t num_colors=0, *mis_marker=NULL; |
57 |
register index_t i,iptr_main,iPtr; |
58 |
double time0,time_color,time_fac; |
59 |
/* allocations: */ |
60 |
Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS); |
61 |
if (Paso_checkPtr(out)) return NULL; |
62 |
out->colorOf=MEMALLOC(n,index_t); |
63 |
out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double); |
64 |
/*out->diag=MEMALLOC(A->len,double);*/ |
65 |
out->main_iptr=MEMALLOC(n,index_t); |
66 |
out->pattern=Paso_Pattern_getReference(A->pattern); |
67 |
out->factors=Paso_SparseMatrix_getReference(A); |
68 |
out->n_block=n_block; |
69 |
out->n=n; |
70 |
|
71 |
if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) { |
72 |
time0=Paso_timer(); |
73 |
Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf); |
74 |
time_color=Paso_timer()-time0; |
75 |
|
76 |
if (Paso_noError()) { |
77 |
time0=Paso_timer(); |
78 |
|
79 |
if (! (Paso_checkPtr(out->diag))) { |
80 |
if (n_block==1) { |
81 |
#pragma omp parallel for private(i, iPtr) schedule(static) |
82 |
for (i = 0; i < A->pattern->numOutput; i++) { |
83 |
out->diag[i]=1.; |
84 |
/* find main diagonal */ |
85 |
for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) { |
86 |
if (A->pattern->index[iPtr]==i) { |
87 |
iptr_main=iPtr; |
88 |
out->diag[i]=A->val[iPtr]; |
89 |
break; |
90 |
} |
91 |
} |
92 |
out->main_iptr[i]=iptr_main; |
93 |
} |
94 |
} else if (n_block==2) { |
95 |
#pragma omp parallel for private(i, iPtr) schedule(static) |
96 |
for (i = 0; i < A->pattern->numOutput; i++) { |
97 |
out->diag[i*4+0]= 1.; |
98 |
out->diag[i*4+1]= 0.; |
99 |
out->diag[i*4+2]= 0.; |
100 |
out->diag[i*4+3]= 1.; |
101 |
/* find main diagonal */ |
102 |
for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) { |
103 |
if (A->pattern->index[iPtr]==i) { |
104 |
iptr_main=iPtr; |
105 |
out->diag[i*4]= A->val[iPtr*4]; |
106 |
out->diag[i*4+1]=A->val[iPtr*4+1]; |
107 |
out->diag[i*4+2]=A->val[iPtr*4+2]; |
108 |
out->diag[i*4+3]= A->val[iPtr*4+3]; |
109 |
break; |
110 |
} |
111 |
} |
112 |
out->main_iptr[i]=iptr_main; |
113 |
} |
114 |
} else if (n_block==3) { |
115 |
#pragma omp parallel for private(i, iPtr) schedule(static) |
116 |
for (i = 0; i < A->pattern->numOutput; i++) { |
117 |
out->diag[i*9 ]=1.; |
118 |
out->diag[i*9+1]=0.; |
119 |
out->diag[i*9+2]=0.; |
120 |
out->diag[i*9+3]=0.; |
121 |
out->diag[i*9+4]=1.; |
122 |
out->diag[i*9+5]=0.; |
123 |
out->diag[i*9+6]=0.; |
124 |
out->diag[i*9+7]=0.; |
125 |
out->diag[i*9+8]=1.; |
126 |
/* find main diagonal */ |
127 |
for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) { |
128 |
if (A->pattern->index[iPtr]==i) { |
129 |
iptr_main=iPtr; |
130 |
out->diag[i*9 ]=A->val[iPtr*9 ]; |
131 |
out->diag[i*9+1]=A->val[iPtr*9+1]; |
132 |
out->diag[i*9+2]=A->val[iPtr*9+2]; |
133 |
out->diag[i*9+3]=A->val[iPtr*9+3]; |
134 |
out->diag[i*9+4]=A->val[iPtr*9+4]; |
135 |
out->diag[i*9+5]=A->val[iPtr*9+5]; |
136 |
out->diag[i*9+6]=A->val[iPtr*9+6]; |
137 |
out->diag[i*9+7]=A->val[iPtr*9+7]; |
138 |
out->diag[i*9+8]=A->val[iPtr*9+8]; |
139 |
break; |
140 |
} |
141 |
} |
142 |
out->main_iptr[i]=iptr_main; |
143 |
} |
144 |
} |
145 |
} |
146 |
|
147 |
time_fac=Paso_timer()-time0; |
148 |
} |
149 |
} |
150 |
if (Paso_noError()) { |
151 |
if (verbose) { |
152 |
printf("GS: %d color used \n",out->num_colors); |
153 |
printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac); |
154 |
} |
155 |
return out; |
156 |
} else { |
157 |
Paso_Solver_GS_free(out); |
158 |
return NULL; |
159 |
} |
160 |
} |
161 |
|
162 |
/**************************************************************/ |
163 |
|
164 |
/* apply GS precondition b-> x |
165 |
|
166 |
in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b |
167 |
|
168 |
should be called within a parallel region |
169 |
barrier synconization should be performed to make sure that the input vector available |
170 |
|
171 |
*/ |
172 |
|
173 |
void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) { |
174 |
register dim_t i,k; |
175 |
register index_t color,iptr_ik,iptr_main; |
176 |
register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31; |
177 |
dim_t n_block=gs->n_block; |
178 |
dim_t n=gs->n; |
179 |
index_t* pivot=NULL; |
180 |
|
181 |
/* copy x into b*/ |
182 |
#pragma omp parallel for private(i) schedule(static) |
183 |
for (i=0;i<n*n_block;++i) x[i]=b[i]; |
184 |
/* forward substitution */ |
185 |
for (color=0;color<gs->num_colors;++color) { |
186 |
if (n_block==1) { |
187 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main) |
188 |
for (i = 0; i < n; ++i) { |
189 |
if (gs->colorOf[i]==color) { |
190 |
/* x_i=x_i-a_ik*x_k */ |
191 |
S1=x[i]; |
192 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
193 |
k=gs->pattern->index[iptr_ik]; |
194 |
if (gs->colorOf[k]<color) { |
195 |
R1=x[k]; |
196 |
S1-=gs->factors->val[iptr_ik]*R1; |
197 |
} |
198 |
} |
199 |
iptr_main=gs->main_iptr[i]; |
200 |
x[i]=(1/gs->factors->val[iptr_main])*S1; |
201 |
} |
202 |
} |
203 |
} else if (n_block==2) { |
204 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2) |
205 |
for (i = 0; i < n; ++i) { |
206 |
if (gs->colorOf[i]==color) { |
207 |
/* x_i=x_i-a_ik*x_k */ |
208 |
S1=x[2*i]; |
209 |
S2=x[2*i+1]; |
210 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
211 |
k=gs->pattern->index[iptr_ik]; |
212 |
if (gs->colorOf[k]<color) { |
213 |
R1=x[2*k]; |
214 |
R2=x[2*k+1]; |
215 |
S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2; |
216 |
S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2; |
217 |
} |
218 |
} |
219 |
iptr_main=gs->main_iptr[i]; |
220 |
A11=gs->factors->val[iptr_main*4]; |
221 |
A21=gs->factors->val[iptr_main*4+1]; |
222 |
A12=gs->factors->val[iptr_main*4+2]; |
223 |
A22=gs->factors->val[iptr_main*4+3]; |
224 |
D = A11*A22-A12*A21; |
225 |
if (ABS(D)>0.) { |
226 |
D=1./D; |
227 |
S11= A22*D; |
228 |
S21=-A21*D; |
229 |
S12=-A12*D; |
230 |
S22= A11*D; |
231 |
x[2*i ]=S11*S1+S12*S2; |
232 |
x[2*i+1]=S21*S1+S22*S2; |
233 |
} else { |
234 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
235 |
} |
236 |
} |
237 |
|
238 |
} |
239 |
} else if (n_block==3) { |
240 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3) |
241 |
for (i = 0; i < n; ++i) { |
242 |
if (gs->colorOf[i]==color) { |
243 |
/* x_i=x_i-a_ik*x_k */ |
244 |
S1=x[3*i]; |
245 |
S2=x[3*i+1]; |
246 |
S3=x[3*i+2]; |
247 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
248 |
k=gs->pattern->index[iptr_ik]; |
249 |
if (gs->colorOf[k]<color) { |
250 |
R1=x[3*k]; |
251 |
R2=x[3*k+1]; |
252 |
R3=x[3*k+2]; |
253 |
S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3; |
254 |
S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3; |
255 |
S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3; |
256 |
} |
257 |
} |
258 |
iptr_main=gs->main_iptr[i]; |
259 |
A11=gs->factors->val[iptr_main*9 ]; |
260 |
A21=gs->factors->val[iptr_main*9+1]; |
261 |
A31=gs->factors->val[iptr_main*9+2]; |
262 |
A12=gs->factors->val[iptr_main*9+3]; |
263 |
A22=gs->factors->val[iptr_main*9+4]; |
264 |
A32=gs->factors->val[iptr_main*9+5]; |
265 |
A13=gs->factors->val[iptr_main*9+6]; |
266 |
A23=gs->factors->val[iptr_main*9+7]; |
267 |
A33=gs->factors->val[iptr_main*9+8]; |
268 |
D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22); |
269 |
if (ABS(D)>0.) { |
270 |
D=1./D; |
271 |
S11=(A22*A33-A23*A32)*D; |
272 |
S21=(A31*A23-A21*A33)*D; |
273 |
S31=(A21*A32-A31*A22)*D; |
274 |
S12=(A13*A32-A12*A33)*D; |
275 |
S22=(A11*A33-A31*A13)*D; |
276 |
S32=(A12*A31-A11*A32)*D; |
277 |
S13=(A12*A23-A13*A22)*D; |
278 |
S23=(A13*A21-A11*A23)*D; |
279 |
S33=(A11*A22-A12*A21)*D; |
280 |
/* a_ik=a_ii^{-1}*a_ik */ |
281 |
x[3*i ]=S11*S1+S12*S2+S13*S3; |
282 |
x[3*i+1]=S21*S1+S22*S2+S23*S3; |
283 |
x[3*i+2]=S31*S1+S32*S2+S33*S3; |
284 |
} else { |
285 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
286 |
} |
287 |
} |
288 |
} |
289 |
} |
290 |
} |
291 |
|
292 |
/* Multipling with diag(A) */ |
293 |
Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x); |
294 |
|
295 |
/* backward substitution */ |
296 |
for (color=(gs->num_colors)-1;color>-1;--color) { |
297 |
if (n_block==1) { |
298 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1) |
299 |
for (i = 0; i < n; ++i) { |
300 |
if (gs->colorOf[i]==color) { |
301 |
/* x_i=x_i-a_ik*x_k */ |
302 |
S1=x[i]; |
303 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
304 |
k=gs->pattern->index[iptr_ik]; |
305 |
if (gs->colorOf[k]>color) { |
306 |
R1=x[k]; |
307 |
S1-=gs->factors->val[iptr_ik]*R1; |
308 |
} |
309 |
} |
310 |
/*x[i]=S1;*/ |
311 |
iptr_main=gs->main_iptr[i]; |
312 |
x[i]=(1/gs->factors->val[iptr_main])*S1; |
313 |
} |
314 |
} |
315 |
} else if (n_block==2) { |
316 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2) |
317 |
for (i = 0; i < n; ++i) { |
318 |
if (gs->colorOf[i]==color) { |
319 |
/* x_i=x_i-a_ik*x_k */ |
320 |
S1=x[2*i]; |
321 |
S2=x[2*i+1]; |
322 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
323 |
k=gs->pattern->index[iptr_ik]; |
324 |
if (gs->colorOf[k]>color) { |
325 |
R1=x[2*k]; |
326 |
R2=x[2*k+1]; |
327 |
S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2; |
328 |
S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2; |
329 |
} |
330 |
} |
331 |
/*x[2*i]=S1; |
332 |
x[2*i+1]=S2;*/ |
333 |
iptr_main=gs->main_iptr[i]; |
334 |
A11=gs->factors->val[iptr_main*4]; |
335 |
A21=gs->factors->val[iptr_main*4+1]; |
336 |
A12=gs->factors->val[iptr_main*4+2]; |
337 |
A22=gs->factors->val[iptr_main*4+3]; |
338 |
D = A11*A22-A12*A21; |
339 |
if (ABS(D)>0.) { |
340 |
D=1./D; |
341 |
S11= A22*D; |
342 |
S21=-A21*D; |
343 |
S12=-A12*D; |
344 |
S22= A11*D; |
345 |
x[2*i ]=S11*S1+S12*S2; |
346 |
x[2*i+1]=S21*S1+S22*S2; |
347 |
} else { |
348 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
349 |
} |
350 |
|
351 |
} |
352 |
} |
353 |
} else if (n_block==3) { |
354 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3) |
355 |
for (i = 0; i < n; ++i) { |
356 |
if (gs->colorOf[i]==color) { |
357 |
/* x_i=x_i-a_ik*x_k */ |
358 |
S1=x[3*i ]; |
359 |
S2=x[3*i+1]; |
360 |
S3=x[3*i+2]; |
361 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
362 |
k=gs->pattern->index[iptr_ik]; |
363 |
if (gs->colorOf[k]>color) { |
364 |
R1=x[3*k]; |
365 |
R2=x[3*k+1]; |
366 |
R3=x[3*k+2]; |
367 |
S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3; |
368 |
S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3; |
369 |
S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3; |
370 |
} |
371 |
} |
372 |
/* x[3*i]=S1; |
373 |
x[3*i+1]=S2; |
374 |
x[3*i+2]=S3; |
375 |
*/ iptr_main=gs->main_iptr[i]; |
376 |
A11=gs->factors->val[iptr_main*9 ]; |
377 |
A21=gs->factors->val[iptr_main*9+1]; |
378 |
A31=gs->factors->val[iptr_main*9+2]; |
379 |
A12=gs->factors->val[iptr_main*9+3]; |
380 |
A22=gs->factors->val[iptr_main*9+4]; |
381 |
A32=gs->factors->val[iptr_main*9+5]; |
382 |
A13=gs->factors->val[iptr_main*9+6]; |
383 |
A23=gs->factors->val[iptr_main*9+7]; |
384 |
A33=gs->factors->val[iptr_main*9+8]; |
385 |
D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22); |
386 |
if (ABS(D)>0.) { |
387 |
D=1./D; |
388 |
S11=(A22*A33-A23*A32)*D; |
389 |
S21=(A31*A23-A21*A33)*D; |
390 |
S31=(A21*A32-A31*A22)*D; |
391 |
S12=(A13*A32-A12*A33)*D; |
392 |
S22=(A11*A33-A31*A13)*D; |
393 |
S32=(A12*A31-A11*A32)*D; |
394 |
S13=(A12*A23-A13*A22)*D; |
395 |
S23=(A13*A21-A11*A23)*D; |
396 |
S33=(A11*A22-A12*A21)*D; |
397 |
x[3*i ]=S11*S1+S12*S2+S13*S3; |
398 |
x[3*i+1]=S21*S1+S22*S2+S23*S3; |
399 |
x[3*i+2]=S31*S1+S32*S2+S33*S3; |
400 |
} else { |
401 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
402 |
} |
403 |
} |
404 |
} |
405 |
} |
406 |
} |
407 |
|
408 |
return; |
409 |
} |
410 |
|
411 |
/**************************************************************/ |
412 |
|
413 |
/* apply GS precondition b-> x |
414 |
|
415 |
in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b |
416 |
|
417 |
should be called within a parallel region |
418 |
barrier synconization should be performed to make sure that the input vector available |
419 |
|
420 |
*/ |
421 |
|
422 |
void Paso_Solver_solveGS1(Paso_Solver_GS * gs, double * x, double * b) { |
423 |
register dim_t i,k; |
424 |
register index_t color,iptr_ik,iptr_main; |
425 |
register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31; |
426 |
dim_t n_block=gs->n_block; |
427 |
dim_t n=gs->n; |
428 |
index_t* pivot=NULL; |
429 |
|
430 |
/* copy x into b*/ |
431 |
#pragma omp parallel for private(i) schedule(static) |
432 |
/*for (i=0;i<n*n_block;++i) x[i]=b[i];*/ |
433 |
|
434 |
/* forward substitution */ |
435 |
for (color=0;color<gs->num_colors;++color) { |
436 |
if (n_block==1) { |
437 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main) |
438 |
for (i = 0; i < n; ++i) { |
439 |
if (gs->colorOf[i]==color) { |
440 |
/* x_i=x_i-a_ik*x_k */ |
441 |
S1=b[i]; |
442 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
443 |
k=gs->pattern->index[iptr_ik]; |
444 |
if (gs->colorOf[k]<color) { |
445 |
R1=x[k]; |
446 |
S1-=gs->factors->val[iptr_ik]*R1; |
447 |
} |
448 |
} |
449 |
iptr_main=gs->main_iptr[i]; |
450 |
x[i]=(1/gs->factors->val[iptr_main])*S1; |
451 |
} |
452 |
} |
453 |
} else if (n_block==2) { |
454 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2) |
455 |
for (i = 0; i < n; ++i) { |
456 |
if (gs->colorOf[i]==color) { |
457 |
/* x_i=x_i-a_ik*x_k */ |
458 |
S1=b[2*i]; |
459 |
S2=b[2*i+1]; |
460 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
461 |
k=gs->pattern->index[iptr_ik]; |
462 |
if (gs->colorOf[k]<color) { |
463 |
R1=x[2*k]; |
464 |
R2=x[2*k+1]; |
465 |
S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2; |
466 |
S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2; |
467 |
} |
468 |
} |
469 |
iptr_main=gs->main_iptr[i]; |
470 |
A11=gs->factors->val[iptr_main*4]; |
471 |
A21=gs->factors->val[iptr_main*4+1]; |
472 |
A12=gs->factors->val[iptr_main*4+2]; |
473 |
A22=gs->factors->val[iptr_main*4+3]; |
474 |
D = A11*A22-A12*A21; |
475 |
if (ABS(D)>0.) { |
476 |
D=1./D; |
477 |
S11= A22*D; |
478 |
S21=-A21*D; |
479 |
S12=-A12*D; |
480 |
S22= A11*D; |
481 |
x[2*i ]=S11*S1+S12*S2; |
482 |
x[2*i+1]=S21*S1+S22*S2; |
483 |
} else { |
484 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
485 |
} |
486 |
} |
487 |
|
488 |
} |
489 |
} else if (n_block==3) { |
490 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3) |
491 |
for (i = 0; i < n; ++i) { |
492 |
if (gs->colorOf[i]==color) { |
493 |
/* x_i=x_i-a_ik*x_k */ |
494 |
S1=b[3*i]; |
495 |
S2=b[3*i+1]; |
496 |
S3=b[3*i+2]; |
497 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
498 |
k=gs->pattern->index[iptr_ik]; |
499 |
if (gs->colorOf[k]<color) { |
500 |
R1=x[3*k]; |
501 |
R2=x[3*k+1]; |
502 |
R3=x[3*k+2]; |
503 |
S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3; |
504 |
S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3; |
505 |
S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3; |
506 |
} |
507 |
} |
508 |
iptr_main=gs->main_iptr[i]; |
509 |
A11=gs->factors->val[iptr_main*9 ]; |
510 |
A21=gs->factors->val[iptr_main*9+1]; |
511 |
A31=gs->factors->val[iptr_main*9+2]; |
512 |
A12=gs->factors->val[iptr_main*9+3]; |
513 |
A22=gs->factors->val[iptr_main*9+4]; |
514 |
A32=gs->factors->val[iptr_main*9+5]; |
515 |
A13=gs->factors->val[iptr_main*9+6]; |
516 |
A23=gs->factors->val[iptr_main*9+7]; |
517 |
A33=gs->factors->val[iptr_main*9+8]; |
518 |
D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22); |
519 |
if (ABS(D)>0.) { |
520 |
D=1./D; |
521 |
S11=(A22*A33-A23*A32)*D; |
522 |
S21=(A31*A23-A21*A33)*D; |
523 |
S31=(A21*A32-A31*A22)*D; |
524 |
S12=(A13*A32-A12*A33)*D; |
525 |
S22=(A11*A33-A31*A13)*D; |
526 |
S32=(A12*A31-A11*A32)*D; |
527 |
S13=(A12*A23-A13*A22)*D; |
528 |
S23=(A13*A21-A11*A23)*D; |
529 |
S33=(A11*A22-A12*A21)*D; |
530 |
/* a_ik=a_ii^{-1}*a_ik */ |
531 |
x[3*i ]=S11*S1+S12*S2+S13*S3; |
532 |
x[3*i+1]=S21*S1+S22*S2+S23*S3; |
533 |
x[3*i+2]=S31*S1+S32*S2+S33*S3; |
534 |
} else { |
535 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
536 |
} |
537 |
} |
538 |
} |
539 |
} |
540 |
} |
541 |
|
542 |
/* Multipling with diag(A) */ |
543 |
Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x); |
544 |
|
545 |
/* backward substitution */ |
546 |
for (color=(gs->num_colors)-1;color>-1;--color) { |
547 |
if (n_block==1) { |
548 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1) |
549 |
for (i = 0; i < n; ++i) { |
550 |
if (gs->colorOf[i]==color) { |
551 |
/* x_i=x_i-a_ik*x_k */ |
552 |
S1=x[i]; |
553 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
554 |
k=gs->pattern->index[iptr_ik]; |
555 |
if (gs->colorOf[k]>color) { |
556 |
R1=x[k]; |
557 |
S1-=gs->factors->val[iptr_ik]*R1; |
558 |
} |
559 |
} |
560 |
/*x[i]=S1;*/ |
561 |
iptr_main=gs->main_iptr[i]; |
562 |
x[i]=(1/gs->factors->val[iptr_main])*S1; |
563 |
} |
564 |
} |
565 |
} else if (n_block==2) { |
566 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2) |
567 |
for (i = 0; i < n; ++i) { |
568 |
if (gs->colorOf[i]==color) { |
569 |
/* x_i=x_i-a_ik*x_k */ |
570 |
S1=x[2*i]; |
571 |
S2=x[2*i+1]; |
572 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
573 |
k=gs->pattern->index[iptr_ik]; |
574 |
if (gs->colorOf[k]>color) { |
575 |
R1=x[2*k]; |
576 |
R2=x[2*k+1]; |
577 |
S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2; |
578 |
S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2; |
579 |
} |
580 |
} |
581 |
/*x[2*i]=S1; |
582 |
x[2*i+1]=S2;*/ |
583 |
iptr_main=gs->main_iptr[i]; |
584 |
A11=gs->factors->val[iptr_main*4]; |
585 |
A21=gs->factors->val[iptr_main*4+1]; |
586 |
A12=gs->factors->val[iptr_main*4+2]; |
587 |
A22=gs->factors->val[iptr_main*4+3]; |
588 |
D = A11*A22-A12*A21; |
589 |
if (ABS(D)>0.) { |
590 |
D=1./D; |
591 |
S11= A22*D; |
592 |
S21=-A21*D; |
593 |
S12=-A12*D; |
594 |
S22= A11*D; |
595 |
x[2*i ]=S11*S1+S12*S2; |
596 |
x[2*i+1]=S21*S1+S22*S2; |
597 |
} else { |
598 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
599 |
} |
600 |
|
601 |
} |
602 |
} |
603 |
} else if (n_block==3) { |
604 |
#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3) |
605 |
for (i = 0; i < n; ++i) { |
606 |
if (gs->colorOf[i]==color) { |
607 |
/* x_i=x_i-a_ik*x_k */ |
608 |
S1=x[3*i ]; |
609 |
S2=x[3*i+1]; |
610 |
S3=x[3*i+2]; |
611 |
for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) { |
612 |
k=gs->pattern->index[iptr_ik]; |
613 |
if (gs->colorOf[k]>color) { |
614 |
R1=x[3*k]; |
615 |
R2=x[3*k+1]; |
616 |
R3=x[3*k+2]; |
617 |
S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3; |
618 |
S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3; |
619 |
S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3; |
620 |
} |
621 |
} |
622 |
/* x[3*i]=S1; |
623 |
x[3*i+1]=S2; |
624 |
x[3*i+2]=S3; |
625 |
*/ iptr_main=gs->main_iptr[i]; |
626 |
A11=gs->factors->val[iptr_main*9 ]; |
627 |
A21=gs->factors->val[iptr_main*9+1]; |
628 |
A31=gs->factors->val[iptr_main*9+2]; |
629 |
A12=gs->factors->val[iptr_main*9+3]; |
630 |
A22=gs->factors->val[iptr_main*9+4]; |
631 |
A32=gs->factors->val[iptr_main*9+5]; |
632 |
A13=gs->factors->val[iptr_main*9+6]; |
633 |
A23=gs->factors->val[iptr_main*9+7]; |
634 |
A33=gs->factors->val[iptr_main*9+8]; |
635 |
D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22); |
636 |
if (ABS(D)>0.) { |
637 |
D=1./D; |
638 |
S11=(A22*A33-A23*A32)*D; |
639 |
S21=(A31*A23-A21*A33)*D; |
640 |
S31=(A21*A32-A31*A22)*D; |
641 |
S12=(A13*A32-A12*A33)*D; |
642 |
S22=(A11*A33-A31*A13)*D; |
643 |
S32=(A12*A31-A11*A32)*D; |
644 |
S13=(A12*A23-A13*A22)*D; |
645 |
S23=(A13*A21-A11*A23)*D; |
646 |
S33=(A11*A22-A12*A21)*D; |
647 |
x[3*i ]=S11*S1+S12*S2+S13*S3; |
648 |
x[3*i+1]=S21*S1+S22*S2+S23*S3; |
649 |
x[3*i+2]=S31*S1+S32*S2+S33*S3; |
650 |
} else { |
651 |
Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block."); |
652 |
} |
653 |
} |
654 |
} |
655 |
} |
656 |
} |
657 |
|
658 |
return; |
659 |
} |
660 |
|