/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Annotation of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (hide annotations)
Fri Jul 11 13:12:46 2008 UTC (11 years ago) by phornby
File MIME type: text/plain
File size: 15752 byte(s)

Merge in /branches/windows_from_1456_trunk_1620_merged_in branch.

You will find a preserved pre-merge trunk in tags under tags/trunk_at_1625.
That will be useful for diffing & checking on my stupidity.

Here is a list of the conflicts and their resolution at this
point in time.


=================================================================================
(LLWS == looks like white space).

finley/src/Assemble_addToSystemMatrix.c - resolve to branch - unused var. may be wrong.....
finley/src/CPPAdapter/SystemMatrixAdapter.cpp - resolve to branch - LLWS
finley/src/CPPAdapter/MeshAdapter.cpp - resolve to branch - LLWS
paso/src/PCG.c - resolve to branch - unused var fixes.
paso/src/SolverFCT.c - resolve to branch - LLWS
paso/src/FGMRES.c - resolve to branch - LLWS
paso/src/Common.h - resolve to trunk version. It's omp.h's include... not sure it's needed,
but for the sake of saftey.....
paso/src/Functions.c - resolve to branch version, indentation/tab removal and return error
on bad unimplemented Paso_FunctionCall.
paso/src/SolverFCT_solve.c - resolve to branch version, unused vars
paso/src/SparseMatrix_MatrixVector.c - resolve to branch version, unused vars.
escript/src/Utils.cpp - resloved to branch, needs WinSock2.h
escript/src/DataExpanded.cpp - resolved to branch version - LLWS
escript/src/DataFactory.cpp - resolve to branch version
=================================================================================

This currently passes tests on linux (debian), but is not checked on windows or Altix yet.

This checkin is to make a trunk I can check out for windows to do tests on it.

Known outstanding problem is in the operator=() method of exceptions
causing warning messages on the intel compilers.

May the God of doughnuts have mercy on my soul.


1 gross 1363 /* $Id:$ */
2    
3     /*******************************************************
4     *
5 gross 1407 * Copyright 2007,2008 by University of Queensland
6 gross 1363 *
7 gross 1407 * http://esscc.uq.edu_m.au
8 gross 1363 * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /**************************************************************/
15    
16     /* Paso: Flux correction transport solver
17     *
18 gross 1407 * solves Mu_t=Ku+q
19 gross 1570 * u(0) >=0
20 gross 1363 *
21 gross 1407 * Warning: the program assums sum_{j} k_{ij}=0!!!
22 gross 1363 *
23     */
24     /**************************************************************/
25    
26 gross 1407 /* Author: l.gross@uq.edu_m.au */
27 gross 1363
28     /**************************************************************/
29    
30     #include "Paso.h"
31     #include "Solver.h"
32     #include "SolverFCT.h"
33     #include "escript/blocktimer.h"
34 gross 1370 #ifdef _OPENMP
35     #include <omp.h>
36     #endif
37     #ifdef PASO_MPI
38     #include <mpi.h>
39     #endif
40 gross 1363
41 gross 1407 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
42     {
43     dim_t i, n_rows;
44     double dt_max, dt_max_loc;
45     register double rtmp1,rtmp2;
46     n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
47     if (! fctp->valid_matrices) {
48     fctp->dt_max=LARGE_POSITIVE_FLOAT;
49 gross 1363 /* extract the row sum of the advective part */
50 gross 1407 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
51 gross 1363
52 gross 1407 /* set low order transport operator */
53     Paso_FCTransportProblem_setLowOrderOperator(fctp);
54     /*
55 gross 1370 * calculate time step size:
56 gross 1407 */
57     dt_max=LARGE_POSITIVE_FLOAT;
58 gross 1370 if (fctp->theta < 1.) {
59 gross 1407 #pragma omp parallel private(dt_max_loc)
60 gross 1370 {
61 gross 1407 dt_max_loc=LARGE_POSITIVE_FLOAT;
62     #pragma omp for schedule(static) private(i,rtmp1,rtmp2)
63 gross 1372 for (i=0;i<n_rows;++i) {
64 gross 1407 rtmp1=fctp->main_diagonal_low_order_transport_matrix[i];
65 gross 1370 rtmp2=fctp->lumped_mass_matrix[i];
66 gross 1513 if ( (rtmp1<0 && rtmp2>0.) || (rtmp1>0 && rtmp2<0.) ) {
67 gross 1407 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1);
68 gross 1370 }
69     }
70 gross 1372 #pragma omp critical
71 gross 1370 {
72 gross 1407 dt_max=MIN(dt_max,dt_max_loc);
73 gross 1370 }
74     }
75     #ifdef PASO_MPI
76 gross 1407 dt_max_loc = dt_max;
77     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
78     #endif
79     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
80     }
81     if (dt_max <= 0.) {
82 gross 1400 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
83 gross 1407 } else {
84     if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
85 gross 1370 }
86 gross 1407 fctp->dt_max=dt_max;
87 gross 1370 fctp->valid_matrices=Paso_noError();
88 gross 1363 }
89 gross 1407 return fctp->dt_max;
90     }
91 gross 1363
92 gross 1407
93    
94    
95     void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
96 phornby 1628 index_t i;
97     long n_substeps,n, m;
98 gross 1407 double dt_max, omega, dt2,t;
99 phornby 1628 double local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
100 gross 1407 register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp;
101 gross 1562 double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL,*u_m=NULL;
102     Paso_Coupler* QN_n_coupler=NULL, *QP_n_coupler=NULL, *uTilde_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *u_m_coupler=NULL;
103 gross 1407 Paso_SystemMatrix *flux_matrix=NULL;
104     dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
105     bool_t converged;
106 gross 1562 dim_t max_m=500;
107     double inner_tol=1.e-2;
108     dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
109 gross 1407 if (dt<=0.) {
110     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
111     }
112     dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
113 gross 1400 /*
114 gross 1407 * allocate memory
115 gross 1400 *
116     */
117 gross 1562 u_m=TMPMEMALLOC(n_rows,double);
118     Paso_checkPtr(u_m);
119 gross 1407 b_n=TMPMEMALLOC(n_rows,double);
120     Paso_checkPtr(b_n);
121     sourceP=TMPMEMALLOC(n_rows,double);
122     Paso_checkPtr(sourceP);
123     sourceN=TMPMEMALLOC(n_rows,double);
124     Paso_checkPtr(sourceN);
125     uTilde_n=TMPMEMALLOC(n_rows,double);
126     Paso_checkPtr(uTilde_n);
127     QN_n=TMPMEMALLOC(n_rows,double);
128     Paso_checkPtr(QN_n);
129     QP_n=TMPMEMALLOC(n_rows,double);
130     Paso_checkPtr(QP_n);
131     RN_m=TMPMEMALLOC(n_rows,double);
132     Paso_checkPtr(RN_m);
133     RP_m=TMPMEMALLOC(n_rows,double);
134     Paso_checkPtr(RP_m);
135     z_m=TMPMEMALLOC(n_rows,double);
136     Paso_checkPtr(z_m);
137     du_m=TMPMEMALLOC(n_rows,double);
138     Paso_checkPtr(du_m);
139 gross 1562 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
140     QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
141     RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
142     RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
143     uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
144     u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
145 gross 1407 flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
146     fctp->transport_matrix->pattern,
147     fctp->transport_matrix->row_block_size,
148     fctp->transport_matrix->col_block_size);
149 gross 1400 if (Paso_noError()) {
150     /*
151     * Preparation:
152     *
153     */
154    
155     /* decide on substepping */
156 gross 1407 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
157 phornby 1628 n_substeps=(long)ceil(dt/dt_max);
158 gross 1400 } else {
159 gross 1407 n_substeps=1;
160 gross 1400 }
161     dt2=dt/n_substeps;
162 gross 1407 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max);
163     /*
164     * seperate source into positive and negative part:
165     */
166     #pragma omp parallel for private(i,rtmp)
167     for (i = 0; i < n_rows; ++i) {
168     rtmp=source[i];
169     if (rtmp <0) {
170 gross 1410 sourceN[i]=-rtmp;
171     sourceP[i]=0;
172 gross 1407 } else {
173 gross 1410 sourceN[i]= 0;
174     sourceP[i]= rtmp;
175 gross 1400 }
176 gross 1407 }
177 gross 1562 u_m_coupler->data=u_m;
178 gross 1410 /* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */
179 gross 1407 /*
180 gross 1410 * let the show begin!!!!
181 gross 1407 *
182     */
183 gross 1400 t=dt2;
184     n=0;
185     tolerance=options->tolerance;
186     while(n<n_substeps && Paso_noError()) {
187 gross 1562
188 gross 1400 printf("substep step %d at t=%e\n",n+1,t);
189 gross 1407 /*
190 gross 1410 * b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i]
191 gross 1407 *
192     * note that iteration_matrix stores the negative values of the
193 gross 1410 * low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used.
194 gross 1407 *
195     */
196 gross 1562 Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,fctp->u_coupler,
197 gross 1410 -dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP);
198 gross 1407 /*
199     * uTilde_n[i]=b[i]/m[i]
200     *
201 gross 1562 * fctp->iteration_matrix[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
202 gross 1407 *
203     */
204     if (fctp->theta > 0) {
205     Paso_solve_free(fctp->iteration_matrix);
206 gross 1410 omega=1./(dt2*fctp->theta);
207     rtmp2=dt2*omega;
208 gross 1407 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4)
209     for (i = 0; i < n_rows; ++i) {
210     rtmp=fctp->lumped_mass_matrix[i];
211     if (ABS(rtmp)>0.) {
212     rtmp3=b_n[i]/rtmp;
213     } else {
214 gross 1562 rtmp3=fctp->u[i];
215 gross 1407 }
216 gross 1410 rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i];
217 gross 1407 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3;
218     fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
219     uTilde_n[i]=rtmp3;
220 gross 1400 }
221     } else {
222 gross 1407 #pragma omp parallel for private(i,rtmp,rtmp3)
223     for (i = 0; i < n_rows; ++i) {
224     rtmp=fctp->lumped_mass_matrix[i];
225     if (ABS(rtmp)>0.) {
226     rtmp3=b_n[i]/rtmp;
227     } else {
228 gross 1562 rtmp3=fctp->u[i];
229 gross 1407 }
230     uTilde_n[i]=rtmp3;
231     }
232 gross 1410 omega=1.;
233 gross 1562 /* no update of iteration_matrix required! */
234 gross 1407 } /* end (fctp->theta > 0) */
235 gross 1562
236     /* distribute uTilde_n: */
237     Paso_Coupler_startCollect(uTilde_n_coupler,uTilde_n);
238     Paso_Coupler_finishCollect(uTilde_n_coupler);
239 gross 1407 /*
240     * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
241     * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
242     *
243     */
244 gross 1562 Paso_SolverFCT_setQs(uTilde_n_coupler,QN_n,QP_n,fctp->iteration_matrix);
245     Paso_Coupler_startCollect(QN_n_coupler,QN_n);
246     Paso_Coupler_startCollect(QP_n_coupler,QP_n);
247     Paso_Coupler_finishCollect(QN_n_coupler);
248     Paso_Coupler_finishCollect(QP_n_coupler);
249 gross 1407 /*
250 gross 1562 * now we enter the iteration on a time-step:
251 gross 1407 *
252     */
253 gross 1562 Paso_Coupler_copyAll(fctp->u_coupler, u_m_coupler);
254     /* REPLACE BY JACOBEAN FREE NEWTON */
255 gross 1407 m=0;
256     converged=FALSE;
257 gross 1562 while ( (!converged) && (m<max_m) && Paso_noError()) {
258 gross 1407 printf("iteration step %d\n",m+1);
259     /*
260     * set the ant diffusion fluxes:
261     *
262 gross 1562 */
263     Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u_m_coupler);
264 gross 1407 /*
265     * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
266     *
267     */
268 gross 1562 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n_coupler);
269 gross 1407 /*
270 gross 1562 * set flux limiters RN_m,RP_m
271 gross 1407 *
272     */
273 gross 1562 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n_coupler,QP_n_coupler,RN_m,RP_m);
274     Paso_Coupler_startCollect(RN_m_coupler,RN_m);
275     Paso_Coupler_startCollect(RP_m_coupler,RP_m);
276 gross 1407 /*
277 gross 1410 * z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i])
278 gross 1407 *
279     * note that iteration_matrix stores the negative values of the
280 gross 1410 * low order transport matrix l therefore a=dt2*fctp->theta is used.
281 gross 1407 */
282 gross 1410
283 gross 1562 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,
284 gross 1410 dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN);
285 gross 1562 /* for (i = 0; i < n_rows; ++i) {
286     * printf("%d: %e %e\n",i,z_m[i], b_n[i]);
287     * }
288     */
289 gross 1410 #pragma omp parallel for private(i)
290     for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i];
291 gross 1407
292 gross 1562 Paso_Coupler_finishCollect(RN_m_coupler);
293     Paso_Coupler_finishCollect(RP_m_coupler);
294     /* add corrected fluxes into z_m */
295     Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
296     /*
297 gross 1410 * now we solve the linear system to get the correction dt2:
298 gross 1407 *
299 gross 1562 */
300     if (fctp->theta > 0) {
301 gross 1407 /* set the right hand side of the linear system: */
302 gross 1562 options->tolerance=inner_tol;
303     options->verbose=TRUE;
304 gross 1407 Paso_solve(fctp->iteration_matrix,du_m,z_m,options);
305     /* TODO: check errors ! */
306     } else {
307     #pragma omp parallel for private(i,rtmp,rtmp1)
308     for (i = 0; i < n_rows; ++i) {
309     rtmp=fctp->lumped_mass_matrix[i];
310     if (ABS(rtmp)>0.) {
311 gross 1410 rtmp1=z_m[i]/rtmp;
312 gross 1407 } else {
313     rtmp1=0;
314     }
315     du_m[i]=rtmp1;
316     }
317     }
318     /*
319     * update u and calculate norm of du_m and the new u:
320     *
321     */
322     norm_u=0.;
323     norm_du=0.;
324     #pragma omp parallel private(local_norm_u,local_norm_du)
325     {
326     local_norm_u=0.;
327     local_norm_du=0.;
328     #pragma omp for schedule(static) private(i)
329     for (i=0;i<n_rows;++i) {
330 gross 1562 u_m[i]+=omega*du_m[i];
331     local_norm_u=MAX(local_norm_u,ABS(u_m[i]));
332 gross 1407 local_norm_du=MAX(local_norm_du,ABS(du_m[i]));
333     }
334     #pragma omp critical
335     {
336     norm_u=MAX(norm_u,local_norm_u);
337     norm_du=MAX(norm_du,local_norm_du);
338     }
339     }
340 gross 1562 Paso_Coupler_startCollect(u_m_coupler,u_m);
341 gross 1407 #ifdef PASO_MPI
342     local_norm[0]=norm_u;
343     local_norm[1]=norm_du;
344 ksteube 1415 MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
345 gross 1407 norm_u=norm[0];
346     norm_du=norm[1];
347     #endif
348     norm_du*=omega;
349     converged=(norm_du <= tolerance * norm_u);
350     m++;
351     printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du);
352     /* TODO: check if du_m has been redu_mced */
353 gross 1562 Paso_Coupler_finishCollect(u_m_coupler);
354 gross 1410 } /* end of inner iteration */
355 gross 1562 SWAP(fctp->u,u_m,double*);
356     SWAP(fctp->u_coupler,u_m_coupler,Paso_Coupler*);
357 gross 1410 n++;
358 gross 1407 } /* end of time integration */
359 gross 1562 options->tolerance=tolerance;
360 gross 1407 #pragma omp parallel for schedule(static) private(i)
361 gross 1570 for (i=0;i<n_rows;++i) u[i]=fctp->u[i];
362 gross 1370 }
363 gross 1407 /*
364     * clean-up:
365 gross 1400 *
366     */
367 gross 1407 TMPMEMFREE(b_n);
368     Paso_SystemMatrix_free(flux_matrix);
369     TMPMEMFREE(sourceP);
370     TMPMEMFREE(sourceN);
371     TMPMEMFREE(uTilde_n);
372     TMPMEMFREE(QN_n);
373     TMPMEMFREE(QP_n);
374     TMPMEMFREE(RN_m);
375     TMPMEMFREE(RP_m);
376     TMPMEMFREE(z_m);
377     TMPMEMFREE(du_m);
378 gross 1562 TMPMEMFREE(u_m);
379     Paso_Coupler_free(QN_n_coupler);
380     Paso_Coupler_free(QP_n_coupler);
381     Paso_Coupler_free(RN_m_coupler);
382     Paso_Coupler_free(RP_m_coupler);
383     Paso_Coupler_free(uTilde_n_coupler);
384     Paso_Coupler_free(u_m_coupler);
385 gross 1375 }

  ViewVC Help
Powered by ViewVC 1.1.26