27 |
#include "Options.h" |
#include "Options.h" |
28 |
#include "PasoUtil.h" |
#include "PasoUtil.h" |
29 |
#include "UMFPACK.h" |
#include "UMFPACK.h" |
30 |
|
#include "MKL.h" |
31 |
|
#include "SystemMatrix.h" |
32 |
#include "Pattern_coupling.h" |
#include "Pattern_coupling.h" |
33 |
|
|
34 |
/**************************************************************/ |
/**************************************************************/ |
85 |
Paso_SparseMatrix * schur=NULL; |
Paso_SparseMatrix * schur=NULL; |
86 |
Paso_SparseMatrix * schur_withFillIn=NULL; |
Paso_SparseMatrix * schur_withFillIn=NULL; |
87 |
double S=0; |
double S=0; |
88 |
|
double time0; |
89 |
|
|
90 |
/*Make sure we have block sizes 1*/ |
/*Make sure we have block sizes 1*/ |
91 |
A_p->pattern->input_block_size=A_p->col_block_size; |
A_p->pattern->input_block_size=A_p->col_block_size; |
121 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
122 |
for (i=0;i<n;++i) mis_marker[i]=-1; |
for (i=0;i<n;++i) mis_marker[i]=-1; |
123 |
|
|
124 |
/*Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);*/ |
time0=Paso_timer(); |
125 |
/*fprintf(stderr,"\n Coarsening method == %d, %d \n",coarsening_method,PASO_RUGE_STUEBEN_COARSENING);*/ |
|
126 |
if (coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) { |
if (coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) { |
127 |
Paso_Pattern_coup(A_p,mis_marker,coarsening_threshold); |
Paso_Pattern_coup(A_p,mis_marker,coarsening_threshold); |
128 |
} |
} |
137 |
Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold); |
Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold); |
138 |
} |
} |
139 |
|
|
140 |
|
time0=Paso_timer()-time0; |
141 |
|
if (verbose) fprintf(stderr,"timing: Coarseneing: %e\n",time0); |
142 |
if (Paso_noError()) { |
if (Paso_noError()) { |
143 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
144 |
for (i = 0; i < n; ++i) counter[i]=mis_marker[i]; |
for (i = 0; i < n; ++i) counter[i]=mis_marker[i]; |
207 |
if (Paso_noError()) { |
if (Paso_noError()) { |
208 |
schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C); |
schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C); |
209 |
/*find the pattern of the schur complement with fill in*/ |
/*find the pattern of the schur complement with fill in*/ |
210 |
|
|
211 |
|
time0=Paso_timer(); |
212 |
schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1); |
schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1); |
213 |
|
time0=Paso_timer()-time0; |
214 |
|
if (verbose) fprintf(stderr,"timing: Fill_in pattern computation: %e\n",time0); |
215 |
|
|
216 |
|
time0=Paso_timer(); |
217 |
/* copy values over*/ |
/* copy values over*/ |
218 |
#pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static) |
#pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static) |
219 |
for (i = 0; i < schur_withFillIn->numRows; ++i) { |
for (i = 0; i < schur_withFillIn->numRows; ++i) { |
220 |
for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) { |
for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) { |
221 |
j=schur_withFillIn->pattern->index[iPtr]; |
j=schur_withFillIn->pattern->index[iPtr]; |
222 |
iPtr_s=schur->pattern->ptr[i]; |
iPtr_s=schur->pattern->ptr[i]; |
223 |
/*schur_withFillIn->val[iPtr]=0.;*/ |
schur_withFillIn->val[iPtr]=0.; |
224 |
index=&(schur->pattern->index[iPtr_s]); |
index=&(schur->pattern->index[iPtr_s]); |
225 |
where_p=(index_t*)bsearch(&j, |
where_p=(index_t*)bsearch(&j, |
226 |
index, |
index, |
232 |
} |
} |
233 |
} |
} |
234 |
} |
} |
235 |
|
time0=Paso_timer()-time0; |
236 |
|
if (verbose) fprintf(stderr,"timing: Copying values for Fill in: %e\n",time0); |
237 |
|
|
238 |
if (Paso_noError()) { |
if (Paso_noError()) { |
239 |
Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC); |
Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC); |
277 |
if (Paso_noError()) { |
if (Paso_noError()) { |
278 |
if (verbose && level>0) { |
if (verbose && level>0) { |
279 |
printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,out->n_C); |
printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,out->n_C); |
|
/*if (level>0 && out->n_F>500) {*/ |
|
|
/* if (out->n_F<500) {*/ |
|
|
/* printf("timing: AMG: MIS/reordering/elemination : %e/%e\n",time0,time1); |
|
|
} else { |
|
|
printf("timing: AMG: MIS: %e\n",time2); |
|
|
}*/ |
|
280 |
} |
} |
281 |
return out; |
return out; |
282 |
} else { |
} else { |
313 |
dim_t i; |
dim_t i; |
314 |
double *r=MEMALLOC(amg->n,double); |
double *r=MEMALLOC(amg->n,double); |
315 |
double *x0=MEMALLOC(amg->n,double); |
double *x0=MEMALLOC(amg->n,double); |
316 |
/*double time0=0;*/ |
double time0=0; |
317 |
|
bool_t verbose=1; |
318 |
|
|
319 |
|
#ifdef MKL |
320 |
|
Paso_SparseMatrix *temp=NULL; |
321 |
|
#endif |
322 |
|
|
323 |
|
|
324 |
if (amg->level==0 || amg->n_C<=500) { |
if (amg->level==0 || amg->n_C<=500) { |
325 |
|
|
326 |
/*if (amg->n_F<=500) {*/ |
/*if (amg->n_F<=500) {*/ |
327 |
/*time0=Paso_timer();*/ |
time0=Paso_timer(); |
328 |
|
|
329 |
|
|
330 |
/*Paso_Solver_solveJacobi(amg->GS,x,b);*/ |
/* Paso_Solver_solveJacobi(amg->GS,x,b);*/ |
331 |
|
|
332 |
/* #pragma omp parallel for private(i) schedule(static) |
/* #pragma omp parallel for private(i) schedule(static) |
333 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
338 |
x[i]+=x0[i]; |
x[i]+=x0[i]; |
339 |
} |
} |
340 |
*/ |
*/ |
341 |
Paso_UMFPACK1(amg->A,x,b,0); |
|
342 |
|
#ifdef MKL |
343 |
/* |
/*dim_t iptr; |
344 |
|
temp=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, amg->A->pattern,1,1); |
345 |
|
#pragma omp parallel for private(i,iptr) schedule(static) |
346 |
|
for (i=0;i<amg->A->numRows;++i) { |
347 |
|
for (iptr=amg->A->pattern->ptr[i];iptr<amg->A->pattern->ptr[i+1]; ++iptr) |
348 |
|
temp->val[iptr]=amg->A->val[iptr]; |
349 |
|
}*/ |
350 |
|
temp=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, amg->A->pattern,1,1); |
351 |
|
#pragma omp parallel for private(i) schedule(static) |
352 |
|
for (i=0;i<amg->A->len;++i) { |
353 |
|
temp->val[i]=amg->A->val[i]; |
354 |
|
} |
355 |
|
Paso_MKL1(temp,x,b,0); |
356 |
|
Paso_SparseMatrix_free(temp); |
357 |
|
#else |
358 |
|
#ifdef UMFPACK |
359 |
|
Paso_UMFPACK1(amg->A,x,b,0); |
360 |
|
#else |
361 |
|
Paso_Solver_solveJacobi(amg->GS,x,b); |
362 |
|
#endif |
363 |
|
#endif |
364 |
|
|
365 |
time0=Paso_timer()-time0; |
time0=Paso_timer()-time0; |
366 |
fprintf(stderr,"timing: DIRECT SOLVER: %e/\n",time0); |
if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n\n\n",time0); |
367 |
*/ |
|
368 |
} else { |
} else { |
369 |
/* presmoothing */ |
/* presmoothing */ |
370 |
|
time0=Paso_timer(); |
371 |
Paso_Solver_solveJacobi(amg->GS,x,b); |
Paso_Solver_solveJacobi(amg->GS,x,b); |
372 |
|
time0=Paso_timer()-time0; |
373 |
|
if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0); |
374 |
/* #pragma omp parallel for private(i) schedule(static) |
/* #pragma omp parallel for private(i) schedule(static) |
375 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
376 |
|
|
384 |
*/ |
*/ |
385 |
/* end of presmoothing */ |
/* end of presmoothing */ |
386 |
|
|
387 |
|
|
388 |
|
time0=Paso_timer(); |
389 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
390 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
391 |
|
|
405 |
/* b_C=b_C-A_CF*x_F */ |
/* b_C=b_C-A_CF*x_F */ |
406 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C); |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C); |
407 |
|
|
408 |
|
time0=Paso_timer()-time0; |
409 |
|
if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0); |
410 |
|
|
411 |
/* x_C=AMG(b_C) */ |
/* x_C=AMG(b_C) */ |
412 |
Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C); |
Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C); |
413 |
|
|
414 |
|
time0=Paso_timer(); |
415 |
/* b_F=b_F-A_FC*x_C */ |
/* b_F=b_F-A_FC*x_C */ |
416 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F); |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F); |
417 |
/* x_F=invA_FF*b_F */ |
/* x_F=invA_FF*b_F */ |
427 |
} |
} |
428 |
} |
} |
429 |
|
|
430 |
/*postsmoothing*/ |
time0=Paso_timer()-time0; |
431 |
|
if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0); |
432 |
|
|
433 |
|
/*postsmoothing*/ |
434 |
|
time0=Paso_timer(); |
435 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
436 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
437 |
|
|
442 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
443 |
for (i=0;i<amg->n;++i) x[i]+=x0[i]; |
for (i=0;i<amg->n;++i) x[i]+=x0[i]; |
444 |
|
|
445 |
|
time0=Paso_timer()-time0; |
446 |
|
if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0); |
447 |
/* |
/* |
448 |
#pragma omp parallel for private(i) schedule(static) |
#pragma omp parallel for private(i) schedule(static) |
449 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
for (i=0;i<amg->n;++i) r[i]=b[i]; |