154 |
omegaDenumtr = 0.0; |
omegaDenumtr = 0.0; |
155 |
} |
} |
156 |
#pragma omp barrier |
#pragma omp barrier |
|
#pragma ivdep |
|
157 |
#pragma omp for private(i0) reduction(+:sum_1) schedule(static) |
#pragma omp for private(i0) reduction(+:sum_1) schedule(static) |
158 |
for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0]; |
for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0]; |
159 |
rho = sum_1; |
rho = sum_1; |
163 |
|
|
164 |
if (num_iter > 1) { |
if (num_iter > 1) { |
165 |
beta = rho / rho1 * (alpha / omega); |
beta = rho / rho1 * (alpha / omega); |
|
#pragma ivdep |
|
166 |
#pragma omp for private(i0) schedule(static) |
#pragma omp for private(i0) schedule(static) |
167 |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]); |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]); |
168 |
} else { |
} else { |
|
#pragma ivdep |
|
169 |
#pragma omp for private(i0) schedule(static) |
#pragma omp for private(i0) schedule(static) |
170 |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0]; |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0]; |
171 |
} |
} |
175 |
Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]); |
Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]); |
176 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]); |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]); |
177 |
|
|
|
// #pragma ivdep |
|
178 |
#pragma omp for private(i0) reduction(+:sum_2) schedule(static) |
#pragma omp for private(i0) reduction(+:sum_2) schedule(static) |
179 |
for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0]; |
for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0]; |
180 |
if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) { |
if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) { |
181 |
alpha = rho / sum_2; |
alpha = rho / sum_2; |
182 |
|
|
|
// #pragma ivdep |
|
183 |
#pragma omp for private(i0) reduction(+:sum_3) schedule(static) |
#pragma omp for private(i0) reduction(+:sum_3) schedule(static) |
184 |
for (i0 = 0; i0 < n; i0++) { |
for (i0 = 0; i0 < n; i0++) { |
185 |
r[i0] -= alpha * v[i0]; |
r[i0] -= alpha * v[i0]; |
190 |
|
|
191 |
/* Early check for tolerance. */ |
/* Early check for tolerance. */ |
192 |
if ( (convergeFlag = (norm_of_residual <= tol)) ) { |
if ( (convergeFlag = (norm_of_residual <= tol)) ) { |
|
// #pragma ivdep |
|
193 |
#pragma omp for private(i0) schedule(static) |
#pragma omp for private(i0) schedule(static) |
194 |
for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0]; |
for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0]; |
195 |
maxIterFlag = FALSE; |
maxIterFlag = FALSE; |
199 |
Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]); |
Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]); |
200 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]); |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]); |
201 |
|
|
|
// #pragma ivdep |
|
202 |
#pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static) |
#pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static) |
203 |
for (i0 = 0; i0 < n; i0++) { |
for (i0 = 0; i0 < n; i0++) { |
204 |
omegaNumtr +=t[i0] * s[i0]; |
omegaNumtr +=t[i0] * s[i0]; |
207 |
if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) { |
if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) { |
208 |
omega = omegaNumtr / omegaDenumtr; |
omega = omegaNumtr / omegaDenumtr; |
209 |
|
|
|
// #pragma ivdep |
|
210 |
#pragma omp for private(i0) reduction(+:sum_4) schedule(static) |
#pragma omp for private(i0) reduction(+:sum_4) schedule(static) |
211 |
for (i0 = 0; i0 < n; i0++) { |
for (i0 = 0; i0 < n; i0++) { |
212 |
x[i0] += alpha * phat[i0] + omega * shat[i0]; |
x[i0] += alpha * phat[i0] + omega * shat[i0]; |