/[escript]/trunk/paso/src/BlockOps.h
ViewVC logotype

Contents of /trunk/paso/src/BlockOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6144 - (show annotations)
Wed Apr 6 05:25:13 2016 UTC (2 years, 7 months ago) by caltinay
File MIME type: text/plain
File size: 6937 byte(s)
last round of namespacing defines.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #ifndef __PASO_BLOCKOPS_H__
18 #define __PASO_BLOCKOPS_H__
19
20 #include "Paso.h"
21 #include "PasoException.h"
22
23 #include <cstring> // memcpy
24
25 #ifdef ESYS_HAVE_LAPACK
26 #ifdef ESYS_MKL_LAPACK
27 #include <mkl_lapack.h>
28 #include <mkl_cblas.h>
29 #else
30 extern "C" {
31 #include <clapack.h>
32 #include <cblas.h>
33 }
34 #endif
35 #endif
36
37 namespace paso {
38
39 inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
40 {
41 memcpy((void*)R, (void*)V, N*sizeof(double));
42 }
43
44 /// performs operation R=R-mat*V (V and R are not overlapping) - 2x2
45 inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
46 {
47 const double S1 = V[0];
48 const double S2 = V[1];
49 const double A11 = mat[0];
50 const double A12 = mat[2];
51 const double A21 = mat[1];
52 const double A22 = mat[3];
53 R[0] -= A11 * S1 + A12 * S2;
54 R[1] -= A21 * S1 + A22 * S2;
55 }
56
57 /// performs operation R=R-mat*V (V and R are not overlapping) - 3x3
58 inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
59 {
60 const double S1 = V[0];
61 const double S2 = V[1];
62 const double S3 = V[2];
63 const double A11 = mat[0];
64 const double A21 = mat[1];
65 const double A31 = mat[2];
66 const double A12 = mat[3];
67 const double A22 = mat[4];
68 const double A32 = mat[5];
69 const double A13 = mat[6];
70 const double A23 = mat[7];
71 const double A33 = mat[8];
72 R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
73 R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
74 R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
75 }
76
77 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
78
79 /// performs operation R=R-mat*V (V and R are not overlapping) - NxN
80 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
81 {
82 #ifdef ESYS_HAVE_LAPACK
83 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
84 #else
85 PASO_MISSING_CLAPACK;
86 #endif
87 }
88
89 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
90 {
91 #ifdef ESYS_HAVE_LAPACK
92 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
93 #else
94 PASO_MISSING_CLAPACK;
95 #endif
96 }
97
98 inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
99 {
100 const double A11 = A[0];
101 const double A12 = A[2];
102 const double A21 = A[1];
103 const double A22 = A[3];
104 double D = A11*A22-A12*A21;
105 if (std::abs(D) > 0) {
106 D = 1./D;
107 invA[0] = A22*D;
108 invA[1] = -A21*D;
109 invA[2] = -A12*D;
110 invA[3] = A11*D;
111 } else {
112 *failed = 1;
113 }
114 }
115
116 inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
117 {
118 const double A11 = A[0];
119 const double A21 = A[1];
120 const double A31 = A[2];
121 const double A12 = A[3];
122 const double A22 = A[4];
123 const double A32 = A[5];
124 const double A13 = A[6];
125 const double A23 = A[7];
126 const double A33 = A[8];
127 double D = A11*(A22*A33-A23*A32) +
128 A12*(A31*A23-A21*A33) +
129 A13*(A21*A32-A31*A22);
130 if (std::abs(D) > 0) {
131 D = 1./D;
132 invA[0] = (A22*A33-A23*A32)*D;
133 invA[1] = (A31*A23-A21*A33)*D;
134 invA[2] = (A21*A32-A31*A22)*D;
135 invA[3] = (A13*A32-A12*A33)*D;
136 invA[4] = (A11*A33-A31*A13)*D;
137 invA[5] = (A12*A31-A11*A32)*D;
138 invA[6] = (A12*A23-A13*A22)*D;
139 invA[7] = (A13*A21-A11*A23)*D;
140 invA[8] = (A11*A22-A12*A21)*D;
141 } else {
142 *failed = 1;
143 }
144 }
145
146 /// LU factorization of NxN matrix mat with partial pivoting
147 inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
148 {
149 #ifdef ESYS_HAVE_LAPACK
150 #ifdef ESYS_MKL_LAPACK
151 int res = 0;
152 dgetrf(&N, &N, mat, &N, pivot, &res);
153 if (res != 0)
154 *failed = 1;
155 #else
156 int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
157 if (res != 0)
158 *failed = 1;
159 #endif // ESYS_MKL_LAPACK
160 #else
161 PASO_MISSING_CLAPACK;
162 #endif
163 }
164
165 /// solves system of linear equations A*X=B
166 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
167 {
168 #ifdef ESYS_HAVE_LAPACK
169 #ifdef ESYS_MKL_LAPACK
170 int res = 0;
171 int ONE = 1;
172 dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
173 if (res != 0)
174 *failed = 1;
175 #else
176 int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
177 if (res != 0)
178 *failed = 1;
179 #endif // ESYS_MKL_LAPACK
180 #else
181 PASO_MISSING_CLAPACK;
182 #endif
183 }
184
185 /// inplace matrix vector product - order 2
186 inline void BlockOps_MViP_2(const double* mat, double* V)
187 {
188 const double S1 = V[0];
189 const double S2 = V[1];
190 const double A11 = mat[0];
191 const double A12 = mat[2];
192 const double A21 = mat[1];
193 const double A22 = mat[3];
194 V[0] = A11 * S1 + A12 * S2;
195 V[1] = A21 * S1 + A22 * S2;
196 }
197
198 /// inplace matrix vector product - order 3
199 inline void BlockOps_MViP_3(const double* mat, double* V)
200 {
201 const double S1 = V[0];
202 const double S2 = V[1];
203 const double S3 = V[2];
204 const double A11 = mat[0];
205 const double A21 = mat[1];
206 const double A31 = mat[2];
207 const double A12 = mat[3];
208 const double A22 = mat[4];
209 const double A32 = mat[5];
210 const double A13 = mat[6];
211 const double A23 = mat[7];
212 const double A33 = mat[8];
213 V[0] = A11 * S1 + A12 * S2 + A13 * S3;
214 V[1] = A21 * S1 + A22 * S2 + A23 * S3;
215 V[2] = A31 * S1 + A32 * S2 + A33 * S3;
216 }
217
218 inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
219 index_t* pivot, double* x)
220 {
221 if (n_block == 1) {
222 #pragma omp parallel for
223 for (dim_t i=0; i<n; ++i)
224 x[i] *= D[i];
225 } else if (n_block == 2) {
226 #pragma omp parallel for
227 for (dim_t i=0; i<n; ++i)
228 BlockOps_MViP_2(&D[4*i], &x[2*i]);
229 } else if (n_block == 3) {
230 #pragma omp parallel for
231 for (dim_t i=0; i<n; ++i)
232 BlockOps_MViP_3(&D[9*i], &x[3*i]);
233 } else {
234 int failed = 0;
235 #pragma omp parallel for
236 for (dim_t i=0; i<n; ++i) {
237 const dim_t block_size = n_block*n_block;
238 BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
239 }
240 if (failed > 0) {
241 throw PasoException("BlockOps_solveAll: solution failed.");
242 }
243 }
244 }
245
246 } // namespace paso
247
248 #endif // __PASO_BLOCKOPS_H__
249

  ViewVC Help
Powered by ViewVC 1.1.26