/[escript]/branches/doubleplusgood/paso/src/BlockOps.h
ViewVC logotype

Contents of /branches/doubleplusgood/paso/src/BlockOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4257 - (show annotations)
Wed Feb 27 03:42:40 2013 UTC (6 years, 1 month ago) by jfenwick
File MIME type: text/plain
File size: 5951 byte(s)
Some simple experiments for c++ Finley

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 #ifndef INC_PASO_BLOCKOPS
18 #define INC_PASO_BLOCKOPS
19
20 #include "Common.h"
21 #include "Paso.h"
22
23 #ifdef USE_LAPACK
24 #ifdef MKL_LAPACK
25 #include <mkl_lapack.h>
26 #include <mkl_cblas.h>
27 #else
28 #include <clapack.h>
29 #include <cblas.h>
30 #endif
31 #endif
32
33 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x);
34
35 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
36
37
38 /* copy functions: */
39 #define Paso_BlockOps_Cpy_2(R, V) \
40 {\
41 *(R+0)=*(V+0);\
42 *(R+1)=*(V+1);\
43 }
44
45 #define Paso_BlockOps_Cpy_3(R, V) \
46 {\
47 *(R+0)=*(V+0);\
48 *(R+1)=*(V+1);\
49 *(R+2)=*(V+2);\
50 }
51
52 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
53
54
55 /* operations R=R-MAT*V (V and R are not overlapping) */
56
57 #define Paso_BlockOps_SMV_2(R,MAT, V) \
58 {\
59 register double S1=*(V+0); \
60 register double S2=*(V+1);\
61 register double A11=*(MAT+0);\
62 register double A12=*(MAT+2);\
63 register double A21=*(MAT+1);\
64 register double A22=*(MAT+3);\
65 *(R+0)-=A11 * S1 + A12 * S2;\
66 *(R+1)-=A21 * S1 + A22 * S2;\
67 }
68
69 #define Paso_BlockOps_SMV_3(R, MAT, V) \
70 {\
71 register double S1=*(V+0);\
72 register double S2=*(V+1);\
73 register double S3=*(V+2);\
74 register double A11=*(MAT+0);\
75 register double A21=*(MAT+1);\
76 register double A31=*(MAT+2);\
77 register double A12=*(MAT+3);\
78 register double A22=*(MAT+4);\
79 register double A32=*(MAT+5);\
80 register double A13=*(MAT+6);\
81 register double A23=*(MAT+7);\
82 register double A33=*(MAT+8);\
83 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
84 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
85 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
86 }
87
88
89 /* Paso_BlockOps_SMV_N */
90
91 #ifdef USE_LAPACK
92 #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
93 #else
94 #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
95 #endif
96
97 #ifdef USE_LAPACK
98 #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
99 #else
100 #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
101 #endif
102
103 #define Paso_BlockOps_invM_2(invA, A, failed) \
104 {\
105 register double A11=*(A+0);\
106 register double A12=*(A+2);\
107 register double A21=*(A+1);\
108 register double A22=*(A+3);\
109 register double D = A11*A22-A12*A21; \
110 if (ABS(D) > 0 ){\
111 D=1./D;\
112 *(invA)= A22*D;\
113 *(invA+1)=-A21*D;\
114 *(invA+2)=-A12*D;\
115 *(invA+3)= A11*D;\
116 } else {\
117 *failed=1;\
118 }\
119 }
120
121 #define Paso_BlockOps_invM_3(invA, A, failed) \
122 {\
123 register double A11=*(A+0);\
124 register double A21=*(A+1);\
125 register double A31=*(A+2);\
126 register double A12=*(A+3);\
127 register double A22=*(A+4);\
128 register double A32=*(A+5);\
129 register double A13=*(A+6);\
130 register double A23=*(A+7);\
131 register double A33=*(A+8);\
132 register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
133 if (ABS(D) > 0 ){\
134 D=1./D;\
135 *(invA )=(A22*A33-A23*A32)*D;\
136 *(invA+1)=(A31*A23-A21*A33)*D;\
137 *(invA+2)=(A21*A32-A31*A22)*D;\
138 *(invA+3)=(A13*A32-A12*A33)*D;\
139 *(invA+4)=(A11*A33-A31*A13)*D;\
140 *(invA+5)=(A12*A31-A11*A32)*D;\
141 *(invA+6)=(A12*A23-A13*A22)*D;\
142 *(invA+7)=(A13*A21-A11*A23)*D;\
143 *(invA+8)=(A11*A22-A12*A21)*D;\
144 } else {\
145 *failed=1;\
146 }\
147 }
148
149 #ifdef USE_LAPACK
150 #ifdef MKL_LAPACK
151 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
152 {\
153 int NN=N; \
154 int res =0; \
155 dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
156 if (res!=0) *failed=1;\
157 }
158 #else
159 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
160 {\
161 int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
162 if (res!=0) *failed=1;\
163 }
164 #endif
165 #else
166 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
167 #endif
168
169 #ifdef USE_LAPACK
170 #ifdef MKL_LAPACK
171 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
172 {\
173 int NN=N; \
174 int res =0; \
175 int ONE=1; \
176 dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
177 if (res!=0) *failed=1;\
178 }
179 #else
180 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
181 {\
182 int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
183 if (res!=0) *failed=1;\
184 }
185 #endif
186 #else
187 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
188 #endif
189
190
191 /* inplace matrix vector product */
192
193 #define Paso_BlockOps_MViP_2(MAT, V) \
194 { \
195 register double S1=*(V+0);\
196 register double S2=*(V+1);\
197 register double A11=*(MAT+0);\
198 register double A12=*(MAT+2);\
199 register double A21=*(MAT+1);\
200 register double A22=*(MAT+3);\
201 *(V+0) = A11 * S1 + A12 * S2;\
202 *(V+1) = A21 * S1 + A22 * S2;\
203 }
204
205
206 #define Paso_BlockOps_MViP_3(MAT, V) \
207 { \
208 register double S1=*(V+0);\
209 register double S2=*(V+1);\
210 register double S3=*(V+2);\
211 register double A11=*(MAT+0);\
212 register double A21=*(MAT+1);\
213 register double A31=*(MAT+2);\
214 register double A12=*(MAT+3);\
215 register double A22=*(MAT+4);\
216 register double A32=*(MAT+5);\
217 register double A13=*(MAT+6);\
218 register double A23=*(MAT+7);\
219 register double A33=*(MAT+8);\
220 *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
221 *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
222 *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\
223 }
224
225
226 #endif /* #INC_Paso_BlockOpsOPS */

  ViewVC Help
Powered by ViewVC 1.1.26