/[escript]/branches/trilinos_from_5897/dudley/src/Assemble_gradient.cpp
ViewVC logotype

Contents of /branches/trilinos_from_5897/dudley/src/Assemble_gradient.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6079 - (show annotations)
Mon Mar 21 12:22:38 2016 UTC (2 years, 11 months ago) by caltinay
File size: 10636 byte(s)
Big commit - making dudley much more like finley to make it more
managable. Fixed quite a few issues that had been fixed in finley.
Disposed of all ReducedNode/ReducedDOF entities that dudley never supported.
Compiles and passes tests.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include "Assemble.h"
18 #include "Util.h"
19
20 // Unless the loops in here get complicated again this file should be compiled
21 // with loop unrolling
22
23 namespace dudley {
24
25 void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
26 escript::Data& grad_data, const escript::Data& data)
27 {
28 if (!nodes || !elements)
29 return;
30
31 const int numComps = data.getDataPointSize();
32 const int NN = elements->numNodes;
33 const bool reducedIntegrationOrder = hasReducedIntegrationOrder(grad_data);
34 const int data_type = data.getFunctionSpace().getTypeCode();
35
36 dim_t numNodes = 0;
37 if (data_type == DUDLEY_NODES) {
38 numNodes = nodes->getNumNodes();
39 } else if (data_type == DUDLEY_DEGREES_OF_FREEDOM) {
40 if (elements->MPIInfo->size > 1) {
41 throw DudleyException("Assemble_gradient: for more than one "
42 "processor DEGREES_OF_FREEDOM data are not accepted as input.");
43 }
44 numNodes = nodes->getNumDegreesOfFreedom();
45 } else {
46 throw DudleyException("Assemble_gradient: Cannot calculate gradient "
47 "of data because of unsuitable input data representation.");
48 }
49
50 ElementFile_Jacobians* jac = elements->borrowJacobians(nodes,
51 reducedIntegrationOrder);
52 const int numDim = jac->numDim;
53 const int numShapesTotal = jac->numShapes;
54 const int numQuad = jac->numQuad;
55 const size_t localGradSize = sizeof(double) * numDim * numQuad * numComps;
56
57 // check the dimensions of data
58 if (!grad_data.numSamplesEqual(numQuad, elements->numElements)) {
59 throw DudleyException("Assemble_gradient: illegal number of samples in gradient Data object");
60 } else if (!data.numSamplesEqual(1, numNodes)) {
61 throw DudleyException("Assemble_gradient: illegal number of samples of input Data object");
62 } else if (numDim * numComps != grad_data.getDataPointSize()) {
63 throw DudleyException("Assemble_gradient: illegal number of components in gradient data object.");
64 } else if (!grad_data.actsExpanded()) {
65 throw DudleyException("Assemble_gradient: expanded Data object is expected for output data.");
66 }
67
68 grad_data.requireWrite();
69 #pragma omp parallel
70 {
71 if (data_type == DUDLEY_NODES) {
72 if (numDim == 1) {
73 const int numShapes = 2;
74 #pragma omp for
75 for (index_t e = 0; e < elements->numElements; e++) {
76 double* grad_data_e = grad_data.getSampleDataRW(e);
77 memset(grad_data_e, 0, localGradSize);
78 for (int s = 0; s < numShapes; s++) {
79 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
80 const double* data_array = data.getSampleDataRO(n);
81 for (int q = 0; q < numQuad; q++) {
82 #pragma ivdep
83 for (int l = 0; l < numComps; l++) {
84 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
85 data_array[l] *
86 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
87 }
88 }
89 }
90 }
91 } else if (numDim == 2) {
92 const int numShapes = 3;
93 #pragma omp for
94 for (index_t e = 0; e < elements->numElements; e++) {
95 double* grad_data_e = grad_data.getSampleDataRW(e);
96 memset(grad_data_e, 0, localGradSize);
97 for (int s = 0; s < numShapes; s++) {
98 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
99 const double* data_array = data.getSampleDataRO(n);
100 for (int q = 0; q < numQuad; q++) {
101 #pragma ivdep
102 for (int l = 0; l < numComps; l++) {
103 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
104 data_array[l] *
105 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
106 grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
107 data_array[l] *
108 jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
109 }
110 }
111 }
112 }
113 } else if (numDim == 3) {
114 const int numShapes = 4;
115 #pragma omp for
116 for (index_t e = 0; e < elements->numElements; e++) {
117 double* grad_data_e = grad_data.getSampleDataRW(e);
118 memset(grad_data_e, 0, localGradSize);
119 for (int s = 0; s < numShapes; s++) {
120 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
121 const double* data_array = data.getSampleDataRO(n);
122 for (int q = 0; q < numQuad; q++) {
123 #pragma ivdep
124 for (int l = 0; l < numComps; l++) {
125 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
126 data_array[l] *
127 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
128 grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
129 data_array[l] *
130 jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
131 grad_data_e[INDEX4(l, 2, q, 0, numComps, numDim, numQuad)] +=
132 data_array[l] *
133 jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
134 }
135 }
136 }
137 }
138 }
139 } else if (data_type == DUDLEY_DEGREES_OF_FREEDOM) {
140 const index_t* target = nodes->borrowTargetDegreesOfFreedom();
141 if (numDim == 1) {
142 const int numShapes = 2;
143 #pragma omp for
144 for (index_t e = 0; e < elements->numElements; e++) {
145 double* grad_data_e = grad_data.getSampleDataRW(e);
146 memset(grad_data_e, 0, localGradSize);
147 for (int s = 0; s < numShapes; s++) {
148 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
149 const double* data_array = data.getSampleDataRO(target[n]);
150 for (int q = 0; q < numQuad; q++) {
151 #pragma ivdep
152 for (int l = 0; l < numComps; l++) {
153 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
154 data_array[l] *
155 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
156 }
157 }
158 }
159 }
160 } else if (numDim == 2) {
161 const int numShapes = 3;
162 #pragma omp for
163 for (index_t e = 0; e < elements->numElements; e++) {
164 double* grad_data_e = grad_data.getSampleDataRW(e);
165 memset(grad_data_e, 0, localGradSize);
166 for (int s = 0; s < numShapes; s++) {
167 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
168 const double* data_array = data.getSampleDataRO(target[n]);
169 for (int q = 0; q < numQuad; q++) {
170 #pragma ivdep
171 for (int l = 0; l < numComps; l++) {
172 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
173 data_array[l] *
174 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
175 grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
176 data_array[l] *
177 jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
178 }
179 }
180 }
181 }
182 } else if (numDim == 3) {
183 const int numShapes = 4;
184 #pragma omp for
185 for (index_t e = 0; e < elements->numElements; e++) {
186 double* grad_data_e = grad_data.getSampleDataRW(e);
187 memset(grad_data_e, 0, localGradSize);
188 for (int s = 0; s < numShapes; s++) {
189 const index_t n = elements->Nodes[INDEX2(s, e, NN)];
190 const double* data_array = data.getSampleDataRO(target[n]);
191 for (int q = 0; q < numQuad; q++) {
192 #pragma ivdep
193 for (int l = 0; l < numComps; l++) {
194 grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
195 data_array[l] *
196 jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
197 grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
198 data_array[l] *
199 jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
200 grad_data_e[INDEX4(l, 2, q, 0, numComps, numDim, numQuad)] +=
201 data_array[l] *
202 jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
203 }
204 }
205 }
206 }
207 }
208 }
209 } // end parallel region
210 }
211
212 } // namespace dudley
213

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
svn:mergeinfo /branches/4.0fordebian/dudley/src/Assemble_gradient.cpp:5567-5588 /branches/complex/dudley/src/Assemble_gradient.cpp:5866-5937 /branches/lapack2681/finley/src/Assemble_gradient.cpp:2682-2741 /branches/pasowrap/dudley/src/Assemble_gradient.cpp:3661-3674 /branches/py3_attempt2/dudley/src/Assemble_gradient.cpp:3871-3891 /branches/restext/finley/src/Assemble_gradient.cpp:2610-2624 /branches/ripleygmg_from_3668/dudley/src/Assemble_gradient.cpp:3669-3791 /branches/stage3.0/finley/src/Assemble_gradient.cpp:2569-2590 /branches/symbolic_from_3470/dudley/src/Assemble_gradient.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/dudley/src/Assemble_gradient.cpp:3517-3974 /release/3.0/finley/src/Assemble_gradient.cpp:2591-2601 /release/4.0/dudley/src/Assemble_gradient.cpp:5380-5406 /trunk/dudley/src/Assemble_gradient.cpp:4257-4344,5898-6007 /trunk/ripley/test/python/dudley/src/Assemble_gradient.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26