1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2009 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
#include "DataLazy.h" |
16 |
#ifdef USE_NETCDF |
17 |
#include <netcdfcpp.h> |
18 |
#endif |
19 |
#ifdef PASO_MPI |
20 |
#include <mpi.h> |
21 |
#endif |
22 |
#ifdef _OPENMP |
23 |
#include <omp.h> |
24 |
#endif |
25 |
#include "FunctionSpace.h" |
26 |
#include "DataTypes.h" |
27 |
#include "Data.h" |
28 |
#include "UnaryFuncs.h" // for escript::fsign |
29 |
#include "Utils.h" |
30 |
|
31 |
#include "EscriptParams.h" |
32 |
|
33 |
#include <iomanip> // for some fancy formatting in debug |
34 |
|
35 |
// #define LAZYDEBUG(X) if (privdebug){X;} |
36 |
#define LAZYDEBUG(X) |
37 |
namespace |
38 |
{ |
39 |
bool privdebug=false; |
40 |
|
41 |
#define ENABLEDEBUG privdebug=true; |
42 |
#define DISABLEDEBUG privdebug=false; |
43 |
} |
44 |
|
45 |
// #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();} |
46 |
|
47 |
#define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();} |
48 |
|
49 |
|
50 |
/* |
51 |
How does DataLazy work? |
52 |
~~~~~~~~~~~~~~~~~~~~~~~ |
53 |
|
54 |
Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally |
55 |
denoted left and right. |
56 |
|
57 |
A special operation, IDENTITY, stores an instance of DataReady in the m_id member. |
58 |
This means that all "internal" nodes in the structure are instances of DataLazy. |
59 |
|
60 |
Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ... |
61 |
Note that IDENTITY is not considered a unary operation.
62 |
|
63 |
I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a). |
64 |
It must however form a DAG (directed acyclic graph). |
65 |
I will refer to individual DataLazy objects within the structure as nodes.
66 |
|
67 |
Each node also stores: |
68 |
- m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was |
69 |
evaluated. |
70 |
- m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
71 |
evaluate the expression. |
72 |
- m_samplesize ~ the number of doubles stored in a sample. |
73 |
|
74 |
When a new node is created, the above values are computed based on the values in the child nodes. |
75 |
Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples. |
76 |
|
77 |
The resolve method, which produces a DataReady from a DataLazy, does the following: |
78 |
1) Create a DataReady to hold the new result. |
79 |
2) Allocate a vector (v) big enough to hold m_buffsrequired samples. |
80 |
3) For each sample, call resolveSample with v, to get its values and copy them into the result object. |
81 |
|
82 |
(In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.) |
83 |
|
84 |
resolveSample returns a Vector* and an offset within that vector where the result is stored. |
85 |
Normally, this would be v, but for identity nodes their internal vector is returned instead. |
86 |
|
87 |
The convention that I use, is that the resolve methods should store their results starting at the offset they are passed. |
88 |
|
89 |
For expressions which evaluate to Constant or Tagged, there is a different evaluation method. |
90 |
The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression. |
91 |
|
92 |
To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example): |
93 |
1) Add to the ES_optype. |
94 |
2) determine what opgroup your operation belongs to (X) |
95 |
3) add a string for the op to the end of ES_opstrings |
96 |
4) increase ES_opcount |
97 |
5) add an entry (X) to opgroups |
98 |
6) add an entry to the switch in collapseToReady |
99 |
7) add an entry to resolveX |
100 |
*/ |
101 |
|
102 |
|
103 |
using namespace std; |
104 |
using namespace boost; |
105 |
|
106 |
namespace escript |
107 |
{ |
108 |
|
109 |
namespace |
110 |
{ |
111 |
|
112 |
|
113 |
// enabling this will print out when ever the maximum stacksize used by resolve increases |
114 |
// it assumes _OPENMP is also in use |
115 |
//#define LAZY_STACK_PROF |
116 |
|
117 |
|
118 |
|
119 |
#ifndef _OPENMP |
120 |
#ifdef LAZY_STACK_PROF |
121 |
#undef LAZY_STACK_PROF |
122 |
#endif |
123 |
#endif |
124 |
|
125 |
|
126 |
#ifdef LAZY_STACK_PROF |
127 |
std::vector<void*> stackstart(getNumberOfThreads()); |
128 |
std::vector<void*> stackend(getNumberOfThreads()); |
129 |
size_t maxstackuse=0; |
130 |
#endif |
131 |
|
132 |
enum ES_opgroup |
133 |
{ |
134 |
G_UNKNOWN, |
135 |
G_IDENTITY, |
136 |
G_BINARY, // pointwise operations with two arguments |
137 |
G_UNARY, // pointwise operations with one argument |
138 |
G_UNARY_P, // pointwise operations with one argument, requiring a parameter |
139 |
G_NP1OUT, // non-pointwise op with one output |
140 |
G_NP1OUT_P, // non-pointwise op with one output requiring a parameter |
141 |
G_TENSORPROD, // general tensor product |
142 |
G_NP1OUT_2P, // non-pointwise op with one output requiring two params |
143 |
G_REDUCTION // non-pointwise unary op with a scalar output |
144 |
}; |
145 |
|
146 |
|
147 |
|
148 |
|
149 |
string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^", |
150 |
"sin","cos","tan", |
151 |
"asin","acos","atan","sinh","cosh","tanh","erf", |
152 |
"asinh","acosh","atanh", |
153 |
"log10","log","sign","abs","neg","pos","exp","sqrt", |
154 |
"1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0", |
155 |
"symmetric","nonsymmetric", |
156 |
"prod", |
157 |
"transpose", "trace", |
158 |
"swapaxes", |
159 |
"minval", "maxval"}; |
160 |
int ES_opcount=43; |
161 |
ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY, |
162 |
G_UNARY,G_UNARY,G_UNARY, //10 |
163 |
G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17 |
164 |
G_UNARY,G_UNARY,G_UNARY, // 20 |
165 |
G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28 |
166 |
G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35 |
167 |
G_NP1OUT,G_NP1OUT, |
168 |
G_TENSORPROD, |
169 |
G_NP1OUT_P, G_NP1OUT_P, |
170 |
G_NP1OUT_2P, |
171 |
G_REDUCTION, G_REDUCTION}; |
172 |
inline |
173 |
ES_opgroup |
174 |
getOpgroup(ES_optype op) |
175 |
{ |
176 |
return opgroups[op]; |
177 |
} |
178 |
|
179 |
// return the FunctionSpace of the result of "left op right" |
180 |
FunctionSpace |
181 |
resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op) |
182 |
{ |
183 |
// perhaps this should call interpolate and throw or something? |
184 |
// maybe we need an interpolate node - |
185 |
// that way, if interpolate is required in any other op we can just throw a |
186 |
// programming error exception. |
187 |
|
188 |
FunctionSpace l=left->getFunctionSpace(); |
189 |
FunctionSpace r=right->getFunctionSpace(); |
190 |
if (l!=r) |
191 |
{ |
192 |
if (r.probeInterpolation(l)) |
193 |
{ |
194 |
return l; |
195 |
} |
196 |
if (l.probeInterpolation(r)) |
197 |
{ |
198 |
return r; |
199 |
} |
200 |
throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+"."); |
201 |
} |
202 |
return l; |
203 |
} |
204 |
|
205 |
// return the shape of the result of "left op right" |
206 |
// the shapes resulting from tensor product are more complex to compute so are worked out elsewhere |
207 |
DataTypes::ShapeType |
208 |
resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op) |
209 |
{ |
210 |
if (left->getShape()!=right->getShape()) |
211 |
{ |
212 |
if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT)) |
213 |
{ |
214 |
throw DataException("Shapes not the name - shapes must match for (point)binary operations."); |
215 |
} |
216 |
|
217 |
if (left->getRank()==0) // we need to allow scalar * anything |
218 |
{ |
219 |
return right->getShape(); |
220 |
} |
221 |
if (right->getRank()==0) |
222 |
{ |
223 |
return left->getShape(); |
224 |
} |
225 |
throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data."); |
226 |
} |
227 |
return left->getShape(); |
228 |
} |
229 |
|
230 |
// return the shape for "op left" |
231 |
|
232 |
DataTypes::ShapeType |
233 |
resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset) |
234 |
{ |
235 |
switch(op) |
236 |
{ |
237 |
case TRANS: |
238 |
{ // for the scoping of variables |
239 |
const DataTypes::ShapeType& s=left->getShape(); |
240 |
DataTypes::ShapeType sh; |
241 |
int rank=left->getRank(); |
242 |
if (axis_offset<0 || axis_offset>rank) |
243 |
{ |
244 |
stringstream e; |
245 |
e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank; |
246 |
throw DataException(e.str()); |
247 |
} |
248 |
for (int i=0; i<rank; i++) |
249 |
{ |
250 |
int index = (axis_offset+i)%rank; |
251 |
sh.push_back(s[index]); // Append to new shape |
252 |
} |
253 |
return sh; |
254 |
} |
255 |
break; |
256 |
case TRACE: |
257 |
{ |
258 |
int rank=left->getRank(); |
259 |
if (rank<2) |
260 |
{ |
261 |
throw DataException("Trace can only be computed for objects with rank 2 or greater."); |
262 |
} |
263 |
if ((axis_offset>rank-2) || (axis_offset<0)) |
264 |
{ |
265 |
throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive."); |
266 |
} |
267 |
if (rank==2) |
268 |
{ |
269 |
return DataTypes::scalarShape; |
270 |
} |
271 |
else if (rank==3) |
272 |
{ |
273 |
DataTypes::ShapeType sh; |
274 |
if (axis_offset==0) |
275 |
{ |
276 |
sh.push_back(left->getShape()[2]); |
277 |
} |
278 |
else // offset==1 |
279 |
{ |
280 |
sh.push_back(left->getShape()[0]); |
281 |
} |
282 |
return sh; |
283 |
} |
284 |
else if (rank==4) |
285 |
{ |
286 |
DataTypes::ShapeType sh; |
287 |
const DataTypes::ShapeType& s=left->getShape(); |
288 |
if (axis_offset==0) |
289 |
{ |
290 |
sh.push_back(s[2]); |
291 |
sh.push_back(s[3]); |
292 |
} |
293 |
else if (axis_offset==1) |
294 |
{ |
295 |
sh.push_back(s[0]); |
296 |
sh.push_back(s[3]); |
297 |
} |
298 |
else // offset==2 |
299 |
{ |
300 |
sh.push_back(s[0]); |
301 |
sh.push_back(s[1]); |
302 |
} |
303 |
return sh; |
304 |
} |
305 |
else // unknown rank |
306 |
{ |
307 |
throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object."); |
308 |
} |
309 |
} |
310 |
break; |
311 |
default: |
312 |
throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+"."); |
313 |
} |
314 |
} |
315 |
|
316 |
// Shape produced by swapping axes axis0 and axis1 of left.
// (Logic taken from the Data.cpp swapaxes() method; some of the checks are
// probably redundant here.)
DataTypes::ShapeType
SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
{
        const DataTypes::ShapeType& in=left->getShape();
        const int rank=left->getRank();
        if (rank<2)
        {
                throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
        }
        if (axis0<0 || axis0>rank-1)
        {
                stringstream msg;
                msg << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
                throw DataException(msg.str());
        }
        if (axis1<0 || axis1>rank-1)
        {
                stringstream msg;
                msg << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
                throw DataException(msg.str());
        }
        if (axis0==axis1)
        {
                throw DataException("Error - Data::swapaxes: axis indices must be different.");
        }
        // normalise the pair so that lo < hi
        const int lo = (axis0<axis1) ? axis0 : axis1;
        const int hi = (axis0<axis1) ? axis1 : axis0;
        DataTypes::ShapeType out;
        for (int i=0; i<rank; i++)
        {
                if (i==lo)
                {
                        out.push_back(in[hi]);          // take the other axis's extent
                }
                else if (i==hi)
                {
                        out.push_back(in[lo]);
                }
                else
                {
                        out.push_back(in[i]);           // untouched axis
                }
        }
        return out;
}
361 |
|
362 |
|
363 |
// determine the output shape for the general tensor product operation |
364 |
// the additional parameters return information required later for the product |
365 |
// the majority of this code is copy pasted from C_General_Tensor_Product |
366 |
DataTypes::ShapeType |
367 |
GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR) |
368 |
{ |
369 |
|
370 |
// Get rank and shape of inputs |
371 |
int rank0 = left->getRank(); |
372 |
int rank1 = right->getRank(); |
373 |
const DataTypes::ShapeType& shape0 = left->getShape(); |
374 |
const DataTypes::ShapeType& shape1 = right->getShape(); |
375 |
|
376 |
// Prepare for the loops of the product and verify compatibility of shapes |
377 |
int start0=0, start1=0; |
378 |
if (transpose == 0) {} |
379 |
else if (transpose == 1) { start0 = axis_offset; } |
380 |
else if (transpose == 2) { start1 = rank1-axis_offset; } |
381 |
else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); } |
382 |
|
383 |
if (rank0<axis_offset) |
384 |
{ |
385 |
throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset"); |
386 |
} |
387 |
|
388 |
// Adjust the shapes for transpose |
389 |
DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather |
390 |
DataTypes::ShapeType tmpShape1(rank1); // than using push_back |
391 |
for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; } |
392 |
for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; } |
393 |
|
394 |
// Prepare for the loops of the product |
395 |
SL=1, SM=1, SR=1; |
396 |
for (int i=0; i<rank0-axis_offset; i++) { |
397 |
SL *= tmpShape0[i]; |
398 |
} |
399 |
for (int i=rank0-axis_offset; i<rank0; i++) { |
400 |
if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) { |
401 |
throw DataException("C_GeneralTensorProduct: Error - incompatible shapes"); |
402 |
} |
403 |
SM *= tmpShape0[i]; |
404 |
} |
405 |
for (int i=axis_offset; i<rank1; i++) { |
406 |
SR *= tmpShape1[i]; |
407 |
} |
408 |
|
409 |
// Define the shape of the output (rank of shape is the sum of the loop ranges below) |
410 |
DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset); |
411 |
{ // block to limit the scope of out_index |
412 |
int out_index=0; |
413 |
for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z |
414 |
for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z |
415 |
} |
416 |
|
417 |
if (shape2.size()>ESCRIPT_MAX_DATA_RANK) |
418 |
{ |
419 |
ostringstream os; |
420 |
os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << "."; |
421 |
throw DataException(os.str()); |
422 |
} |
423 |
|
424 |
return shape2; |
425 |
} |
426 |
|
427 |
} // end anonymous namespace |
428 |
|
429 |
|
430 |
|
431 |
// Return a string representing the operation |
432 |
const std::string& |
433 |
opToString(ES_optype op) |
434 |
{ |
435 |
if (op<0 || op>=ES_opcount) |
436 |
{ |
437 |
op=UNKNOWNOP; |
438 |
} |
439 |
return ES_opstrings[op]; |
440 |
} |
441 |
|
442 |
void DataLazy::LazyNodeSetup() |
443 |
{ |
444 |
#ifdef _OPENMP |
445 |
int numthreads=omp_get_max_threads(); |
446 |
m_samples.resize(numthreads*m_samplesize); |
447 |
m_sampleids=new int[numthreads]; |
448 |
for (int i=0;i<numthreads;++i) |
449 |
{ |
450 |
m_sampleids[i]=-1; |
451 |
} |
452 |
#else |
453 |
m_samples.resize(m_samplesize); |
454 |
m_sampleids=new int[1]; |
455 |
m_sampleids[0]=-1; |
456 |
#endif // _OPENMP |
457 |
} |
458 |
|
459 |
|
460 |
// Creates an identity node |
461 |
DataLazy::DataLazy(DataAbstract_ptr p) |
462 |
: parent(p->getFunctionSpace(),p->getShape()) |
463 |
,m_sampleids(0), |
464 |
m_samples(1) |
465 |
{ |
466 |
if (p->isLazy()) |
467 |
{ |
468 |
// I don't want identity of Lazy. |
469 |
// Question: Why would that be so bad? |
470 |
// Answer: We assume that the child of ID is something we can call getVector on |
471 |
throw DataException("Programmer error - attempt to create identity from a DataLazy."); |
472 |
} |
473 |
else |
474 |
{ |
475 |
p->makeLazyShared(); |
476 |
DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p); |
477 |
makeIdentity(dr); |
478 |
LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;) |
479 |
} |
480 |
LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;) |
481 |
} |
482 |
|
483 |
// Node applying a parameterless unary, one-output non-pointwise, or
// reduction operation to left. A non-lazy operand is wrapped in an
// IDENTITY node so the child is always a DataLazy.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
        : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
        m_op(op),
        m_axis_offset(0),
        m_transpose(0),
        m_SL(0), m_SM(0), m_SR(0)
{
        const ES_opgroup grp=getOpgroup(op);
        if ((grp!=G_UNARY) && (grp!=G_NP1OUT) && (grp!=G_REDUCTION))
        {
                throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
        }

        m_left = left->isLazy() ? dynamic_pointer_cast<DataLazy>(left)
                                : DataLazy_ptr(new DataLazy(left));
        m_readytype=m_left->m_readytype;        // same result type as the operand
        m_samplesize=getNumDPPSample()*getNoValues();
        m_children=m_left->m_children+1;
        m_height=m_left->m_height+1;
        LazyNodeSetup();
        SIZELIMIT
}
512 |
|
513 |
|
514 |
// In this constructor we need to consider interpolation.
// Builds a binary-operation node "left op right": operands living on a
// different FunctionSpace from the result are interpolated first, then
// non-lazy operands are wrapped in IDENTITY nodes so both children are
// DataLazy instances.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
        : parent(resultFS(left,right,op), resultShape(left,right,op)),
        m_op(op),
        m_SL(0), m_SM(0), m_SR(0)
{
        LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
        if ((getOpgroup(op)!=G_BINARY))
        {
                throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
        }

        if (getFunctionSpace()!=left->getFunctionSpace())       // left needs to be interpolated
        {
                FunctionSpace fs=getFunctionSpace();
                Data ltemp(left);
                Data tmp(ltemp,fs);
                left=tmp.borrowDataPtr();
        }
        if (getFunctionSpace()!=right->getFunctionSpace())      // right needs to be interpolated
        {
                Data tmp(Data(right),getFunctionSpace());
                right=tmp.borrowDataPtr();
                LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
        }
        left->operandCheck(*right);

        if (left->isLazy())                     // the children need to be DataLazy. Wrap them in IDENTITY if required
        {
                m_left=dynamic_pointer_cast<DataLazy>(left);
                LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
        }
        else
        {
                m_left=DataLazy_ptr(new DataLazy(left));
                LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
        }
        if (right->isLazy())
        {
                m_right=dynamic_pointer_cast<DataLazy>(right);
                LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
        }
        else
        {
                m_right=DataLazy_ptr(new DataLazy(right));
                LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
        }
        // Result type: Expanded ('E') dominates Tagged ('T'), which
        // dominates Constant ('C').
        char lt=m_left->m_readytype;
        char rt=m_right->m_readytype;
        if (lt=='E' || rt=='E')
        {
                m_readytype='E';
        }
        else if (lt=='T' || rt=='T')
        {
                m_readytype='T';
        }
        else
        {
                m_readytype='C';
        }
        m_samplesize=getNumDPPSample()*getNoValues();   // doubles per sample
        m_children=m_left->m_children+m_right->m_children+2;
        m_height=max(m_left->m_height,m_right->m_height)+1;
        LazyNodeSetup();
        SIZELIMIT
        LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
}
582 |
|
583 |
// General tensor product node, with the given axis_offset and transpose.
// Operands are interpolated onto the result's FunctionSpace where needed,
// and ready operands are wrapped in IDENTITY nodes.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
        m_op(op),
        m_axis_offset(axis_offset),
        m_transpose(transpose)
{
        if ((getOpgroup(op)!=G_TENSORPROD))
        {
                throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
        }
        if ((transpose>2) || (transpose<0))
        {
                throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
        }
        if (getFunctionSpace()!=left->getFunctionSpace())       // left needs to be interpolated
        {
                FunctionSpace fs=getFunctionSpace();
                Data ltemp(left);
                Data tmp(ltemp,fs);
                left=tmp.borrowDataPtr();
        }
        if (getFunctionSpace()!=right->getFunctionSpace())      // right needs to be interpolated
        {
                Data tmp(Data(right),getFunctionSpace());
                right=tmp.borrowDataPtr();
        }
//      left->operandCheck(*right);

        // both children must be DataLazy; wrap ready operands in IDENTITY nodes
        m_left  = left->isLazy()  ? dynamic_pointer_cast<DataLazy>(left)
                                  : DataLazy_ptr(new DataLazy(left));
        m_right = right->isLazy() ? dynamic_pointer_cast<DataLazy>(right)
                                  : DataLazy_ptr(new DataLazy(right));

        // result type: Expanded dominates Tagged, which dominates Constant
        const char lt=m_left->m_readytype;
        const char rt=m_right->m_readytype;
        m_readytype = (lt=='E' || rt=='E') ? 'E'
                    : (lt=='T' || rt=='T') ? 'T'
                    : 'C';
        m_samplesize=getNumDPPSample()*getNoValues();
        m_children=m_left->m_children+m_right->m_children+2;
        m_height=max(m_left->m_height,m_right->m_height)+1;
        LazyNodeSetup();
        SIZELIMIT
        LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
}
648 |
|
649 |
|
650 |
// Node for a non-pointwise one-output operation which takes a single axis
// parameter (stored in m_axis_offset).
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
        m_op(op),
        m_axis_offset(axis_offset),
        m_transpose(0),
        m_tol(0)
{
        if ((getOpgroup(op)!=G_NP1OUT_P))
        {
                throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
        }
        // wrap a ready operand in an IDENTITY node
        m_left = left->isLazy() ? dynamic_pointer_cast<DataLazy>(left)
                                : DataLazy_ptr(new DataLazy(left));
        m_readytype=m_left->m_readytype;
        m_samplesize=getNumDPPSample()*getNoValues();
        m_children=m_left->m_children+1;
        m_height=m_left->m_height+1;
        LazyNodeSetup();
        SIZELIMIT
        LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
}
679 |
|
680 |
// Node for a pointwise unary operation which takes a tolerance parameter
// (stored in m_tol).
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
        : parent(left->getFunctionSpace(), left->getShape()),
        m_op(op),
        m_axis_offset(0),
        m_transpose(0),
        m_tol(tol)
{
        if ((getOpgroup(op)!=G_UNARY_P))
        {
                throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
        }
        // wrap a ready operand in an IDENTITY node
        m_left = left->isLazy() ? dynamic_pointer_cast<DataLazy>(left)
                                : DataLazy_ptr(new DataLazy(left));
        m_readytype=m_left->m_readytype;
        m_samplesize=getNumDPPSample()*getNoValues();
        m_children=m_left->m_children+1;
        m_height=m_left->m_height+1;
        LazyNodeSetup();
        SIZELIMIT
        LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
}
709 |
|
710 |
|
711 |
// Node for a non-pointwise one-output operation which takes two integer
// axis parameters (eg swapaxes): axis0 is stored in m_axis_offset and
// axis1 in m_transpose.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
        m_op(op),
        m_axis_offset(axis0),
        m_transpose(axis1),
        m_tol(0)
{
        if ((getOpgroup(op)!=G_NP1OUT_2P))
        {
                // Message fixed: it previously described the (left, op, tol) constructor.
                throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
        }
        DataLazy_ptr lleft;
        if (!left->isLazy())
        {
                // wrap a ready operand in an IDENTITY node
                lleft=DataLazy_ptr(new DataLazy(left));
        }
        else
        {
                lleft=dynamic_pointer_cast<DataLazy>(left);
        }
        m_readytype=lleft->m_readytype;
        m_left=lleft;
        m_samplesize=getNumDPPSample()*getNoValues();
        m_children=m_left->m_children+1;
        m_height=m_left->m_height+1;
        LazyNodeSetup();
        SIZELIMIT
        LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
}
740 |
|
741 |
// Destructor: release the per-thread sample-id table allocated with new[]
// in LazyNodeSetup(). (delete[] on a null pointer is a no-op.)
DataLazy::~DataLazy()
{
        delete[] m_sampleids;
}
745 |
|
746 |
|
747 |
/* |
748 |
\brief Evaluates the expression using methods on Data. |
749 |
This does the work for the collapse method. |
750 |
For reasons of efficiency do not call this method on DataExpanded nodes. |
751 |
*/ |
752 |
DataReady_ptr |
753 |
DataLazy::collapseToReady() |
754 |
{ |
755 |
if (m_readytype=='E') |
756 |
{ // this is more an efficiency concern than anything else |
757 |
throw DataException("Programmer Error - do not use collapse on Expanded data."); |
758 |
} |
759 |
if (m_op==IDENTITY) |
760 |
{ |
761 |
return m_id; |
762 |
} |
763 |
DataReady_ptr pleft=m_left->collapseToReady(); |
764 |
Data left(pleft); |
765 |
Data right; |
766 |
if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD)) |
767 |
{ |
768 |
right=Data(m_right->collapseToReady()); |
769 |
} |
770 |
Data result; |
771 |
switch(m_op) |
772 |
{ |
773 |
case ADD: |
774 |
result=left+right; |
775 |
break; |
776 |
case SUB: |
777 |
result=left-right; |
778 |
break; |
779 |
case MUL: |
780 |
result=left*right; |
781 |
break; |
782 |
case DIV: |
783 |
result=left/right; |
784 |
break; |
785 |
case SIN: |
786 |
result=left.sin(); |
787 |
break; |
788 |
case COS: |
789 |
result=left.cos(); |
790 |
break; |
791 |
case TAN: |
792 |
result=left.tan(); |
793 |
break; |
794 |
case ASIN: |
795 |
result=left.asin(); |
796 |
break; |
797 |
case ACOS: |
798 |
result=left.acos(); |
799 |
break; |
800 |
case ATAN: |
801 |
result=left.atan(); |
802 |
break; |
803 |
case SINH: |
804 |
result=left.sinh(); |
805 |
break; |
806 |
case COSH: |
807 |
result=left.cosh(); |
808 |
break; |
809 |
case TANH: |
810 |
result=left.tanh(); |
811 |
break; |
812 |
case ERF: |
813 |
result=left.erf(); |
814 |
break; |
815 |
case ASINH: |
816 |
result=left.asinh(); |
817 |
break; |
818 |
case ACOSH: |
819 |
result=left.acosh(); |
820 |
break; |
821 |
case ATANH: |
822 |
result=left.atanh(); |
823 |
break; |
824 |
case LOG10: |
825 |
result=left.log10(); |
826 |
break; |
827 |
case LOG: |
828 |
result=left.log(); |
829 |
break; |
830 |
case SIGN: |
831 |
result=left.sign(); |
832 |
break; |
833 |
case ABS: |
834 |
result=left.abs(); |
835 |
break; |
836 |
case NEG: |
837 |
result=left.neg(); |
838 |
break; |
839 |
case POS: |
840 |
// it doesn't mean anything for delayed. |
841 |
// it will just trigger a deep copy of the lazy object |
842 |
throw DataException("Programmer error - POS not supported for lazy data."); |
843 |
break; |
844 |
case EXP: |
845 |
result=left.exp(); |
846 |
break; |
847 |
case SQRT: |
848 |
result=left.sqrt(); |
849 |
break; |
850 |
case RECIP: |
851 |
result=left.oneOver(); |
852 |
break; |
853 |
case GZ: |
854 |
result=left.wherePositive(); |
855 |
break; |
856 |
case LZ: |
857 |
result=left.whereNegative(); |
858 |
break; |
859 |
case GEZ: |
860 |
result=left.whereNonNegative(); |
861 |
break; |
862 |
case LEZ: |
863 |
result=left.whereNonPositive(); |
864 |
break; |
865 |
case NEZ: |
866 |
result=left.whereNonZero(m_tol); |
867 |
break; |
868 |
case EZ: |
869 |
result=left.whereZero(m_tol); |
870 |
break; |
871 |
case SYM: |
872 |
result=left.symmetric(); |
873 |
break; |
874 |
case NSYM: |
875 |
result=left.nonsymmetric(); |
876 |
break; |
877 |
case PROD: |
878 |
result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose); |
879 |
break; |
880 |
case TRANS: |
881 |
result=left.transpose(m_axis_offset); |
882 |
break; |
883 |
case TRACE: |
884 |
result=left.trace(m_axis_offset); |
885 |
break; |
886 |
case SWAP: |
887 |
result=left.swapaxes(m_axis_offset, m_transpose); |
888 |
break; |
889 |
case MINVAL: |
890 |
result=left.minval(); |
891 |
break; |
892 |
case MAXVAL: |
893 |
result=left.minval(); |
894 |
break; |
895 |
default: |
896 |
throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+"."); |
897 |
} |
898 |
return result.borrowReadyPtr(); |
899 |
} |
900 |
|
901 |
/*
  \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
  This method uses the original methods on the Data class to evaluate the expressions.
  For this reason, it should not be used on DataExpanded instances. (To do so would defeat
  the purpose of using DataLazy in the first place).
*/
void
DataLazy::collapse()
{
        // NOTE: the IDENTITY check must come first - an IDENTITY node may
        // legitimately wrap expanded data and is already collapsed.
        if (m_op==IDENTITY)
        {
                return;
        }
        if (m_readytype=='E')
        {       // this is more an efficiency concern than anything else
                throw DataException("Programmer Error - do not use collapse on Expanded data.");
        }
        // replace the whole sub-expression with a single ready result
        m_id=collapseToReady();
        m_op=IDENTITY;
}
921 |
|
922 |
|
923 |
|
924 |
|
925 |
|
926 |
|
927 |
#define PROC_OP(TYPE,X) \ |
928 |
for (int j=0;j<onumsteps;++j)\ |
929 |
{\ |
930 |
for (int i=0;i<numsteps;++i,resultp+=resultStep) \ |
931 |
{ \ |
932 |
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\ |
933 |
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\ |
934 |
tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \ |
935 |
LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \ |
936 |
lroffset+=leftstep; \ |
937 |
rroffset+=rightstep; \ |
938 |
}\ |
939 |
lroffset+=oleftstep;\ |
940 |
rroffset+=orightstep;\ |
941 |
} |
942 |
|
943 |
|
944 |
// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
// Resolves one sample of this node's expression for the calling thread,
// dispatching to the group-specific resolver.
const DataTypes::ValueType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
{
        LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
        // collapse so we have a 'E' node or an IDENTITY for some other type
        if (m_readytype!='E' && m_op!=IDENTITY)
        {
                collapse();
        }
        if (m_op==IDENTITY)
        {
                // identity nodes hand back their child's own vector rather
                // than copying values into m_samples
                const ValueType& vec=m_id->getVectorRO();
                roffset=m_id->getPointOffset(sampleNo, 0);
#ifdef LAZY_STACK_PROF
                // record the deepest stack position seen by this thread
                int x;
                if (&x<stackend[omp_get_thread_num()])
                {
                        stackend[omp_get_thread_num()]=&x;
                }
#endif
                return &(vec);
        }
        if (m_readytype!='E')
        {
                throw DataException("Programmer Error - Collapse did not produce an expanded node.");
        }
        // per-thread cache: if this thread resolved this very sample last
        // time, reuse the values already in its chunk of m_samples
        if (m_sampleids[tid]==sampleNo)
        {
                roffset=tid*m_samplesize;
                return &(m_samples);            // sample is already resolved
        }
        m_sampleids[tid]=sampleNo;
        // dispatch on the operation's group to the specialised resolver
        switch (getOpgroup(m_op))
        {
        case G_UNARY:
        case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
        case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
        case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
        case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
        case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
        case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
        case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
        default:
                throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
        }
}
992 |
|
993 |
// Evaluate a unary (or parameterised-unary, e.g. NEZ/EZ with tolerance m_tol)
// operation for one sample.  The left child supplies the input sample; the
// result is written into this thread's slot of m_samples and roffset is set
// to point at that slot.
const DataTypes::ValueType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        // we will also know we won't get identity nodes
        if (m_readytype!='E')
        {
          throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
        }
        if (m_op==IDENTITY)
        {
          throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
        }
        // resolve the child; 'left' then points at the child's sample data
        const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
        const double* left=&((*leftres)[roffset]);
        roffset=m_samplesize*tid;       // our output goes in this thread's slot
        double* result=&(m_samples[roffset]);
        switch (m_op)
        {
        case SIN:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
            break;
        case COS:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
            break;
        case TAN:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
            break;
        case ASIN:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
            break;
        case ACOS:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
            break;
        case ATAN:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
            break;
        case SINH:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
            break;
        case COSH:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
            break;
        case TANH:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
            break;
        case ERF:
            // erf is absent from the MSVC maths library of this era
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
            throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
            tensor_unary_operation(m_samplesize, left, result, ::erf);
            break;
#endif
        case ASINH:
            // substitutes are used where the platform lacks the C99 inverse
            // hyperbolic functions
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
            tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
            tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif
            break;
        case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
            tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
            tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif
            break;
        case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
            tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
            tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif
            break;
        case LOG10:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
            break;
        case LOG:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
            break;
        case SIGN:
            tensor_unary_operation(m_samplesize, left, result, escript::fsign);
            break;
        case ABS:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
            break;
        case NEG:
            tensor_unary_operation(m_samplesize, left, result, negate<double>());
            break;
        case POS:
            // it doesn't mean anything for delayed.
            // it will just trigger a deep copy of the lazy object
            throw DataException("Programmer error - POS not supported for lazy data.");
            break;
        case EXP:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
            break;
        case SQRT:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
            break;
        case RECIP:
            // reciprocal computed as 1/x
            tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
            break;
        case GZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
            break;
        case LZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
            break;
        case GEZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
            break;
        case LEZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
            break;
        // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
        case NEZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
            break;
        case EZ:
            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
            break;

        default:
            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
        }
        return &(m_samples);
}
1123 |
|
1124 |
|
1125 |
// Evaluate a reduction operation (MINVAL/MAXVAL) for one sample.
// The left child supplies the input; one scalar is produced per data point
// (ndpps results per sample), written into this thread's slot of m_samples.
// roffset is set to the start of that slot.
const DataTypes::ValueType*
DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        // we will also know we won't get identity nodes
        if (m_readytype!='E')
        {
          throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
        }
        if (m_op==IDENTITY)
        {
          throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
        }
        size_t loffset=0;
        const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);

        roffset=m_samplesize*tid;
        unsigned int ndpps=getNumDPPSample();
        // number of values in one data point of the child (reduction input)
        unsigned int psize=DataTypes::noValues(getShape());
        double* result=&(m_samples[roffset]);
        switch (m_op)
        {
        case MINVAL:
            {
              for (unsigned int z=0;z<ndpps;++z)
              {
                FMin op;
                // start from +max so any real value replaces the initial one
                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
                loffset+=psize;
                result++;
              }
            }
            break;
        case MAXVAL:
            {
              for (unsigned int z=0;z<ndpps;++z)
              {
                FMax op;
                // start from -max (lowest representable magnitude) for a max scan
                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, -numeric_limits<double>::max());
                loffset+=psize;
                result++;
              }
            }
            break;
        default:
            throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
        }
        return &(m_samples);
}
1176 |
|
1177 |
// Evaluate a shape-preserving single-argument tensor operation (SYM/NSYM)
// for one sample, one data point at a time.  Output goes into this thread's
// slot of m_samples; roffset is set to the start of that slot.
const DataTypes::ValueType*
DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        if (m_readytype!='E')
        {
          throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
        }
        if (m_op==IDENTITY)
        {
          throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
        }
        size_t subroffset;
        const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
        roffset=m_samplesize*tid;
        size_t loop=0;
        // NOTE(review): m_readytype=='E' is guaranteed by the guard above, so
        // this ternary always yields getNumDPPSample(); kept as written.
        size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
        size_t step=getNoValues();      // same size per point for input and output here
        size_t offset=roffset;
        switch (m_op)
        {
        case SYM:
            for (loop=0;loop<numsteps;++loop)
            {
                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
                subroffset+=step;
                offset+=step;
            }
            break;
        case NSYM:
            for (loop=0;loop<numsteps;++loop)
            {
                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
                subroffset+=step;
                offset+=step;
            }
            break;
        default:
            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
        }
        return &m_samples;
}
1221 |
|
1222 |
// Evaluate a single-argument tensor operation taking one integer parameter
// (TRACE/TRANS, parameter in m_axis_offset) for one sample, point by point.
// Input and output points may differ in size, hence separate instep/outstep.
const DataTypes::ValueType*
DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        if (m_readytype!='E')
        {
          throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
        }
        if (m_op==IDENTITY)
        {
          throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
        }
        size_t subroffset;
        size_t offset;
        const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
        roffset=m_samplesize*tid;       // results go in this thread's slot
        offset=roffset;
        size_t loop=0;
        // m_readytype=='E' is guaranteed by the guard above (ternary kept as written)
        size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
        size_t outstep=getNoValues();           // values per output data point
        size_t instep=m_left->getNoValues();    // values per input data point
        switch (m_op)
        {
        case TRACE:
            for (loop=0;loop<numsteps;++loop)
            {
                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
                subroffset+=instep;
                offset+=outstep;
            }
            break;
        case TRANS:
            for (loop=0;loop<numsteps;++loop)
            {
                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
                subroffset+=instep;
                offset+=outstep;
            }
            break;
        default:
            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
        }
        return &m_samples;
}
1268 |
|
1269 |
|
1270 |
// Evaluate a single-argument tensor operation taking two integer parameters
// (SWAP of axes m_axis_offset and m_transpose) for one sample, point by point.
const DataTypes::ValueType*
DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
{
        if (m_readytype!='E')
        {
          throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
        }
        if (m_op==IDENTITY)
        {
          throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
        }
        size_t subroffset;
        size_t offset;
        const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
        roffset=m_samplesize*tid;       // results go in this thread's slot
        offset=roffset;
        size_t loop=0;
        // m_readytype=='E' is guaranteed by the guard above (ternary kept as written)
        size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
        size_t outstep=getNoValues();           // values per output data point
        size_t instep=m_left->getNoValues();    // values per input data point
        switch (m_op)
        {
        case SWAP:
            for (loop=0;loop<numsteps;++loop)
            {
                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
                subroffset+=instep;
                offset+=outstep;
            }
            break;
        default:
            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
        }
        return &m_samples;
}
1305 |
|
1306 |
|
1307 |
|
1308 |
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint.
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

        size_t lroffset=0, rroffset=0;  // offsets in the left and right result vectors
        // first work out which of the children are expanded
        bool leftExp=(m_left->m_readytype=='E');
        bool rightExp=(m_right->m_readytype=='E');
        if (!leftExp && !rightExp)
        {
                throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
        }
        bool leftScalar=(m_left->getRank()==0);
        bool rightScalar=(m_right->getRank()==0);
        if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
        {
                throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
        }
        size_t leftsize=m_left->getNoValues();
        size_t rightsize=m_right->getNoValues();
        size_t chunksize=1;     // how many doubles will be processed in one go
        int leftstep=0;         // how far should the left offset advance after each step
        int rightstep=0;
        int numsteps=0;         // total number of steps for the inner loop
        int oleftstep=0;        // the o variables refer to the outer loop
        int orightstep=0;       // The outer loop is only required in cases where there is an extended scalar
        int onumsteps=1;

        // classify each child by (expanded?, scalar?) - exactly one of the
        // four flags below holds for each side
        bool LES=(leftExp && leftScalar);       // Left is an expanded scalar
        bool RES=(rightExp && rightScalar);
        bool LS=(!leftExp && leftScalar);       // left is a single scalar
        bool RS=(!rightExp && rightScalar);
        bool LN=(!leftExp && !leftScalar);      // left is a single non-scalar
        bool RN=(!rightExp && !rightScalar);
        bool LEN=(leftExp && !leftScalar);      // left is an expanded non-scalar
        bool REN=(rightExp && !rightScalar);

        if ((LES && RES) || (LEN && REN))       // both are Expanded scalars or both are expanded non-scalars
        {
                // matched shapes: process the whole sample as one big chunk
                chunksize=m_left->getNumDPPSample()*leftsize;
                leftstep=0;
                rightstep=0;
                numsteps=1;
        }
        else if (LES || RES)
        {
                // one side is an expanded scalar: process double by double
                chunksize=1;
                if (LES)                // left is an expanded scalar
                {
                        if (RS)
                        {
                           leftstep=1;
                           rightstep=0;
                           numsteps=m_left->getNumDPPSample();
                        }
                        else            // RN or REN
                        {
                           // outer loop advances the scalar; negative outer
                           // stride rewinds a non-expanded right operand
                           leftstep=0;
                           oleftstep=1;
                           rightstep=1;
                           orightstep=(RN ? -(int)rightsize : 0);
                           numsteps=rightsize;
                           onumsteps=m_left->getNumDPPSample();
                        }
                }
                else            // right is an expanded scalar
                {
                        if (LS)
                        {
                           rightstep=1;
                           leftstep=0;
                           numsteps=m_right->getNumDPPSample();
                        }
                        else
                        {
                           rightstep=0;
                           orightstep=1;
                           leftstep=1;
                           oleftstep=(LN ? -(int)leftsize : 0);
                           numsteps=leftsize;
                           onumsteps=m_right->getNumDPPSample();
                        }
                }
        }
        else    // this leaves (LEN, RS), (LEN, RN) and their transposes
        {
                if (LEN)        // and Right will be a single value
                {
                        // advance through the expanded left, replaying the
                        // single right operand against each point
                        chunksize=rightsize;
                        leftstep=rightsize;
                        rightstep=0;
                        numsteps=m_left->getNumDPPSample();
                        if (RS)
                        {
                           numsteps*=leftsize;
                        }
                }
                else    // REN
                {
                        chunksize=leftsize;
                        rightstep=leftsize;
                        leftstep=0;
                        numsteps=m_right->getNumDPPSample();
                        if (LS)
                        {
                           numsteps*=rightsize;
                        }
                }
        }

        int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
        // Get the values of sub-expressions
        const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
        const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


        roffset=m_samplesize*tid;
        double* resultp=&(m_samples[roffset]);  // results are stored at the vector offset we recieved
        switch(m_op)
        {
        case ADD:
                PROC_OP(NO_ARG,plus<double>());
                break;
        case SUB:
                PROC_OP(NO_ARG,minus<double>());
                break;
        case MUL:
                PROC_OP(NO_ARG,multiplies<double>());
                break;
        case DIV:
                PROC_OP(NO_ARG,divides<double>());
                break;
        case POW:
                PROC_OP(double (double,double),::pow);
                break;
        default:
                throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
        }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
        return &m_samples;
}
1469 |
|
1470 |
|
1471 |
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
// Evaluates a generalised tensor product (PROD) for one sample: each data
// point is multiplied via matrix_matrix_product using the precomputed
// dimensions m_SL/m_SM/m_SR and the m_transpose flag.
const DataTypes::ValueType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

        size_t lroffset=0, rroffset=0;  // offsets in the left and right result vectors
        // first work out which of the children are expanded
        bool leftExp=(m_left->m_readytype=='E');
        bool rightExp=(m_right->m_readytype=='E');
        int steps=getNumDPPSample();
        // a non-expanded child keeps a zero stride so its single point is reused
        int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
        int rightStep=(rightExp?m_right->getNoValues() : 0);

        int resultStep=getNoValues();
        roffset=m_samplesize*tid;       // results go in this thread's slot
        size_t offset=roffset;

        const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

        const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
cout << getNoValues() << endl;)


LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

        double* resultp=&(m_samples[offset]);   // results are stored at the vector offset we recieved
        switch(m_op)
        {
        case PROD:
            for (int i=0;i<steps;++i,resultp+=resultStep)
            {
                const double *ptr_0 = &((*left)[lroffset]);
                const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

                matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

                lroffset+=leftStep;
                rroffset+=rightStep;
            }
            break;
        default:
            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
        }
        roffset=offset;
        return &m_samples;
}
1531 |
|
1532 |
|
1533 |
// Public per-sample resolution entry point: picks the calling thread's id and
// forwards to resolveNodeSample.  When LAZY_STACK_PROF is enabled it also
// measures the stack depth consumed by the recursive resolution.
const DataTypes::ValueType*
DataLazy::resolveSample(int sampleNo, size_t& roffset)
{
#ifdef _OPENMP
        int tid=omp_get_thread_num();
#else
        int tid=0;
#endif

#ifdef LAZY_STACK_PROF
        // bracket the recursion with the current stack address so the
        // difference after the call approximates stack usage
        stackstart[tid]=&tid;
        stackend[tid]=&tid;
        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
        #pragma omp critical
        if (d>maxstackuse)
        {
                cout << "Max resolve Stack use " << d << endl;
                maxstackuse=d;
        }
        return r;
#else
        return resolveNodeSample(tid, sampleNo, roffset);
#endif
}
1558 |
|
1559 |
|
1560 |
// This needs to do the work of the identity constructor |
1561 |
void |
1562 |
DataLazy::resolveToIdentity() |
1563 |
{ |
1564 |
if (m_op==IDENTITY) |
1565 |
return; |
1566 |
DataReady_ptr p=resolveNodeWorker(); |
1567 |
makeIdentity(p); |
1568 |
} |
1569 |
|
1570 |
// Turn this node into an IDENTITY wrapping the ready data p, resetting all
// operation-related state and releasing the child subtrees.
// Mirrors the initialisation performed by the identity constructor.
void DataLazy::makeIdentity(const DataReady_ptr& p)
{
   m_op=IDENTITY;
   // clear parameters belonging to the previous operation
   m_axis_offset=0;
   m_transpose=0;
   m_SL=m_SM=m_SR=0;
   m_children=m_height=0;
   m_id=p;
   // classify the wrapped data: Constant, Expanded or Tagged
   if(p->isConstant()) {m_readytype='C';}
   else if(p->isExpanded()) {m_readytype='E';}
   else if (p->isTagged()) {m_readytype='T';}
   else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
   m_samplesize=p->getNumDPPSample()*p->getNoValues();
   // drop the (now evaluated) children so their storage can be reclaimed
   m_left.reset();
   m_right.reset();
}
1586 |
|
1587 |
|
1588 |
// Fully evaluate the expression and return the resulting ready data.
// After this call the node itself is an IDENTITY wrapping that same value,
// so repeated calls are cheap.
DataReady_ptr
DataLazy::resolve()
{
    resolveToIdentity();        // no-op when already collapsed
    return m_id;
}
1594 |
|
1595 |
// This version of resolve uses storage in each node to hold results
// Evaluates the whole expression into a freshly-allocated DataExpanded,
// resolving samples in parallel (one OpenMP thread per chunk of samples)
// and copying each resolved sample into the output vector.
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);   // smart pointer takes ownership of 'result'

  int sample;
  int totalsamples=getNumSamples();
  const ValueType* res=0;       // Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel private(sample,res)
  {
        size_t roffset=0;
#ifdef LAZY_STACK_PROF
        // initialise this thread's stack-usage bracket
        stackstart[omp_get_thread_num()]=&roffset;
        stackend[omp_get_thread_num()]=&roffset;
#endif
        #pragma omp for schedule(static)
        for (sample=0;sample<totalsamples;++sample)
        {
                roffset=0;
#ifdef _OPENMP
                res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
                // copy this sample's resolved values into the output vector;
                // samples are disjoint so no synchronisation is needed here
                DataVector::size_type outoffset=result->getPointOffset(sample,0);
                memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
        }
  }
#ifdef LAZY_STACK_PROF
  // report the deepest stack usage observed across all threads
  for (int i=0;i<getNumberOfThreads();++i)
  {
        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
//      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
        if (r>maxstackuse)
        {
                maxstackuse=r;
        }
  }
  cout << "Max resolve Stack use=" << maxstackuse << endl;
#endif
  return resptr;
}
1652 |
|
1653 |
std::string |
1654 |
DataLazy::toString() const |
1655 |
{ |
1656 |
ostringstream oss; |
1657 |
oss << "Lazy Data:"; |
1658 |
if (escriptParams.getPRINT_LAZY_TREE()==0) |
1659 |
{ |
1660 |
intoString(oss); |
1661 |
} |
1662 |
else |
1663 |
{ |
1664 |
oss << endl; |
1665 |
intoTreeString(oss,""); |
1666 |
} |
1667 |
return oss.str(); |
1668 |
} |
1669 |
|
1670 |
|
1671 |
// Append a compact single-line rendering of this subtree to oss.
// Identity nodes print their storage class (E/T/C) and address; operations
// print recursively in infix (binary) or prefix (everything else) form.
void
DataLazy::intoString(ostringstream& oss) const
{
//    oss << "[" << m_children <<";"<<m_height <<"]";
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get();
        break;
  case G_BINARY:
        // infix: (left op right)
        oss << '(';
        m_left->intoString(oss);
        oss << ' ' << opToString(m_op) << ' ';
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        // prefix: op(left)
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ')';
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", ";
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_NP1OUT_2P:
        // prefix with the two integer parameters shown explicitly
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", " << m_axis_offset << ", " << m_transpose;
        oss << ')';
        break;
  default:
        oss << "UNKNOWN";
  }
}
1729 |
|
1730 |
|
1731 |
// Append an indented one-node-per-line tree rendering of this subtree to oss.
// Each line starts with [rank:samplesize]; indent grows by one '.' per level.
void
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get() << endl;
        break;
  case G_BINARY:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        // single-child groups recurse on the left subtree only
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_NP1OUT_2P:
        // the two integer parameters are printed on the operator's line
        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  default:
        oss << "UNKNOWN";
  }
}
1786 |
|
1787 |
|
1788 |
// Construct a fully independent copy of this expression tree.
// Children are deep-copied recursively; the operation and its parameters
// (tolerance, axis offsets, transpose flag) are carried over unchanged.
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  // G_NP1OUT ops (SYM/NSYM) are single-argument - resolveNodeNP1OUT,
  // intoString and intoTreeString all use only m_left - so dereferencing
  // m_right here (as this case previously did) would crash on a null child.
  // Copy via the single-argument constructor like the other unary groups.
  // TODO(review): confirm the (ptr, op) constructor accepts NP1OUT operators.
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
  case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
1806 |
|
1807 |
|
1808 |
|
1809 |
// There is no single, natural interpretation of getLength on DataLazy. |
1810 |
// Instances of DataReady can look at the size of their vectors. |
1811 |
// For lazy though, it could be the size the data would be if it were resolved; |
1812 |
// or it could be some function of the lengths of the DataReady instances which |
1813 |
// form part of the expression. |
1814 |
// Rather than have people making assumptions, I have disabled the method. |
1815 |
DataTypes::ValueType::size_type |
1816 |
DataLazy::getLength() const |
1817 |
{ |
1818 |
throw DataException("getLength() does not make sense for lazy data."); |
1819 |
} |
// Slicing is not supported on lazy expressions; this always throws.
DataAbstract*
DataLazy::getSlice(const DataTypes::RegionType& region) const
{
  throw DataException("getSlice - not implemented for Lazy objects.");
}
// To do this we need to rely on our child nodes |
1830 |
DataTypes::ValueType::size_type |
1831 |
DataLazy::getPointOffset(int sampleNo, |
1832 |
int dataPointNo) |
1833 |
{ |
1834 |
if (m_op==IDENTITY) |
1835 |
{ |
1836 |
return m_id->getPointOffset(sampleNo,dataPointNo); |
1837 |
} |
1838 |
if (m_readytype!='E') |
1839 |
{ |
1840 |
collapse(); |
1841 |
return m_id->getPointOffset(sampleNo,dataPointNo); |
1842 |
} |
1843 |
// at this point we do not have an identity node and the expression will be Expanded |
1844 |
// so we only need to know which child to ask |
1845 |
if (m_left->m_readytype=='E') |
1846 |
{ |
1847 |
return m_left->getPointOffset(sampleNo,dataPointNo); |
1848 |
} |
1849 |
else |
1850 |
{ |
1851 |
return m_right->getPointOffset(sampleNo,dataPointNo); |
1852 |
} |
1853 |
} |
// To do this we need to rely on our child nodes |
1856 |
DataTypes::ValueType::size_type |
1857 |
DataLazy::getPointOffset(int sampleNo, |
1858 |
int dataPointNo) const |
1859 |
{ |
1860 |
if (m_op==IDENTITY) |
1861 |
{ |
1862 |
return m_id->getPointOffset(sampleNo,dataPointNo); |
1863 |
} |
1864 |
if (m_readytype=='E') |
1865 |
{ |
1866 |
// at this point we do not have an identity node and the expression will be Expanded |
1867 |
// so we only need to know which child to ask |
1868 |
if (m_left->m_readytype=='E') |
1869 |
{ |
1870 |
return m_left->getPointOffset(sampleNo,dataPointNo); |
1871 |
} |
1872 |
else |
1873 |
{ |
1874 |
return m_right->getPointOffset(sampleNo,dataPointNo); |
1875 |
} |
1876 |
} |
1877 |
if (m_readytype=='C') |
1878 |
{ |
1879 |
return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter |
1880 |
} |
1881 |
throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const)."); |
1882 |
} |
// I have decided to let Data:: handle this issue. |
1886 |
void |
1887 |
DataLazy::setToZero() |
1888 |
{ |
1889 |
// DataTypes::ValueType v(getNoValues(),0); |
1890 |
// m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v)); |
1891 |
// m_op=IDENTITY; |
1892 |
// m_right.reset(); |
1893 |
// m_left.reset(); |
1894 |
// m_readytype='C'; |
1895 |
// m_buffsRequired=1; |
1896 |
|
1897 |
privdebug=privdebug; // to stop the compiler complaining about unused privdebug |
1898 |
throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only)."); |
1899 |
} |
bool |
1902 |
DataLazy::actsExpanded() const |
1903 |
{ |
1904 |
return (m_readytype=='E'); |
1905 |
} |
} // end namespace |