/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6001 - (hide annotations)
Tue Mar 1 05:01:49 2016 UTC (3 years ago) by caltinay
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 67756 byte(s)
Bye bye esysUtils.
Also removed first.h as escript/DataTypes.h is now required everywhere
and fulfills that role by including a boost python header first.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "UnaryFuncs.h" // for escript::fsign
23     #include "Utils.h"
24    
25 caltinay 3317 #ifdef USE_NETCDF
26     #include <netcdfcpp.h>
27     #endif
28    
29 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
30 jfenwick 2199
31 jfenwick 5938 using namespace escript::DataTypes;
32    
33 caltinay 5972 #define NO_ARG
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48 jfenwick 2472
49 jfenwick 2501
50 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52 jfenwick 1899 /*
53     How does DataLazy work?
54     ~~~~~~~~~~~~~~~~~~~~~~~
55    
56     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57     denoted left and right.
58    
59     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60     This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63 caltinay 4286 Note that IDENTITY is not considered a unary operation.
64 jfenwick 1899
65     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66     It must however form a DAG (directed acyclic graph).
67     I will refer to individual DataLazy objects with the structure as nodes.
68    
69     Each node also stores:
70     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71 caltinay 5972 evaluated.
72 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73 caltinay 5972 evaluate the expression.
74 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
75    
76     When a new node is created, the above values are computed based on the values in the child nodes.
77     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79     The resolve method, which produces a DataReady from a DataLazy, does the following:
80     1) Create a DataReady to hold the new result.
81     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86     resolveSample returns a Vector* and an offset within that vector where the result is stored.
87     Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93 jfenwick 2037
94 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 jfenwick 2037 1) Add to the ES_optype.
96     2) determine what opgroup your operation belongs to (X)
97     3) add a string for the op to the end of ES_opstrings
98     4) increase ES_opcount
99     5) add an entry (X) to opgroups
100     6) add an entry to the switch in collapseToReady
101     7) add an entry to resolveX
102 jfenwick 1899 */
103    
104    
105 jfenwick 1865 using namespace std;
106 jfenwick 1868 using namespace boost;
107 jfenwick 1865
108     namespace escript
109     {
110    
111     namespace
112     {
113    
114 jfenwick 2777
115     // enabling this will print out when ever the maximum stacksize used by resolve increases
116     // it assumes _OPENMP is also in use
117     //#define LAZY_STACK_PROF
118    
119    
120    
121     #ifndef _OPENMP
122     #ifdef LAZY_STACK_PROF
123     #undef LAZY_STACK_PROF
124     #endif
125     #endif
126    
127    
128     #ifdef LAZY_STACK_PROF
129     std::vector<void*> stackstart(getNumberOfThreads());
130     std::vector<void*> stackend(getNumberOfThreads());
131     size_t maxstackuse=0;
132     #endif
133    
134 jfenwick 1886 enum ES_opgroup
135     {
136     G_UNKNOWN,
137     G_IDENTITY,
138 caltinay 5972 G_BINARY, // pointwise operations with two arguments
139     G_UNARY, // pointwise operations with one argument
140     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141     G_NP1OUT, // non-pointwise op with one output
142     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD, // general tensor product
144     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145     G_REDUCTION, // non-pointwise unary op with a scalar output
146 jfenwick 3035 G_CONDEVAL
147 jfenwick 1886 };
148    
149    
150    
151    
152 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153 caltinay 5972 "sin","cos","tan",
154     "asin","acos","atan","sinh","cosh","tanh","erf",
155     "asinh","acosh","atanh",
156     "log10","log","sign","abs","neg","pos","exp","sqrt",
157     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158     "symmetric","nonsymmetric",
159     "prod",
160     "transpose", "trace",
161     "swapaxes",
162     "minval", "maxval",
163     "condEval"};
164 jfenwick 3035 int ES_opcount=44;
165 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
167     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
168     G_UNARY,G_UNARY,G_UNARY, // 20
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
171     G_NP1OUT,G_NP1OUT,
172     G_TENSORPROD,
173     G_NP1OUT_P, G_NP1OUT_P,
174     G_NP1OUT_2P,
175     G_REDUCTION, G_REDUCTION,
176     G_CONDEVAL};
177 jfenwick 1886 inline
178     ES_opgroup
179     getOpgroup(ES_optype op)
180     {
181     return opgroups[op];
182     }
183    
184 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
185     FunctionSpace
186     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 caltinay 5972 // perhaps this should call interpolate and throw or something?
189     // maybe we need an interpolate node -
190     // that way, if interpolate is required in any other op we can just throw a
191     // programming error exception.
192 jfenwick 1879
193 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
194     FunctionSpace r=right->getFunctionSpace();
195     if (l!=r)
196     {
197 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198     if (res==1)
199 jfenwick 1943 {
200 caltinay 5972 return l;
201 jfenwick 1943 }
202 jfenwick 4264 if (res==-1)
203 jfenwick 1943 {
204 caltinay 5972 return r;
205 jfenwick 1943 }
206     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207     }
208     return l;
209 jfenwick 1865 }
210    
211     // return the shape of the result of "left op right"
212 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213 jfenwick 1865 DataTypes::ShapeType
214     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215     {
216 caltinay 5972 if (left->getShape()!=right->getShape())
217     {
218     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219     {
220     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221     }
222 jfenwick 2721
223 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
224     {
225     return right->getShape();
226     }
227     if (right->getRank()==0)
228     {
229     return left->getShape();
230     }
231     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232     }
233     return left->getShape();
234 jfenwick 1865 }
235    
236 jfenwick 2084 // return the shape for "op left"
237    
238     DataTypes::ShapeType
239 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240 jfenwick 2084 {
241 caltinay 5972 switch(op)
242     {
243     case TRANS:
244     { // for the scoping of variables
245     const DataTypes::ShapeType& s=left->getShape();
246     DataTypes::ShapeType sh;
247     int rank=left->getRank();
248     if (axis_offset<0 || axis_offset>rank)
249     {
250 caltinay 2779 stringstream e;
251     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252     throw DataException(e.str());
253 caltinay 5972 }
254     for (int i=0; i<rank; i++)
255     {
256     int index = (axis_offset+i)%rank;
257     sh.push_back(s[index]); // Append to new shape
258     }
259     return sh;
260     }
261     break;
262     case TRACE:
263     {
264     int rank=left->getRank();
265     if (rank<2)
266     {
267     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268     }
269     if ((axis_offset>rank-2) || (axis_offset<0))
270     {
271     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272     }
273     if (rank==2)
274     {
275     return DataTypes::scalarShape;
276     }
277     else if (rank==3)
278     {
279     DataTypes::ShapeType sh;
280     if (axis_offset==0)
281     {
282     sh.push_back(left->getShape()[2]);
283     }
284     else // offset==1
285     {
286     sh.push_back(left->getShape()[0]);
287     }
288     return sh;
289     }
290     else if (rank==4)
291     {
292     DataTypes::ShapeType sh;
293     const DataTypes::ShapeType& s=left->getShape();
294     if (axis_offset==0)
295     {
296     sh.push_back(s[2]);
297     sh.push_back(s[3]);
298     }
299     else if (axis_offset==1)
300     {
301     sh.push_back(s[0]);
302     sh.push_back(s[3]);
303     }
304     else // offset==2
305     {
306     sh.push_back(s[0]);
307     sh.push_back(s[1]);
308     }
309     return sh;
310     }
311     else // unknown rank
312     {
313     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314     }
315     }
316     break;
317     default:
318     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319     }
320 jfenwick 2084 }
321    
322 jfenwick 2496 DataTypes::ShapeType
323     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324     {
325     // This code taken from the Data.cpp swapaxes() method
326     // Some of the checks are probably redundant here
327     int axis0_tmp,axis1_tmp;
328     const DataTypes::ShapeType& s=left->getShape();
329     DataTypes::ShapeType out_shape;
330     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     int rank=left->getRank();
333     if (rank<2) {
334     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335     }
336     if (axis0<0 || axis0>rank-1) {
337 caltinay 2779 stringstream e;
338     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339     throw DataException(e.str());
340 jfenwick 2496 }
341     if (axis1<0 || axis1>rank-1) {
342 caltinay 2782 stringstream e;
343 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344     throw DataException(e.str());
345 jfenwick 2496 }
346     if (axis0 == axis1) {
347     throw DataException("Error - Data::swapaxes: axis indices must be different.");
348     }
349     if (axis0 > axis1) {
350     axis0_tmp=axis1;
351     axis1_tmp=axis0;
352     } else {
353     axis0_tmp=axis0;
354     axis1_tmp=axis1;
355     }
356     for (int i=0; i<rank; i++) {
357     if (i == axis0_tmp) {
358     out_shape.push_back(s[axis1_tmp]);
359     } else if (i == axis1_tmp) {
360     out_shape.push_back(s[axis0_tmp]);
361     } else {
362     out_shape.push_back(s[i]);
363     }
364     }
365     return out_shape;
366     }
367    
368    
369 jfenwick 2066 // determine the output shape for the general tensor product operation
370     // the additional parameters return information required later for the product
371     // the majority of this code is copy pasted from C_General_Tensor_Product
372     DataTypes::ShapeType
373     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374 jfenwick 1865 {
375 caltinay 5972
376 jfenwick 2066 // Get rank and shape of inputs
377     int rank0 = left->getRank();
378     int rank1 = right->getRank();
379     const DataTypes::ShapeType& shape0 = left->getShape();
380     const DataTypes::ShapeType& shape1 = right->getShape();
381    
382     // Prepare for the loops of the product and verify compatibility of shapes
383     int start0=0, start1=0;
384 caltinay 5972 if (transpose == 0) {}
385     else if (transpose == 1) { start0 = axis_offset; }
386     else if (transpose == 2) { start1 = rank1-axis_offset; }
387     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388 jfenwick 2066
389 jfenwick 2085 if (rank0<axis_offset)
390     {
391 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392 jfenwick 2085 }
393 jfenwick 2066
394     // Adjust the shapes for transpose
395 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
396     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
397     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399 jfenwick 2066
400     // Prepare for the loops of the product
401     SL=1, SM=1, SR=1;
402 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
403 jfenwick 2066 SL *= tmpShape0[i];
404     }
405 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
406 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408     }
409     SM *= tmpShape0[i];
410     }
411 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
412 jfenwick 2066 SR *= tmpShape1[i];
413     }
414    
415     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
417     { // block to limit the scope of out_index
418 jfenwick 2066 int out_index=0;
419     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421     }
422 jfenwick 2086
423     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424     {
425     ostringstream os;
426     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427     throw DataException(os.str());
428     }
429    
430 jfenwick 2066 return shape2;
431 jfenwick 1865 }
432    
433 caltinay 5972 } // end anonymous namespace
434 jfenwick 1865
435    
436 jfenwick 1899
437     // Return a string representing the operation
438 jfenwick 1865 const std::string&
439     opToString(ES_optype op)
440     {
441     if (op<0 || op>=ES_opcount)
442     {
443     op=UNKNOWNOP;
444     }
445     return ES_opstrings[op];
446     }
447    
448 jfenwick 2500 void DataLazy::LazyNodeSetup()
449     {
450     #ifdef _OPENMP
451     int numthreads=omp_get_max_threads();
452     m_samples.resize(numthreads*m_samplesize);
453     m_sampleids=new int[numthreads];
454     for (int i=0;i<numthreads;++i)
455     {
456     m_sampleids[i]=-1;
457     }
458     #else
459     m_samples.resize(m_samplesize);
460     m_sampleids=new int[1];
461     m_sampleids[0]=-1;
462     #endif // _OPENMP
463     }
464 jfenwick 1865
465 jfenwick 2500
466     // Creates an identity node
467 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
468 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
469     ,m_sampleids(0),
470     m_samples(1)
471 jfenwick 1865 {
472 jfenwick 1879 if (p->isLazy())
473     {
474 caltinay 5972 // I don't want identity of Lazy.
475     // Question: Why would that be so bad?
476     // Answer: We assume that the child of ID is something we can call getVector on
477     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478 jfenwick 1879 }
479     else
480     {
481 caltinay 5972 p->makeLazyShared();
482     DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483     makeIdentity(dr);
484 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
485 jfenwick 1879 }
486 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
487 jfenwick 1865 }
488    
489 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491     m_op(op),
492     m_axis_offset(0),
493     m_transpose(0),
494     m_SL(0), m_SM(0), m_SR(0)
495 jfenwick 1886 {
496 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497 jfenwick 1886 {
498 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499 jfenwick 1886 }
500 jfenwick 2066
501 jfenwick 1886 DataLazy_ptr lleft;
502     if (!left->isLazy())
503     {
504 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
505 jfenwick 1886 }
506     else
507     {
508 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
509 jfenwick 1886 }
510 jfenwick 1889 m_readytype=lleft->m_readytype;
511 jfenwick 1886 m_left=lleft;
512     m_samplesize=getNumDPPSample()*getNoValues();
513 jfenwick 2177 m_children=m_left->m_children+1;
514     m_height=m_left->m_height+1;
515 jfenwick 2500 LazyNodeSetup();
516 jfenwick 2177 SIZELIMIT
517 jfenwick 1886 }
518    
519    
520 jfenwick 1943 // In this constructor we need to consider interpolation
521 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
523     m_op(op),
524     m_SL(0), m_SM(0), m_SR(0)
525 jfenwick 1879 {
526 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
528 jfenwick 1886 {
529 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530 jfenwick 1886 }
531 jfenwick 1943
532 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
533 jfenwick 1943 {
534 caltinay 5972 FunctionSpace fs=getFunctionSpace();
535     Data ltemp(left);
536     Data tmp(ltemp,fs);
537     left=tmp.borrowDataPtr();
538 jfenwick 1943 }
539 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
540 jfenwick 1943 {
541 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
542     right=tmp.borrowDataPtr();
543 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544 jfenwick 1943 }
545     left->operandCheck(*right);
546    
547 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
548 jfenwick 1879 {
549 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
550 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551 jfenwick 1879 }
552     else
553     {
554 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
555 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556 jfenwick 1879 }
557     if (right->isLazy())
558     {
559 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
560 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561 jfenwick 1879 }
562     else
563     {
564 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
565 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566 jfenwick 1879 }
567 jfenwick 1889 char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;
569     if (lt=='E' || rt=='E')
570     {
571 caltinay 5972 m_readytype='E';
572 jfenwick 1889 }
573     else if (lt=='T' || rt=='T')
574     {
575 caltinay 5972 m_readytype='T';
576 jfenwick 1889 }
577     else
578     {
579 caltinay 5972 m_readytype='C';
580 jfenwick 1889 }
581 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
582 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
583     m_height=max(m_left->m_height,m_right->m_height)+1;
584 jfenwick 2500 LazyNodeSetup();
585 jfenwick 2177 SIZELIMIT
586 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587 jfenwick 1879 }
588    
589 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591     m_op(op),
592     m_axis_offset(axis_offset),
593     m_transpose(transpose)
594 jfenwick 2066 {
595     if ((getOpgroup(op)!=G_TENSORPROD))
596     {
597 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598 jfenwick 2066 }
599     if ((transpose>2) || (transpose<0))
600     {
601 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602 jfenwick 2066 }
603 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
604 jfenwick 2066 {
605 caltinay 5972 FunctionSpace fs=getFunctionSpace();
606     Data ltemp(left);
607     Data tmp(ltemp,fs);
608     left=tmp.borrowDataPtr();
609 jfenwick 2066 }
610 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
611 jfenwick 2066 {
612 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
613     right=tmp.borrowDataPtr();
614 jfenwick 2066 }
615 jfenwick 2195 // left->operandCheck(*right);
616 jfenwick 1879
617 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
618 jfenwick 2066 {
619 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
620 jfenwick 2066 }
621     else
622     {
623 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
624 jfenwick 2066 }
625     if (right->isLazy())
626     {
627 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
628 jfenwick 2066 }
629     else
630     {
631 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
632 jfenwick 2066 }
633     char lt=m_left->m_readytype;
634     char rt=m_right->m_readytype;
635     if (lt=='E' || rt=='E')
636     {
637 caltinay 5972 m_readytype='E';
638 jfenwick 2066 }
639     else if (lt=='T' || rt=='T')
640     {
641 caltinay 5972 m_readytype='T';
642 jfenwick 2066 }
643     else
644     {
645 caltinay 5972 m_readytype='C';
646 jfenwick 2066 }
647     m_samplesize=getNumDPPSample()*getNoValues();
648 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
649     m_height=max(m_left->m_height,m_right->m_height)+1;
650 jfenwick 2500 LazyNodeSetup();
651 jfenwick 2177 SIZELIMIT
652 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653 jfenwick 2066 }
654    
655    
656 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658     m_op(op),
659     m_axis_offset(axis_offset),
660     m_transpose(0),
661     m_tol(0)
662 jfenwick 2084 {
663     if ((getOpgroup(op)!=G_NP1OUT_P))
664     {
665 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666 jfenwick 2084 }
667     DataLazy_ptr lleft;
668     if (!left->isLazy())
669     {
670 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
671 jfenwick 2084 }
672     else
673     {
674 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
675 jfenwick 2084 }
676     m_readytype=lleft->m_readytype;
677     m_left=lleft;
678     m_samplesize=getNumDPPSample()*getNoValues();
679 jfenwick 2177 m_children=m_left->m_children+1;
680     m_height=m_left->m_height+1;
681 jfenwick 2500 LazyNodeSetup();
682 jfenwick 2177 SIZELIMIT
683 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684 jfenwick 2084 }
685    
686 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
688     m_op(op),
689     m_axis_offset(0),
690     m_transpose(0),
691     m_tol(tol)
692 jfenwick 2147 {
693     if ((getOpgroup(op)!=G_UNARY_P))
694     {
695 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696 jfenwick 2147 }
697     DataLazy_ptr lleft;
698     if (!left->isLazy())
699     {
700 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
701 jfenwick 2147 }
702     else
703     {
704 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
705 jfenwick 2147 }
706     m_readytype=lleft->m_readytype;
707     m_left=lleft;
708     m_samplesize=getNumDPPSample()*getNoValues();
709 jfenwick 2177 m_children=m_left->m_children+1;
710     m_height=m_left->m_height+1;
711 jfenwick 2500 LazyNodeSetup();
712 jfenwick 2177 SIZELIMIT
713 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714     }
715 jfenwick 2084
716 jfenwick 2496
717     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719     m_op(op),
720     m_axis_offset(axis0),
721     m_transpose(axis1),
722     m_tol(0)
723 jfenwick 2496 {
724     if ((getOpgroup(op)!=G_NP1OUT_2P))
725     {
726 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727 jfenwick 2496 }
728     DataLazy_ptr lleft;
729     if (!left->isLazy())
730     {
731 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
732 jfenwick 2496 }
733     else
734     {
735 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
736 jfenwick 2496 }
737     m_readytype=lleft->m_readytype;
738     m_left=lleft;
739     m_samplesize=getNumDPPSample()*getNoValues();
740     m_children=m_left->m_children+1;
741     m_height=m_left->m_height+1;
742 jfenwick 2500 LazyNodeSetup();
743 jfenwick 2496 SIZELIMIT
744     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745     }
746    
747 jfenwick 3035
748     namespace
749     {
750    
751     inline int max3(int a, int b, int c)
752     {
753 caltinay 5972 int t=(a>b?a:b);
754     return (t>c?t:c);
755 jfenwick 3035
756     }
757     }
758    
759     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
761     m_op(CONDEVAL),
762     m_axis_offset(0),
763     m_transpose(0),
764     m_tol(0)
765 jfenwick 3035 {
766    
767     DataLazy_ptr lmask;
768     DataLazy_ptr lleft;
769     DataLazy_ptr lright;
770     if (!mask->isLazy())
771     {
772 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
773 jfenwick 3035 }
774     else
775     {
776 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
777 jfenwick 3035 }
778     if (!left->isLazy())
779     {
780 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
781 jfenwick 3035 }
782     else
783     {
784 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
785 jfenwick 3035 }
786     if (!right->isLazy())
787     {
788 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
789 jfenwick 3035 }
790     else
791     {
792 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
793 jfenwick 3035 }
794     m_readytype=lmask->m_readytype;
795     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796     {
797 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
798 jfenwick 3035 }
799     m_left=lleft;
800     m_right=lright;
801     m_mask=lmask;
802     m_samplesize=getNumDPPSample()*getNoValues();
803     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805     LazyNodeSetup();
806     SIZELIMIT
807     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808     }
809    
810    
811    
812 jfenwick 1865 DataLazy::~DataLazy()
813     {
814 jfenwick 2500 delete[] m_sampleids;
815 jfenwick 1865 }
816    
817 jfenwick 1879
818 jfenwick 1899 /*
819     \brief Evaluates the expression using methods on Data.
820     This does the work for the collapse method.
821     For reasons of efficiency do not call this method on DataExpanded nodes.
822     */
823 jfenwick 1889 DataReady_ptr
824 jfenwick 4621 DataLazy::collapseToReady() const
825 jfenwick 1889 {
826     if (m_readytype=='E')
827 caltinay 5972 { // this is more an efficiency concern than anything else
828 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
829     }
830     if (m_op==IDENTITY)
831     {
832     return m_id;
833     }
834     DataReady_ptr pleft=m_left->collapseToReady();
835     Data left(pleft);
836     Data right;
837 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
838 jfenwick 1889 {
839     right=Data(m_right->collapseToReady());
840     }
841     Data result;
842     switch(m_op)
843     {
844     case ADD:
845 caltinay 5972 result=left+right;
846     break;
847     case SUB:
848     result=left-right;
849     break;
850     case MUL:
851     result=left*right;
852     break;
853     case DIV:
854     result=left/right;
855     break;
856 jfenwick 1889 case SIN:
857 caltinay 5972 result=left.sin();
858     break;
859 jfenwick 1889 case COS:
860 caltinay 5972 result=left.cos();
861     break;
862 jfenwick 1889 case TAN:
863 caltinay 5972 result=left.tan();
864     break;
865 jfenwick 1889 case ASIN:
866 caltinay 5972 result=left.asin();
867     break;
868 jfenwick 1889 case ACOS:
869 caltinay 5972 result=left.acos();
870     break;
871 jfenwick 1889 case ATAN:
872 caltinay 5972 result=left.atan();
873     break;
874 jfenwick 1889 case SINH:
875 caltinay 5972 result=left.sinh();
876     break;
877 jfenwick 1889 case COSH:
878 caltinay 5972 result=left.cosh();
879     break;
880 jfenwick 1889 case TANH:
881 caltinay 5972 result=left.tanh();
882     break;
883 jfenwick 1889 case ERF:
884 caltinay 5972 result=left.erf();
885     break;
886 jfenwick 1889 case ASINH:
887 caltinay 5972 result=left.asinh();
888     break;
889 jfenwick 1889 case ACOSH:
890 caltinay 5972 result=left.acosh();
891     break;
892 jfenwick 1889 case ATANH:
893 caltinay 5972 result=left.atanh();
894     break;
895 jfenwick 1889 case LOG10:
896 caltinay 5972 result=left.log10();
897     break;
898 jfenwick 1889 case LOG:
899 caltinay 5972 result=left.log();
900     break;
901 jfenwick 1889 case SIGN:
902 caltinay 5972 result=left.sign();
903     break;
904 jfenwick 1889 case ABS:
905 caltinay 5972 result=left.abs();
906     break;
907 jfenwick 1889 case NEG:
908 caltinay 5972 result=left.neg();
909     break;
910 jfenwick 1889 case POS:
911 caltinay 5972 // it doesn't mean anything for delayed.
912     // it will just trigger a deep copy of the lazy object
913     throw DataException("Programmer error - POS not supported for lazy data.");
914     break;
915 jfenwick 1889 case EXP:
916 caltinay 5972 result=left.exp();
917     break;
918 jfenwick 1889 case SQRT:
919 caltinay 5972 result=left.sqrt();
920     break;
921 jfenwick 1889 case RECIP:
922 caltinay 5972 result=left.oneOver();
923     break;
924 jfenwick 1889 case GZ:
925 caltinay 5972 result=left.wherePositive();
926     break;
927 jfenwick 1889 case LZ:
928 caltinay 5972 result=left.whereNegative();
929     break;
930 jfenwick 1889 case GEZ:
931 caltinay 5972 result=left.whereNonNegative();
932     break;
933 jfenwick 1889 case LEZ:
934 caltinay 5972 result=left.whereNonPositive();
935     break;
936 jfenwick 2147 case NEZ:
937 caltinay 5972 result=left.whereNonZero(m_tol);
938     break;
939 jfenwick 2147 case EZ:
940 caltinay 5972 result=left.whereZero(m_tol);
941     break;
942 jfenwick 2037 case SYM:
943 caltinay 5972 result=left.symmetric();
944     break;
945 jfenwick 2037 case NSYM:
946 caltinay 5972 result=left.nonsymmetric();
947     break;
948 jfenwick 2066 case PROD:
949 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950     break;
951 jfenwick 2084 case TRANS:
952 caltinay 5972 result=left.transpose(m_axis_offset);
953     break;
954 jfenwick 2084 case TRACE:
955 caltinay 5972 result=left.trace(m_axis_offset);
956     break;
957 jfenwick 2496 case SWAP:
958 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
959     break;
960 jfenwick 2721 case MINVAL:
961 caltinay 5972 result=left.minval();
962     break;
963 jfenwick 2721 case MAXVAL:
964 caltinay 5972 result=left.minval();
965     break;
966 jfenwick 1889 default:
967 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968 jfenwick 1889 }
969     return result.borrowReadyPtr();
970     }
971    
972 jfenwick 1899 /*
973     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
974     This method uses the original methods on the Data class to evaluate the expressions.
975     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
976     the purpose of using DataLazy in the first place).
977     */
978 jfenwick 1889 void
979 jfenwick 4621 DataLazy::collapse() const
980 jfenwick 1889 {
981     if (m_op==IDENTITY)
982     {
983 caltinay 5972 return;
984 jfenwick 1889 }
985     if (m_readytype=='E')
986 caltinay 5972 { // this is more an efficiency concern than anything else
987 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
988     }
989     m_id=collapseToReady();
990     m_op=IDENTITY;
991     }
992    
993 jfenwick 1899
994 jfenwick 1889
995    
996    
997 jfenwick 2147
998 phornby 1987 #define PROC_OP(TYPE,X) \
999 caltinay 5972 for (int j=0;j<onumsteps;++j)\
1000     {\
1001     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1002     { \
1003 jfenwick 2152 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1004 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1005 caltinay 5972 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1006 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1007 caltinay 5972 lroffset+=leftstep; \
1008     rroffset+=rightstep; \
1009     }\
1010     lroffset+=oleftstep;\
1011     rroffset+=orightstep;\
1012     }
1013 jfenwick 1889
1014 jfenwick 1899
1015 jfenwick 2500 // The result will be stored in m_samples
1016     // The return value is a pointer to the DataVector, offset is the offset within the return value
1017 jfenwick 5938 const DataTypes::RealVectorType*
1018 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1019 jfenwick 2500 {
1020     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1021 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1022 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1023     {
1024 caltinay 5972 collapse();
1025 jfenwick 2500 }
1026 caltinay 5972 if (m_op==IDENTITY)
1027 jfenwick 2500 {
1028     const ValueType& vec=m_id->getVectorRO();
1029     roffset=m_id->getPointOffset(sampleNo, 0);
1030 jfenwick 2777 #ifdef LAZY_STACK_PROF
1031     int x;
1032     if (&x<stackend[omp_get_thread_num()])
1033     {
1034     stackend[omp_get_thread_num()]=&x;
1035     }
1036     #endif
1037 jfenwick 2500 return &(vec);
1038     }
1039     if (m_readytype!='E')
1040     {
1041     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1042     }
1043 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1044     {
1045 caltinay 5972 roffset=tid*m_samplesize;
1046     return &(m_samples); // sample is already resolved
1047 jfenwick 2501 }
1048     m_sampleids[tid]=sampleNo;
1049 jfenwick 3035
1050 jfenwick 2500 switch (getOpgroup(m_op))
1051     {
1052     case G_UNARY:
1053     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1054     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1055     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1056     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1057     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1058     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1059 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1060 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1061 jfenwick 2500 default:
1062     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1063     }
1064     }
1065    
1066 jfenwick 5938 const DataTypes::RealVectorType*
1067 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1068 jfenwick 2500 {
1069 caltinay 5972 // we assume that any collapsing has been done before we get here
1070     // since we only have one argument we don't need to think about only
1071     // processing single points.
1072     // we will also know we won't get identity nodes
1073 jfenwick 2500 if (m_readytype!='E')
1074     {
1075     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1076     }
1077     if (m_op==IDENTITY)
1078     {
1079     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1080     }
1081 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1082 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1083     roffset=m_samplesize*tid;
1084     double* result=&(m_samples[roffset]);
1085     switch (m_op)
1086     {
1087 caltinay 5972 case SIN:
1088     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1089     break;
1090 jfenwick 2500 case COS:
1091 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1092     break;
1093 jfenwick 2500 case TAN:
1094 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1095     break;
1096 jfenwick 2500 case ASIN:
1097 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1098     break;
1099 jfenwick 2500 case ACOS:
1100 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1101     break;
1102 jfenwick 2500 case ATAN:
1103 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1104     break;
1105 jfenwick 2500 case SINH:
1106 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1107     break;
1108 jfenwick 2500 case COSH:
1109 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1110     break;
1111 jfenwick 2500 case TANH:
1112 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1113     break;
1114 jfenwick 2500 case ERF:
1115     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1116 caltinay 5972 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1117 jfenwick 2500 #else
1118 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::erf);
1119     break;
1120 jfenwick 2500 #endif
1121     case ASINH:
1122     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1123 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1124 jfenwick 2500 #else
1125 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1126 jfenwick 2500 #endif
1127 caltinay 5972 break;
1128 jfenwick 2500 case ACOSH:
1129     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1130 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1131 jfenwick 2500 #else
1132 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1133 jfenwick 2500 #endif
1134 caltinay 5972 break;
1135 jfenwick 2500 case ATANH:
1136     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1137 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1138 jfenwick 2500 #else
1139 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1140 jfenwick 2500 #endif
1141 caltinay 5972 break;
1142 jfenwick 2500 case LOG10:
1143 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1144     break;
1145 jfenwick 2500 case LOG:
1146 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1147     break;
1148 jfenwick 2500 case SIGN:
1149 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1150     break;
1151 jfenwick 2500 case ABS:
1152 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1153     break;
1154 jfenwick 2500 case NEG:
1155 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1156     break;
1157 jfenwick 2500 case POS:
1158 caltinay 5972 // it doesn't mean anything for delayed.
1159     // it will just trigger a deep copy of the lazy object
1160     throw DataException("Programmer error - POS not supported for lazy data.");
1161     break;
1162 jfenwick 2500 case EXP:
1163 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1164     break;
1165 jfenwick 2500 case SQRT:
1166 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1167     break;
1168 jfenwick 2500 case RECIP:
1169 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1170     break;
1171 jfenwick 2500 case GZ:
1172 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1173     break;
1174 jfenwick 2500 case LZ:
1175 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1176     break;
1177 jfenwick 2500 case GEZ:
1178 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1179     break;
1180 jfenwick 2500 case LEZ:
1181 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1182     break;
1183 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1184     case NEZ:
1185 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1186     break;
1187 jfenwick 2500 case EZ:
1188 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1189     break;
1190 jfenwick 2500
1191     default:
1192 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1193 jfenwick 2500 }
1194     return &(m_samples);
1195     }
1196    
1197    
1198 jfenwick 5938 const DataTypes::RealVectorType*
1199 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1200 jfenwick 2721 {
1201 caltinay 5972 // we assume that any collapsing has been done before we get here
1202     // since we only have one argument we don't need to think about only
1203     // processing single points.
1204     // we will also know we won't get identity nodes
1205 jfenwick 2721 if (m_readytype!='E')
1206     {
1207     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1208     }
1209     if (m_op==IDENTITY)
1210     {
1211     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1212     }
1213     size_t loffset=0;
1214 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1215 jfenwick 2721
1216     roffset=m_samplesize*tid;
1217 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1218 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1219 jfenwick 2721 double* result=&(m_samples[roffset]);
1220     switch (m_op)
1221     {
1222     case MINVAL:
1223 caltinay 5972 {
1224     for (unsigned int z=0;z<ndpps;++z)
1225     {
1226     FMin op;
1227     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1228     loffset+=psize;
1229     result++;
1230     }
1231     }
1232     break;
1233 jfenwick 2721 case MAXVAL:
1234 caltinay 5972 {
1235     for (unsigned int z=0;z<ndpps;++z)
1236     {
1237     FMax op;
1238     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1239     loffset+=psize;
1240     result++;
1241     }
1242     }
1243     break;
1244 jfenwick 2721 default:
1245 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1246 jfenwick 2721 }
1247     return &(m_samples);
1248     }
1249    
1250 jfenwick 5938 const DataTypes::RealVectorType*
1251 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1252 jfenwick 2500 {
1253 caltinay 5972 // we assume that any collapsing has been done before we get here
1254     // since we only have one argument we don't need to think about only
1255     // processing single points.
1256 jfenwick 2500 if (m_readytype!='E')
1257     {
1258     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1259     }
1260     if (m_op==IDENTITY)
1261     {
1262     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1263     }
1264     size_t subroffset;
1265     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1266     roffset=m_samplesize*tid;
1267     size_t loop=0;
1268     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1269     size_t step=getNoValues();
1270     size_t offset=roffset;
1271     switch (m_op)
1272     {
1273     case SYM:
1274 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1275     {
1276     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1277     subroffset+=step;
1278     offset+=step;
1279     }
1280     break;
1281 jfenwick 2500 case NSYM:
1282 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1283     {
1284     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285     subroffset+=step;
1286     offset+=step;
1287     }
1288     break;
1289 jfenwick 2500 default:
1290 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1291 jfenwick 2500 }
1292     return &m_samples;
1293     }
1294    
1295 jfenwick 5938 const DataTypes::RealVectorType*
1296 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1297 jfenwick 2500 {
1298 caltinay 5972 // we assume that any collapsing has been done before we get here
1299     // since we only have one argument we don't need to think about only
1300     // processing single points.
1301 jfenwick 2500 if (m_readytype!='E')
1302     {
1303     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1304     }
1305     if (m_op==IDENTITY)
1306     {
1307     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1308     }
1309     size_t subroffset;
1310     size_t offset;
1311     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1312     roffset=m_samplesize*tid;
1313     offset=roffset;
1314     size_t loop=0;
1315     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1316     size_t outstep=getNoValues();
1317     size_t instep=m_left->getNoValues();
1318     switch (m_op)
1319     {
1320     case TRACE:
1321 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1322     {
1323 jfenwick 2500 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1324 caltinay 5972 subroffset+=instep;
1325     offset+=outstep;
1326     }
1327     break;
1328 jfenwick 2500 case TRANS:
1329 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1330     {
1331 jfenwick 2500 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1332 caltinay 5972 subroffset+=instep;
1333     offset+=outstep;
1334     }
1335     break;
1336 jfenwick 2500 default:
1337 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1338 jfenwick 2500 }
1339     return &m_samples;
1340     }
1341    
1342    
1343 jfenwick 5938 const DataTypes::RealVectorType*
1344 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1345 jfenwick 2500 {
1346     if (m_readytype!='E')
1347     {
1348     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1349     }
1350     if (m_op==IDENTITY)
1351     {
1352     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1353     }
1354     size_t subroffset;
1355     size_t offset;
1356     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1357     roffset=m_samplesize*tid;
1358     offset=roffset;
1359     size_t loop=0;
1360     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1361     size_t outstep=getNoValues();
1362     size_t instep=m_left->getNoValues();
1363     switch (m_op)
1364     {
1365     case SWAP:
1366 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1367     {
1368 jfenwick 2500 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1369 caltinay 5972 subroffset+=instep;
1370     offset+=outstep;
1371     }
1372     break;
1373 jfenwick 2500 default:
1374 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1375 jfenwick 2500 }
1376     return &m_samples;
1377     }
1378    
1379 jfenwick 5938 const DataTypes::RealVectorType*
1380 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1381 jfenwick 3035 {
1382     if (m_readytype!='E')
1383     {
1384     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1385     }
1386     if (m_op!=CONDEVAL)
1387     {
1388     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1389     }
1390     size_t subroffset;
1391 jfenwick 2500
1392 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393     const ValueType* srcres=0;
1394     if ((*maskres)[subroffset]>0)
1395     {
1396 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397 jfenwick 3035 }
1398     else
1399     {
1400 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401 jfenwick 3035 }
1402    
1403     // Now we need to copy the result
1404    
1405     roffset=m_samplesize*tid;
1406     for (int i=0;i<m_samplesize;++i)
1407     {
1408 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1409 jfenwick 3035 }
1410    
1411     return &m_samples;
1412     }
1413    
1414 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1415     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1416     // If both children are expanded, then we can process them in a single operation (we treat
1417     // the whole sample as one big datapoint.
1418     // If one of the children is not expanded, then we need to treat each point in the sample
1419     // individually.
1420     // There is an additional complication when scalar operations are considered.
1421     // For example, 2+Vector.
1422     // In this case each double within the point is treated individually
1423 jfenwick 5938 const DataTypes::RealVectorType*
1424 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1425 jfenwick 2500 {
1426     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1427    
1428 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1429     // first work out which of the children are expanded
1430 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1431     bool rightExp=(m_right->m_readytype=='E');
1432     if (!leftExp && !rightExp)
1433     {
1434 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1435 jfenwick 2500 }
1436     bool leftScalar=(m_left->getRank()==0);
1437     bool rightScalar=(m_right->getRank()==0);
1438     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1439     {
1440 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1441 jfenwick 2500 }
1442     size_t leftsize=m_left->getNoValues();
1443     size_t rightsize=m_right->getNoValues();
1444 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1445     int leftstep=0; // how far should the left offset advance after each step
1446 jfenwick 2500 int rightstep=0;
1447 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1448     int oleftstep=0; // the o variables refer to the outer loop
1449     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1450 jfenwick 2500 int onumsteps=1;
1451    
1452 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1453 jfenwick 2500 bool RES=(rightExp && rightScalar);
1454 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1455 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1456 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1457 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1458 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1459 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1460    
1461 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1462 jfenwick 2500 {
1463 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1464     leftstep=0;
1465     rightstep=0;
1466     numsteps=1;
1467 jfenwick 2500 }
1468     else if (LES || RES)
1469     {
1470 caltinay 5972 chunksize=1;
1471     if (LES) // left is an expanded scalar
1472     {
1473     if (RS)
1474     {
1475     leftstep=1;
1476     rightstep=0;
1477     numsteps=m_left->getNumDPPSample();
1478     }
1479     else // RN or REN
1480     {
1481     leftstep=0;
1482     oleftstep=1;
1483     rightstep=1;
1484     orightstep=(RN ? -(int)rightsize : 0);
1485     numsteps=rightsize;
1486     onumsteps=m_left->getNumDPPSample();
1487     }
1488     }
1489     else // right is an expanded scalar
1490     {
1491     if (LS)
1492     {
1493     rightstep=1;
1494     leftstep=0;
1495     numsteps=m_right->getNumDPPSample();
1496     }
1497     else
1498     {
1499     rightstep=0;
1500     orightstep=1;
1501     leftstep=1;
1502     oleftstep=(LN ? -(int)leftsize : 0);
1503     numsteps=leftsize;
1504     onumsteps=m_right->getNumDPPSample();
1505     }
1506     }
1507 jfenwick 2500 }
1508 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1509 jfenwick 2500 {
1510 caltinay 5972 if (LEN) // and Right will be a single value
1511     {
1512     chunksize=rightsize;
1513     leftstep=rightsize;
1514     rightstep=0;
1515     numsteps=m_left->getNumDPPSample();
1516     if (RS)
1517     {
1518     numsteps*=leftsize;
1519     }
1520     }
1521     else // REN
1522     {
1523     chunksize=leftsize;
1524     rightstep=leftsize;
1525     leftstep=0;
1526     numsteps=m_right->getNumDPPSample();
1527     if (LS)
1528     {
1529     numsteps*=rightsize;
1530     }
1531     }
1532 jfenwick 2500 }
1533    
1534 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1535     // Get the values of sub-expressions
1536     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1537 jfenwick 2500 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1538     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1539     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1540     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1541     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1542     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1543     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1544     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1545    
1546     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1547     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1548    
1549    
1550     roffset=m_samplesize*tid;
1551 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1552 jfenwick 2500 switch(m_op)
1553     {
1554     case ADD:
1555     PROC_OP(NO_ARG,plus<double>());
1556 caltinay 5972 break;
1557 jfenwick 2500 case SUB:
1558 caltinay 5972 PROC_OP(NO_ARG,minus<double>());
1559     break;
1560 jfenwick 2500 case MUL:
1561 caltinay 5972 PROC_OP(NO_ARG,multiplies<double>());
1562     break;
1563 jfenwick 2500 case DIV:
1564 caltinay 5972 PROC_OP(NO_ARG,divides<double>());
1565     break;
1566 jfenwick 2500 case POW:
1567     PROC_OP(double (double,double),::pow);
1568 caltinay 5972 break;
1569 jfenwick 2500 default:
1570 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1571 jfenwick 2500 }
1572     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1573     return &m_samples;
1574     }
1575    
1576    
1577     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1578     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1579     // unlike the other resolve helpers, we must treat these datapoints separately.
1580 jfenwick 5938 const DataTypes::RealVectorType*
1581 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1582 jfenwick 2500 {
1583     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1584    
1585 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1586     // first work out which of the children are expanded
1587 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1588     bool rightExp=(m_right->m_readytype=='E');
1589     int steps=getNumDPPSample();
1590 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1591 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1592    
1593     int resultStep=getNoValues();
1594     roffset=m_samplesize*tid;
1595     size_t offset=roffset;
1596    
1597     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1598    
1599     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1600    
1601     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1602     cout << getNoValues() << endl;)
1603    
1604    
1605     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1606     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1607     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1608     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1609     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1610     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1611     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1612    
1613 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1614 jfenwick 2500 switch(m_op)
1615     {
1616     case PROD:
1617 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1618     {
1619     const double *ptr_0 = &((*left)[lroffset]);
1620     const double *ptr_1 = &((*right)[rroffset]);
1621 jfenwick 2500
1622     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1623     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1624    
1625 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1626 jfenwick 2500
1627 caltinay 5972 lroffset+=leftStep;
1628     rroffset+=rightStep;
1629     }
1630     break;
1631 jfenwick 2500 default:
1632 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1633 jfenwick 2500 }
1634     roffset=offset;
1635     return &m_samples;
1636     }
1637    
1638 jfenwick 1898
1639 jfenwick 5938 const DataTypes::RealVectorType*
1640 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1641 jfenwick 1879 {
1642 jfenwick 2271 #ifdef _OPENMP
1643 caltinay 5972 int tid=omp_get_thread_num();
1644 jfenwick 2271 #else
1645 caltinay 5972 int tid=0;
1646 jfenwick 2271 #endif
1647 jfenwick 2777
1648     #ifdef LAZY_STACK_PROF
1649 caltinay 5972 stackstart[tid]=&tid;
1650     stackend[tid]=&tid;
1651     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1652     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1653     #pragma omp critical
1654     if (d>maxstackuse)
1655     {
1656 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1657 caltinay 5972 maxstackuse=d;
1658     }
1659     return r;
1660 jfenwick 2777 #else
1661 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1662 jfenwick 2777 #endif
1663 jfenwick 2271 }
1664    
1665    
1666 jfenwick 2497 // This needs to do the work of the identity constructor
1667 jfenwick 2177 void
1668     DataLazy::resolveToIdentity()
1669     {
1670     if (m_op==IDENTITY)
1671 caltinay 5972 return;
1672 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1673 jfenwick 2177 makeIdentity(p);
1674     }
1675 jfenwick 1889
1676 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1677     {
1678     m_op=IDENTITY;
1679     m_axis_offset=0;
1680     m_transpose=0;
1681     m_SL=m_SM=m_SR=0;
1682     m_children=m_height=0;
1683     m_id=p;
1684     if(p->isConstant()) {m_readytype='C';}
1685     else if(p->isExpanded()) {m_readytype='E';}
1686     else if (p->isTagged()) {m_readytype='T';}
1687     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1688     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1689     m_left.reset();
1690     m_right.reset();
1691     }
1692    
1693 jfenwick 2497
1694     DataReady_ptr
1695     DataLazy::resolve()
1696     {
1697     resolveToIdentity();
1698     return m_id;
1699     }
1700    
1701 jfenwick 2799
1702     /* This is really a static method but I think that caused problems in windows */
1703     void
1704     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1705     {
1706     if (dats.empty())
1707     {
1708 caltinay 5972 return;
1709 jfenwick 2799 }
1710     vector<DataLazy*> work;
1711     FunctionSpace fs=dats[0]->getFunctionSpace();
1712     bool match=true;
1713 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1714 jfenwick 2799 {
1715 caltinay 5972 if (dats[i]->m_readytype!='E')
1716     {
1717     dats[i]->collapse();
1718     }
1719     if (dats[i]->m_op!=IDENTITY)
1720     {
1721     work.push_back(dats[i]);
1722     if (fs!=dats[i]->getFunctionSpace())
1723     {
1724     match=false;
1725     }
1726     }
1727 jfenwick 2799 }
1728     if (work.empty())
1729     {
1730 caltinay 5972 return; // no work to do
1731 jfenwick 2799 }
1732 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1733     { // it is possible that dats[0] is one of the objects which we discarded and
1734     // all the other functionspaces match.
1735     vector<DataExpanded*> dep;
1736     vector<ValueType*> vecs;
1737     for (int i=0;i<work.size();++i)
1738     {
1739     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1740     vecs.push_back(&(dep[i]->getVectorRW()));
1741     }
1742     int totalsamples=work[0]->getNumSamples();
1743     const ValueType* res=0; // Storage for answer
1744     int sample;
1745     #pragma omp parallel private(sample, res)
1746     {
1747     size_t roffset=0;
1748     #pragma omp for schedule(static)
1749     for (sample=0;sample<totalsamples;++sample)
1750     {
1751     roffset=0;
1752     int j;
1753     for (j=work.size()-1;j>=0;--j)
1754     {
1755 jfenwick 2799 #ifdef _OPENMP
1756 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1757 jfenwick 2799 #else
1758 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1759 jfenwick 2799 #endif
1760 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1761     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1762     }
1763     }
1764     }
1765     // Now we need to load the new results as identity ops into the lazy nodes
1766     for (int i=work.size()-1;i>=0;--i)
1767     {
1768 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1769 caltinay 5972 }
1770 jfenwick 2799 }
1771 caltinay 5972 else // functionspaces do not match
1772 jfenwick 2799 {
1773 caltinay 5972 for (int i=0;i<work.size();++i)
1774     {
1775     work[i]->resolveToIdentity();
1776     }
1777 jfenwick 2799 }
1778     }
1779    
1780    
1781    
1782 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1783     DataReady_ptr
1784     DataLazy::resolveNodeWorker()
1785     {
1786 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1787 jfenwick 2500 {
1788     collapse();
1789     }
1790 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1791 jfenwick 2500 {
1792     return m_id;
1793     }
1794 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1795 jfenwick 2500 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1796     ValueType& resvec=result->getVectorRW();
1797     DataReady_ptr resptr=DataReady_ptr(result);
1798    
1799     int sample;
1800     int totalsamples=getNumSamples();
1801 caltinay 5972 const ValueType* res=0; // Storage for answer
1802 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1803 jfenwick 2777 #pragma omp parallel private(sample,res)
1804 jfenwick 2500 {
1805 caltinay 5972 size_t roffset=0;
1806 jfenwick 2777 #ifdef LAZY_STACK_PROF
1807 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1808     stackend[omp_get_thread_num()]=&roffset;
1809 jfenwick 2777 #endif
1810 caltinay 5972 #pragma omp for schedule(static)
1811     for (sample=0;sample<totalsamples;++sample)
1812     {
1813     roffset=0;
1814 jfenwick 2500 #ifdef _OPENMP
1815 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1816 jfenwick 2500 #else
1817 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1818 jfenwick 2500 #endif
1819     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1820     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1821 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1822     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1823     }
1824 jfenwick 2500 }
1825 jfenwick 2777 #ifdef LAZY_STACK_PROF
1826     for (int i=0;i<getNumberOfThreads();++i)
1827     {
1828 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1829     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1830     if (r>maxstackuse)
1831     {
1832     maxstackuse=r;
1833     }
1834 jfenwick 2777 }
1835     cout << "Max resolve Stack use=" << maxstackuse << endl;
1836     #endif
1837 jfenwick 2500 return resptr;
1838     }
1839    
1840 jfenwick 1865 std::string
1841     DataLazy::toString() const
1842     {
1843 jfenwick 1886 ostringstream oss;
1844 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1845 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1846 jfenwick 2737 {
1847 caltinay 5972 case 1: // tree format
1848     oss << endl;
1849     intoTreeString(oss,"");
1850     break;
1851     case 2: // just the depth
1852     break;
1853 jfenwick 2795 default:
1854 caltinay 5972 intoString(oss);
1855     break;
1856 jfenwick 2737 }
1857 jfenwick 1886 return oss.str();
1858 jfenwick 1865 }
1859    
1860 jfenwick 1899
1861 jfenwick 1886 void
1862     DataLazy::intoString(ostringstream& oss) const
1863     {
1864 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1865 jfenwick 1886 switch (getOpgroup(m_op))
1866     {
1867 jfenwick 1889 case G_IDENTITY:
1868 caltinay 5972 if (m_id->isExpanded())
1869     {
1870     oss << "E";
1871     }
1872     else if (m_id->isTagged())
1873     {
1874     oss << "T";
1875     }
1876     else if (m_id->isConstant())
1877     {
1878     oss << "C";
1879     }
1880     else
1881     {
1882     oss << "?";
1883     }
1884     oss << '@' << m_id.get();
1885     break;
1886 jfenwick 1886 case G_BINARY:
1887 caltinay 5972 oss << '(';
1888     m_left->intoString(oss);
1889     oss << ' ' << opToString(m_op) << ' ';
1890     m_right->intoString(oss);
1891     oss << ')';
1892     break;
1893 jfenwick 1886 case G_UNARY:
1894 jfenwick 2147 case G_UNARY_P:
1895 jfenwick 2037 case G_NP1OUT:
1896 jfenwick 2084 case G_NP1OUT_P:
1897 jfenwick 2721 case G_REDUCTION:
1898 caltinay 5972 oss << opToString(m_op) << '(';
1899     m_left->intoString(oss);
1900     oss << ')';
1901     break;
1902 jfenwick 2066 case G_TENSORPROD:
1903 caltinay 5972 oss << opToString(m_op) << '(';
1904     m_left->intoString(oss);
1905     oss << ", ";
1906     m_right->intoString(oss);
1907     oss << ')';
1908     break;
1909 jfenwick 2496 case G_NP1OUT_2P:
1910 caltinay 5972 oss << opToString(m_op) << '(';
1911     m_left->intoString(oss);
1912     oss << ", " << m_axis_offset << ", " << m_transpose;
1913     oss << ')';
1914     break;
1915 jfenwick 3035 case G_CONDEVAL:
1916 caltinay 5972 oss << opToString(m_op)<< '(' ;
1917     m_mask->intoString(oss);
1918     oss << " ? ";
1919     m_left->intoString(oss);
1920     oss << " : ";
1921     m_right->intoString(oss);
1922     oss << ')';
1923     break;
1924 jfenwick 1886 default:
1925 caltinay 5972 oss << "UNKNOWN";
1926 jfenwick 1886 }
1927     }
1928    
1929 jfenwick 2737
1930     void
1931     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1932     {
1933     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1934     switch (getOpgroup(m_op))
1935     {
1936     case G_IDENTITY:
1937 caltinay 5972 if (m_id->isExpanded())
1938     {
1939     oss << "E";
1940     }
1941     else if (m_id->isTagged())
1942     {
1943     oss << "T";
1944     }
1945     else if (m_id->isConstant())
1946     {
1947     oss << "C";
1948     }
1949     else
1950     {
1951     oss << "?";
1952     }
1953     oss << '@' << m_id.get() << endl;
1954     break;
1955 jfenwick 2737 case G_BINARY:
1956 caltinay 5972 oss << opToString(m_op) << endl;
1957     indent+='.';
1958     m_left->intoTreeString(oss, indent);
1959     m_right->intoTreeString(oss, indent);
1960     break;
1961 jfenwick 2737 case G_UNARY:
1962     case G_UNARY_P:
1963     case G_NP1OUT:
1964     case G_NP1OUT_P:
1965     case G_REDUCTION:
1966 caltinay 5972 oss << opToString(m_op) << endl;
1967     indent+='.';
1968     m_left->intoTreeString(oss, indent);
1969     break;
1970 jfenwick 2737 case G_TENSORPROD:
1971 caltinay 5972 oss << opToString(m_op) << endl;
1972     indent+='.';
1973     m_left->intoTreeString(oss, indent);
1974     m_right->intoTreeString(oss, indent);
1975     break;
1976 jfenwick 2737 case G_NP1OUT_2P:
1977 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1978     indent+='.';
1979     m_left->intoTreeString(oss, indent);
1980     break;
1981 jfenwick 2737 default:
1982 caltinay 5972 oss << "UNKNOWN";
1983 jfenwick 2737 }
1984     }
1985    
1986    
1987 jfenwick 1865 DataAbstract*
1988 jfenwick 5938 DataLazy::deepCopy() const
1989 jfenwick 1865 {
1990 jfenwick 1935 switch (getOpgroup(m_op))
1991 jfenwick 1865 {
1992 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1993 caltinay 5972 case G_UNARY:
1994 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1995 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1996     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1997 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1998     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2000     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001 jfenwick 1935 default:
2002 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2003 jfenwick 1865 }
2004     }
2005    
2006    
2007 jfenwick 2721
2008 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2009     // Instances of DataReady can look at the size of their vectors.
2010     // For lazy though, it could be the size the data would be if it were resolved;
2011     // or it could be some function of the lengths of the DataReady instances which
2012     // form part of the expression.
2013     // Rather than have people making assumptions, I have disabled the method.
2014 jfenwick 5938 DataTypes::RealVectorType::size_type
2015 jfenwick 1865 DataLazy::getLength() const
2016     {
2017 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2018 jfenwick 1865 }
2019    
2020    
2021     DataAbstract*
2022     DataLazy::getSlice(const DataTypes::RegionType& region) const
2023     {
2024 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2025 jfenwick 1865 }
2026    
2027 jfenwick 1935
2028     // To do this we need to rely on our child nodes
2029 jfenwick 5938 DataTypes::RealVectorType::size_type
2030 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2031 jfenwick 1935 int dataPointNo)
2032     {
2033     if (m_op==IDENTITY)
2034     {
2035 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2036 jfenwick 1935 }
2037     if (m_readytype!='E')
2038     {
2039 caltinay 5972 collapse();
2040     return m_id->getPointOffset(sampleNo,dataPointNo);
2041 jfenwick 1935 }
2042     // at this point we do not have an identity node and the expression will be Expanded
2043     // so we only need to know which child to ask
2044     if (m_left->m_readytype=='E')
2045     {
2046 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2047 jfenwick 1935 }
2048     else
2049     {
2050 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2051 jfenwick 1935 }
2052     }
2053    
2054     // To do this we need to rely on our child nodes
2055 jfenwick 5938 DataTypes::RealVectorType::size_type
2056 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2057 jfenwick 1865 int dataPointNo) const
2058     {
2059 jfenwick 1935 if (m_op==IDENTITY)
2060     {
2061 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2062 jfenwick 1935 }
2063     if (m_readytype=='E')
2064     {
2065     // at this point we do not have an identity node and the expression will be Expanded
2066     // so we only need to know which child to ask
2067     if (m_left->m_readytype=='E')
2068     {
2069 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2070 jfenwick 1935 }
2071     else
2072     {
2073 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2074 jfenwick 1935 }
2075     }
2076     if (m_readytype=='C')
2077     {
2078 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2079 jfenwick 1935 }
2080     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2081 jfenwick 1865 }
2082    
2083 jfenwick 2271
2084     // I have decided to let Data:: handle this issue.
2085 jfenwick 1901 void
2086     DataLazy::setToZero()
2087     {
2088 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2089 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2090     // m_op=IDENTITY;
2091     // m_right.reset();
2092     // m_left.reset();
2093     // m_readytype='C';
2094     // m_buffsRequired=1;
2095    
2096 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2097 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2098 jfenwick 1901 }
2099    
2100 jfenwick 2271 bool
2101     DataLazy::actsExpanded() const
2102     {
2103 caltinay 5972 return (m_readytype=='E');
2104 jfenwick 2271 }
2105    
2106 caltinay 5997 } // end namespace
2107 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26