/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6057 - (hide annotations)
Thu Mar 10 06:00:58 2016 UTC (3 years, 1 month ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 68662 byte(s)
Well it passes my tests


1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "UnaryFuncs.h" // for escript::fsign
23     #include "Utils.h"
24 jfenwick 6042 #include "DataMaths.h"
25 caltinay 5972
26 caltinay 3317 #ifdef USE_NETCDF
27     #include <netcdfcpp.h>
28     #endif
29    
30 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
31 jfenwick 2199
32 jfenwick 5938 using namespace escript::DataTypes;
33    
34 caltinay 5972 #define NO_ARG
35    
36 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
37 jfenwick 2092 #define LAZYDEBUG(X)
38 jfenwick 2157 namespace
39     {
40     bool privdebug=false;
41 jfenwick 2092
42 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
43     #define DISABLEDEBUG privdebug=false;
44     }
45    
46 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47 jfenwick 2177
48 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49 jfenwick 2472
50 jfenwick 2501
51 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53 jfenwick 1899 /*
54     How does DataLazy work?
55     ~~~~~~~~~~~~~~~~~~~~~~~
56    
57     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
58     denoted left and right.
59    
60     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
61     This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64 caltinay 4286 Note that IDENTITY is not considered a unary operation.
65 jfenwick 1899
66     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67     It must however form a DAG (directed acyclic graph).
68     I will refer to individual DataLazy objects with the structure as nodes.
69    
70     Each node also stores:
71     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72 caltinay 5972 evaluated.
73 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74 caltinay 5972 evaluate the expression.
75 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
76    
77     When a new node is created, the above values are computed based on the values in the child nodes.
78     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
79    
80     The resolve method, which produces a DataReady from a DataLazy, does the following:
81     1) Create a DataReady to hold the new result.
82     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
83     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
84    
85     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
86    
87     resolveSample returns a Vector* and an offset within that vector where the result is stored.
88     Normally, this would be v, but for identity nodes their internal vector is returned instead.
89    
90     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
91    
92     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94 jfenwick 2037
95 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96 jfenwick 2037 1) Add to the ES_optype.
97     2) determine what opgroup your operation belongs to (X)
98     3) add a string for the op to the end of ES_opstrings
99     4) increase ES_opcount
100     5) add an entry (X) to opgroups
101     6) add an entry to the switch in collapseToReady
102     7) add an entry to resolveX
103 jfenwick 1899 */
104    
105    
106 jfenwick 1865 using namespace std;
107 jfenwick 1868 using namespace boost;
108 jfenwick 1865
109     namespace escript
110     {
111    
112     namespace
113     {
114    
115 jfenwick 2777
116     // enabling this will print out when ever the maximum stacksize used by resolve increases
117     // it assumes _OPENMP is also in use
118     //#define LAZY_STACK_PROF
119    
120    
121    
122     #ifndef _OPENMP
123     #ifdef LAZY_STACK_PROF
124     #undef LAZY_STACK_PROF
125     #endif
126     #endif
127    
128    
129     #ifdef LAZY_STACK_PROF
130     std::vector<void*> stackstart(getNumberOfThreads());
131     std::vector<void*> stackend(getNumberOfThreads());
132     size_t maxstackuse=0;
133     #endif
134    
135 jfenwick 1886 enum ES_opgroup
136     {
137     G_UNKNOWN,
138     G_IDENTITY,
139 caltinay 5972 G_BINARY, // pointwise operations with two arguments
140     G_UNARY, // pointwise operations with one argument
141     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
142     G_NP1OUT, // non-pointwise op with one output
143     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
144     G_TENSORPROD, // general tensor product
145     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
146     G_REDUCTION, // non-pointwise unary op with a scalar output
147 jfenwick 3035 G_CONDEVAL
148 jfenwick 1886 };
149    
150    
151    
152    
153 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154 caltinay 5972 "sin","cos","tan",
155     "asin","acos","atan","sinh","cosh","tanh","erf",
156     "asinh","acosh","atanh",
157     "log10","log","sign","abs","neg","pos","exp","sqrt",
158     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159     "symmetric","nonsymmetric",
160     "prod",
161     "transpose", "trace",
162     "swapaxes",
163     "minval", "maxval",
164     "condEval"};
165 jfenwick 3035 int ES_opcount=44;
166 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
168     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
169     G_UNARY,G_UNARY,G_UNARY, // 20
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
171     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
172     G_NP1OUT,G_NP1OUT,
173     G_TENSORPROD,
174     G_NP1OUT_P, G_NP1OUT_P,
175     G_NP1OUT_2P,
176     G_REDUCTION, G_REDUCTION,
177     G_CONDEVAL};
178 jfenwick 1886 inline
179     ES_opgroup
180     getOpgroup(ES_optype op)
181     {
182     return opgroups[op];
183     }
184    
185 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
186     FunctionSpace
187     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188     {
189 caltinay 5972 // perhaps this should call interpolate and throw or something?
190     // maybe we need an interpolate node -
191     // that way, if interpolate is required in any other op we can just throw a
192     // programming error exception.
193 jfenwick 1879
194 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
195     FunctionSpace r=right->getFunctionSpace();
196     if (l!=r)
197     {
198 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199     if (res==1)
200 jfenwick 1943 {
201 caltinay 5972 return l;
202 jfenwick 1943 }
203 jfenwick 4264 if (res==-1)
204 jfenwick 1943 {
205 caltinay 5972 return r;
206 jfenwick 1943 }
207     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208     }
209     return l;
210 jfenwick 1865 }
211    
212     // return the shape of the result of "left op right"
213 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
214 jfenwick 1865 DataTypes::ShapeType
215     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216     {
217 caltinay 5972 if (left->getShape()!=right->getShape())
218     {
219     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220     {
221     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
222     }
223 jfenwick 2721
224 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
225     {
226     return right->getShape();
227     }
228     if (right->getRank()==0)
229     {
230     return left->getShape();
231     }
232     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233     }
234     return left->getShape();
235 jfenwick 1865 }
236    
237 jfenwick 2084 // return the shape for "op left"
238    
239     DataTypes::ShapeType
240 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241 jfenwick 2084 {
242 caltinay 5972 switch(op)
243     {
244     case TRANS:
245     { // for the scoping of variables
246     const DataTypes::ShapeType& s=left->getShape();
247     DataTypes::ShapeType sh;
248     int rank=left->getRank();
249     if (axis_offset<0 || axis_offset>rank)
250     {
251 caltinay 2779 stringstream e;
252     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253     throw DataException(e.str());
254 caltinay 5972 }
255     for (int i=0; i<rank; i++)
256     {
257     int index = (axis_offset+i)%rank;
258     sh.push_back(s[index]); // Append to new shape
259     }
260     return sh;
261     }
262     break;
263     case TRACE:
264     {
265     int rank=left->getRank();
266     if (rank<2)
267     {
268     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269     }
270     if ((axis_offset>rank-2) || (axis_offset<0))
271     {
272     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273     }
274     if (rank==2)
275     {
276     return DataTypes::scalarShape;
277     }
278     else if (rank==3)
279     {
280     DataTypes::ShapeType sh;
281     if (axis_offset==0)
282     {
283     sh.push_back(left->getShape()[2]);
284     }
285     else // offset==1
286     {
287     sh.push_back(left->getShape()[0]);
288     }
289     return sh;
290     }
291     else if (rank==4)
292     {
293     DataTypes::ShapeType sh;
294     const DataTypes::ShapeType& s=left->getShape();
295     if (axis_offset==0)
296     {
297     sh.push_back(s[2]);
298     sh.push_back(s[3]);
299     }
300     else if (axis_offset==1)
301     {
302     sh.push_back(s[0]);
303     sh.push_back(s[3]);
304     }
305     else // offset==2
306     {
307     sh.push_back(s[0]);
308     sh.push_back(s[1]);
309     }
310     return sh;
311     }
312     else // unknown rank
313     {
314     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315     }
316     }
317     break;
318     default:
319     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320     }
321 jfenwick 2084 }
322    
323 jfenwick 2496 DataTypes::ShapeType
324     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
325     {
326     // This code taken from the Data.cpp swapaxes() method
327     // Some of the checks are probably redundant here
328     int axis0_tmp,axis1_tmp;
329     const DataTypes::ShapeType& s=left->getShape();
330     DataTypes::ShapeType out_shape;
331     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
332     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
333     int rank=left->getRank();
334     if (rank<2) {
335     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336     }
337     if (axis0<0 || axis0>rank-1) {
338 caltinay 2779 stringstream e;
339     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340     throw DataException(e.str());
341 jfenwick 2496 }
342     if (axis1<0 || axis1>rank-1) {
343 caltinay 2782 stringstream e;
344 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345     throw DataException(e.str());
346 jfenwick 2496 }
347     if (axis0 == axis1) {
348     throw DataException("Error - Data::swapaxes: axis indices must be different.");
349     }
350     if (axis0 > axis1) {
351     axis0_tmp=axis1;
352     axis1_tmp=axis0;
353     } else {
354     axis0_tmp=axis0;
355     axis1_tmp=axis1;
356     }
357     for (int i=0; i<rank; i++) {
358     if (i == axis0_tmp) {
359     out_shape.push_back(s[axis1_tmp]);
360     } else if (i == axis1_tmp) {
361     out_shape.push_back(s[axis0_tmp]);
362     } else {
363     out_shape.push_back(s[i]);
364     }
365     }
366     return out_shape;
367     }
368    
369    
370 jfenwick 2066 // determine the output shape for the general tensor product operation
371     // the additional parameters return information required later for the product
372     // the majority of this code is copy pasted from C_General_Tensor_Product
373     DataTypes::ShapeType
374     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375 jfenwick 1865 {
376 caltinay 5972
377 jfenwick 2066 // Get rank and shape of inputs
378     int rank0 = left->getRank();
379     int rank1 = right->getRank();
380     const DataTypes::ShapeType& shape0 = left->getShape();
381     const DataTypes::ShapeType& shape1 = right->getShape();
382    
383     // Prepare for the loops of the product and verify compatibility of shapes
384     int start0=0, start1=0;
385 caltinay 5972 if (transpose == 0) {}
386     else if (transpose == 1) { start0 = axis_offset; }
387     else if (transpose == 2) { start1 = rank1-axis_offset; }
388     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389 jfenwick 2066
390 jfenwick 2085 if (rank0<axis_offset)
391     {
392 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393 jfenwick 2085 }
394 jfenwick 2066
395     // Adjust the shapes for transpose
396 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
397     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
398     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400 jfenwick 2066
401     // Prepare for the loops of the product
402     SL=1, SM=1, SR=1;
403 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
404 jfenwick 2066 SL *= tmpShape0[i];
405     }
406 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
407 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409     }
410     SM *= tmpShape0[i];
411     }
412 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
413 jfenwick 2066 SR *= tmpShape1[i];
414     }
415    
416     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
418     { // block to limit the scope of out_index
419 jfenwick 2066 int out_index=0;
420     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
422     }
423 jfenwick 2086
424     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
425     {
426     ostringstream os;
427     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
428     throw DataException(os.str());
429     }
430    
431 jfenwick 2066 return shape2;
432 jfenwick 1865 }
433    
434 caltinay 5972 } // end anonymous namespace
435 jfenwick 1865
436    
437 jfenwick 1899
438     // Return a string representing the operation
439 jfenwick 1865 const std::string&
440     opToString(ES_optype op)
441     {
442     if (op<0 || op>=ES_opcount)
443     {
444     op=UNKNOWNOP;
445     }
446     return ES_opstrings[op];
447     }
448    
449 jfenwick 2500 void DataLazy::LazyNodeSetup()
450     {
451     #ifdef _OPENMP
452     int numthreads=omp_get_max_threads();
453     m_samples.resize(numthreads*m_samplesize);
454     m_sampleids=new int[numthreads];
455     for (int i=0;i<numthreads;++i)
456     {
457     m_sampleids[i]=-1;
458     }
459     #else
460     m_samples.resize(m_samplesize);
461     m_sampleids=new int[1];
462     m_sampleids[0]=-1;
463     #endif // _OPENMP
464     }
465 jfenwick 1865
466 jfenwick 2500
467     // Creates an identity node
468 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
469 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
470     ,m_sampleids(0),
471     m_samples(1)
472 jfenwick 1865 {
473 jfenwick 1879 if (p->isLazy())
474     {
475 caltinay 5972 // I don't want identity of Lazy.
476     // Question: Why would that be so bad?
477     // Answer: We assume that the child of ID is something we can call getVector on
478     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
479 jfenwick 1879 }
480     else
481     {
482 caltinay 5972 p->makeLazyShared();
483     DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484     makeIdentity(dr);
485 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486 jfenwick 1879 }
487 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
488 jfenwick 1865 }
489    
490 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492     m_op(op),
493     m_axis_offset(0),
494     m_transpose(0),
495     m_SL(0), m_SM(0), m_SR(0)
496 jfenwick 1886 {
497 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498 jfenwick 1886 {
499 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500 jfenwick 1886 }
501 jfenwick 2066
502 jfenwick 1886 DataLazy_ptr lleft;
503     if (!left->isLazy())
504     {
505 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
506 jfenwick 1886 }
507     else
508     {
509 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
510 jfenwick 1886 }
511 jfenwick 1889 m_readytype=lleft->m_readytype;
512 jfenwick 1886 m_left=lleft;
513     m_samplesize=getNumDPPSample()*getNoValues();
514 jfenwick 2177 m_children=m_left->m_children+1;
515     m_height=m_left->m_height+1;
516 jfenwick 2500 LazyNodeSetup();
517 jfenwick 2177 SIZELIMIT
518 jfenwick 1886 }
519    
520    
521 jfenwick 1943 // In this constructor we need to consider interpolation
522 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
524     m_op(op),
525     m_SL(0), m_SM(0), m_SR(0)
526 jfenwick 1879 {
527 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
529 jfenwick 1886 {
530 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531 jfenwick 1886 }
532 jfenwick 1943
533 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
534 jfenwick 1943 {
535 caltinay 5972 FunctionSpace fs=getFunctionSpace();
536     Data ltemp(left);
537     Data tmp(ltemp,fs);
538     left=tmp.borrowDataPtr();
539 jfenwick 1943 }
540 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
541 jfenwick 1943 {
542 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
543     right=tmp.borrowDataPtr();
544 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545 jfenwick 1943 }
546     left->operandCheck(*right);
547    
548 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
549 jfenwick 1879 {
550 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
551 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552 jfenwick 1879 }
553     else
554     {
555 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
556 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557 jfenwick 1879 }
558     if (right->isLazy())
559     {
560 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
561 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562 jfenwick 1879 }
563     else
564     {
565 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
566 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567 jfenwick 1879 }
568 jfenwick 1889 char lt=m_left->m_readytype;
569     char rt=m_right->m_readytype;
570     if (lt=='E' || rt=='E')
571     {
572 caltinay 5972 m_readytype='E';
573 jfenwick 1889 }
574     else if (lt=='T' || rt=='T')
575     {
576 caltinay 5972 m_readytype='T';
577 jfenwick 1889 }
578     else
579     {
580 caltinay 5972 m_readytype='C';
581 jfenwick 1889 }
582 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
583 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
584     m_height=max(m_left->m_height,m_right->m_height)+1;
585 jfenwick 2500 LazyNodeSetup();
586 jfenwick 2177 SIZELIMIT
587 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588 jfenwick 1879 }
589    
590 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592     m_op(op),
593     m_axis_offset(axis_offset),
594     m_transpose(transpose)
595 jfenwick 2066 {
596     if ((getOpgroup(op)!=G_TENSORPROD))
597     {
598 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599 jfenwick 2066 }
600     if ((transpose>2) || (transpose<0))
601     {
602 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603 jfenwick 2066 }
604 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
605 jfenwick 2066 {
606 caltinay 5972 FunctionSpace fs=getFunctionSpace();
607     Data ltemp(left);
608     Data tmp(ltemp,fs);
609     left=tmp.borrowDataPtr();
610 jfenwick 2066 }
611 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
612 jfenwick 2066 {
613 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
614     right=tmp.borrowDataPtr();
615 jfenwick 2066 }
616 jfenwick 2195 // left->operandCheck(*right);
617 jfenwick 1879
618 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
619 jfenwick 2066 {
620 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
621 jfenwick 2066 }
622     else
623     {
624 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
625 jfenwick 2066 }
626     if (right->isLazy())
627     {
628 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
629 jfenwick 2066 }
630     else
631     {
632 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
633 jfenwick 2066 }
634     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')
637     {
638 caltinay 5972 m_readytype='E';
639 jfenwick 2066 }
640     else if (lt=='T' || rt=='T')
641     {
642 caltinay 5972 m_readytype='T';
643 jfenwick 2066 }
644     else
645     {
646 caltinay 5972 m_readytype='C';
647 jfenwick 2066 }
648     m_samplesize=getNumDPPSample()*getNoValues();
649 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
650     m_height=max(m_left->m_height,m_right->m_height)+1;
651 jfenwick 2500 LazyNodeSetup();
652 jfenwick 2177 SIZELIMIT
653 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654 jfenwick 2066 }
655    
656    
657 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659     m_op(op),
660     m_axis_offset(axis_offset),
661     m_transpose(0),
662     m_tol(0)
663 jfenwick 2084 {
664     if ((getOpgroup(op)!=G_NP1OUT_P))
665     {
666 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667 jfenwick 2084 }
668     DataLazy_ptr lleft;
669     if (!left->isLazy())
670     {
671 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
672 jfenwick 2084 }
673     else
674     {
675 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
676 jfenwick 2084 }
677     m_readytype=lleft->m_readytype;
678     m_left=lleft;
679     m_samplesize=getNumDPPSample()*getNoValues();
680 jfenwick 2177 m_children=m_left->m_children+1;
681     m_height=m_left->m_height+1;
682 jfenwick 2500 LazyNodeSetup();
683 jfenwick 2177 SIZELIMIT
684 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685 jfenwick 2084 }
686    
687 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
689     m_op(op),
690     m_axis_offset(0),
691     m_transpose(0),
692     m_tol(tol)
693 jfenwick 2147 {
694     if ((getOpgroup(op)!=G_UNARY_P))
695     {
696 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697 jfenwick 2147 }
698     DataLazy_ptr lleft;
699     if (!left->isLazy())
700     {
701 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
702 jfenwick 2147 }
703     else
704     {
705 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
706 jfenwick 2147 }
707     m_readytype=lleft->m_readytype;
708     m_left=lleft;
709     m_samplesize=getNumDPPSample()*getNoValues();
710 jfenwick 2177 m_children=m_left->m_children+1;
711     m_height=m_left->m_height+1;
712 jfenwick 2500 LazyNodeSetup();
713 jfenwick 2177 SIZELIMIT
714 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715     }
716 jfenwick 2084
717 jfenwick 2496
718     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720     m_op(op),
721     m_axis_offset(axis0),
722     m_transpose(axis1),
723     m_tol(0)
724 jfenwick 2496 {
725     if ((getOpgroup(op)!=G_NP1OUT_2P))
726     {
727 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
728 jfenwick 2496 }
729     DataLazy_ptr lleft;
730     if (!left->isLazy())
731     {
732 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
733 jfenwick 2496 }
734     else
735     {
736 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
737 jfenwick 2496 }
738     m_readytype=lleft->m_readytype;
739     m_left=lleft;
740     m_samplesize=getNumDPPSample()*getNoValues();
741     m_children=m_left->m_children+1;
742     m_height=m_left->m_height+1;
743 jfenwick 2500 LazyNodeSetup();
744 jfenwick 2496 SIZELIMIT
745     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746     }
747    
748 jfenwick 3035
749     namespace
750     {
751    
752     inline int max3(int a, int b, int c)
753     {
754 caltinay 5972 int t=(a>b?a:b);
755     return (t>c?t:c);
756 jfenwick 3035
757     }
758     }
759    
760     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
762     m_op(CONDEVAL),
763     m_axis_offset(0),
764     m_transpose(0),
765     m_tol(0)
766 jfenwick 3035 {
767    
768     DataLazy_ptr lmask;
769     DataLazy_ptr lleft;
770     DataLazy_ptr lright;
771     if (!mask->isLazy())
772     {
773 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
774 jfenwick 3035 }
775     else
776     {
777 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
778 jfenwick 3035 }
779     if (!left->isLazy())
780     {
781 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
782 jfenwick 3035 }
783     else
784     {
785 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
786 jfenwick 3035 }
787     if (!right->isLazy())
788     {
789 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
790 jfenwick 3035 }
791     else
792     {
793 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
794 jfenwick 3035 }
795     m_readytype=lmask->m_readytype;
796     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797     {
798 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
799 jfenwick 3035 }
800     m_left=lleft;
801     m_right=lright;
802     m_mask=lmask;
803     m_samplesize=getNumDPPSample()*getNoValues();
804     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806     LazyNodeSetup();
807     SIZELIMIT
808     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809     }
810    
811    
812    
813 jfenwick 1865 DataLazy::~DataLazy()
814     {
815 jfenwick 2500 delete[] m_sampleids;
816 jfenwick 1865 }
817    
818 jfenwick 1879
819 jfenwick 1899 /*
820     \brief Evaluates the expression using methods on Data.
821     This does the work for the collapse method.
822     For reasons of efficiency do not call this method on DataExpanded nodes.
823     */
824 jfenwick 1889 DataReady_ptr
825 jfenwick 4621 DataLazy::collapseToReady() const
826 jfenwick 1889 {
827     if (m_readytype=='E')
828 caltinay 5972 { // this is more an efficiency concern than anything else
829 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
830     }
831     if (m_op==IDENTITY)
832     {
833     return m_id;
834     }
835     DataReady_ptr pleft=m_left->collapseToReady();
836     Data left(pleft);
837     Data right;
838 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
839 jfenwick 1889 {
840     right=Data(m_right->collapseToReady());
841     }
842     Data result;
843     switch(m_op)
844     {
845     case ADD:
846 caltinay 5972 result=left+right;
847     break;
848     case SUB:
849     result=left-right;
850     break;
851     case MUL:
852     result=left*right;
853     break;
854     case DIV:
855     result=left/right;
856     break;
857 jfenwick 1889 case SIN:
858 caltinay 5972 result=left.sin();
859     break;
860 jfenwick 1889 case COS:
861 caltinay 5972 result=left.cos();
862     break;
863 jfenwick 1889 case TAN:
864 caltinay 5972 result=left.tan();
865     break;
866 jfenwick 1889 case ASIN:
867 caltinay 5972 result=left.asin();
868     break;
869 jfenwick 1889 case ACOS:
870 caltinay 5972 result=left.acos();
871     break;
872 jfenwick 1889 case ATAN:
873 caltinay 5972 result=left.atan();
874     break;
875 jfenwick 1889 case SINH:
876 caltinay 5972 result=left.sinh();
877     break;
878 jfenwick 1889 case COSH:
879 caltinay 5972 result=left.cosh();
880     break;
881 jfenwick 1889 case TANH:
882 caltinay 5972 result=left.tanh();
883     break;
884 jfenwick 1889 case ERF:
885 caltinay 5972 result=left.erf();
886     break;
887 jfenwick 1889 case ASINH:
888 caltinay 5972 result=left.asinh();
889     break;
890 jfenwick 1889 case ACOSH:
891 caltinay 5972 result=left.acosh();
892     break;
893 jfenwick 1889 case ATANH:
894 caltinay 5972 result=left.atanh();
895     break;
896 jfenwick 1889 case LOG10:
897 caltinay 5972 result=left.log10();
898     break;
899 jfenwick 1889 case LOG:
900 caltinay 5972 result=left.log();
901     break;
902 jfenwick 1889 case SIGN:
903 caltinay 5972 result=left.sign();
904     break;
905 jfenwick 1889 case ABS:
906 caltinay 5972 result=left.abs();
907     break;
908 jfenwick 1889 case NEG:
909 caltinay 5972 result=left.neg();
910     break;
911 jfenwick 1889 case POS:
912 caltinay 5972 // it doesn't mean anything for delayed.
913     // it will just trigger a deep copy of the lazy object
914     throw DataException("Programmer error - POS not supported for lazy data.");
915     break;
916 jfenwick 1889 case EXP:
917 caltinay 5972 result=left.exp();
918     break;
919 jfenwick 1889 case SQRT:
920 caltinay 5972 result=left.sqrt();
921     break;
922 jfenwick 1889 case RECIP:
923 caltinay 5972 result=left.oneOver();
924     break;
925 jfenwick 1889 case GZ:
926 caltinay 5972 result=left.wherePositive();
927     break;
928 jfenwick 1889 case LZ:
929 caltinay 5972 result=left.whereNegative();
930     break;
931 jfenwick 1889 case GEZ:
932 caltinay 5972 result=left.whereNonNegative();
933     break;
934 jfenwick 1889 case LEZ:
935 caltinay 5972 result=left.whereNonPositive();
936     break;
937 jfenwick 2147 case NEZ:
938 caltinay 5972 result=left.whereNonZero(m_tol);
939     break;
940 jfenwick 2147 case EZ:
941 caltinay 5972 result=left.whereZero(m_tol);
942     break;
943 jfenwick 2037 case SYM:
944 caltinay 5972 result=left.symmetric();
945     break;
946 jfenwick 2037 case NSYM:
947 caltinay 5972 result=left.nonsymmetric();
948     break;
949 jfenwick 2066 case PROD:
950 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951     break;
952 jfenwick 2084 case TRANS:
953 caltinay 5972 result=left.transpose(m_axis_offset);
954     break;
955 jfenwick 2084 case TRACE:
956 caltinay 5972 result=left.trace(m_axis_offset);
957     break;
958 jfenwick 2496 case SWAP:
959 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
960     break;
961 jfenwick 2721 case MINVAL:
962 caltinay 5972 result=left.minval();
963     break;
964 jfenwick 2721 case MAXVAL:
965 caltinay 5972 result=left.minval();
966     break;
967 jfenwick 1889 default:
968 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969 jfenwick 1889 }
970     return result.borrowReadyPtr();
971     }
972    
973 jfenwick 1899 /*
974     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
975     This method uses the original methods on the Data class to evaluate the expressions.
976     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
977     the purpose of using DataLazy in the first place).
978     */
979 jfenwick 1889 void
980 jfenwick 4621 DataLazy::collapse() const
981 jfenwick 1889 {
982     if (m_op==IDENTITY)
983     {
984 caltinay 5972 return;
985 jfenwick 1889 }
986     if (m_readytype=='E')
987 caltinay 5972 { // this is more an efficiency concern than anything else
988 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
989     }
990     m_id=collapseToReady();
991     m_op=IDENTITY;
992     }
993    
994 jfenwick 2500 // The result will be stored in m_samples
995     // The return value is a pointer to the DataVector, offset is the offset within the return value
996 jfenwick 5938 const DataTypes::RealVectorType*
997 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
998 jfenwick 2500 {
999     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1000 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1001 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1002     {
1003 caltinay 5972 collapse();
1004 jfenwick 2500 }
1005 caltinay 5972 if (m_op==IDENTITY)
1006 jfenwick 2500 {
1007 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1008 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1009 jfenwick 2777 #ifdef LAZY_STACK_PROF
1010     int x;
1011     if (&x<stackend[omp_get_thread_num()])
1012     {
1013     stackend[omp_get_thread_num()]=&x;
1014     }
1015     #endif
1016 jfenwick 2500 return &(vec);
1017     }
1018     if (m_readytype!='E')
1019     {
1020     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1021     }
1022 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1023     {
1024 caltinay 5972 roffset=tid*m_samplesize;
1025     return &(m_samples); // sample is already resolved
1026 jfenwick 2501 }
1027     m_sampleids[tid]=sampleNo;
1028 jfenwick 3035
1029 jfenwick 2500 switch (getOpgroup(m_op))
1030     {
1031     case G_UNARY:
1032     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1033     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1034     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1035     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1036     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1037     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1038 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1039 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1040 jfenwick 2500 default:
1041     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1042     }
1043     }
1044    
1045 jfenwick 5938 const DataTypes::RealVectorType*
1046 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1047 jfenwick 2500 {
1048 caltinay 5972 // we assume that any collapsing has been done before we get here
1049     // since we only have one argument we don't need to think about only
1050     // processing single points.
1051     // we will also know we won't get identity nodes
1052 jfenwick 2500 if (m_readytype!='E')
1053     {
1054     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1055     }
1056     if (m_op==IDENTITY)
1057     {
1058     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1059     }
1060 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1061 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1062     roffset=m_samplesize*tid;
1063     double* result=&(m_samples[roffset]);
1064     switch (m_op)
1065     {
1066 caltinay 5972 case SIN:
1067     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1068     break;
1069 jfenwick 2500 case COS:
1070 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1071     break;
1072 jfenwick 2500 case TAN:
1073 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1074     break;
1075 jfenwick 2500 case ASIN:
1076 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1077     break;
1078 jfenwick 2500 case ACOS:
1079 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1080     break;
1081 jfenwick 2500 case ATAN:
1082 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1083     break;
1084 jfenwick 2500 case SINH:
1085 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1086     break;
1087 jfenwick 2500 case COSH:
1088 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1089     break;
1090 jfenwick 2500 case TANH:
1091 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1092     break;
1093 jfenwick 2500 case ERF:
1094     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1095 caltinay 5972 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1096 jfenwick 2500 #else
1097 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::erf);
1098     break;
1099 jfenwick 2500 #endif
1100     case ASINH:
1101     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1102 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1103 jfenwick 2500 #else
1104 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1105 jfenwick 2500 #endif
1106 caltinay 5972 break;
1107 jfenwick 2500 case ACOSH:
1108     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1109 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1110 jfenwick 2500 #else
1111 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1112 jfenwick 2500 #endif
1113 caltinay 5972 break;
1114 jfenwick 2500 case ATANH:
1115     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1116 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1117 jfenwick 2500 #else
1118 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1119 jfenwick 2500 #endif
1120 caltinay 5972 break;
1121 jfenwick 2500 case LOG10:
1122 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1123     break;
1124 jfenwick 2500 case LOG:
1125 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1126     break;
1127 jfenwick 2500 case SIGN:
1128 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1129     break;
1130 jfenwick 2500 case ABS:
1131 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1132     break;
1133 jfenwick 2500 case NEG:
1134 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1135     break;
1136 jfenwick 2500 case POS:
1137 caltinay 5972 // it doesn't mean anything for delayed.
1138     // it will just trigger a deep copy of the lazy object
1139     throw DataException("Programmer error - POS not supported for lazy data.");
1140     break;
1141 jfenwick 2500 case EXP:
1142 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1143     break;
1144 jfenwick 2500 case SQRT:
1145 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1146     break;
1147 jfenwick 2500 case RECIP:
1148 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1149     break;
1150 jfenwick 2500 case GZ:
1151 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1152     break;
1153 jfenwick 2500 case LZ:
1154 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1155     break;
1156 jfenwick 2500 case GEZ:
1157 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1158     break;
1159 jfenwick 2500 case LEZ:
1160 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1161     break;
1162 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1163     case NEZ:
1164 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1165     break;
1166 jfenwick 2500 case EZ:
1167 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1168     break;
1169 jfenwick 2500
1170     default:
1171 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1172 jfenwick 2500 }
1173     return &(m_samples);
1174     }
1175    
1176    
1177 jfenwick 5938 const DataTypes::RealVectorType*
1178 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1179 jfenwick 2721 {
1180 caltinay 5972 // we assume that any collapsing has been done before we get here
1181     // since we only have one argument we don't need to think about only
1182     // processing single points.
1183     // we will also know we won't get identity nodes
1184 jfenwick 2721 if (m_readytype!='E')
1185     {
1186     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1187     }
1188     if (m_op==IDENTITY)
1189     {
1190     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1191     }
1192     size_t loffset=0;
1193 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1194 jfenwick 2721
1195     roffset=m_samplesize*tid;
1196 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1197 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1198 jfenwick 2721 double* result=&(m_samples[roffset]);
1199     switch (m_op)
1200     {
1201     case MINVAL:
1202 caltinay 5972 {
1203     for (unsigned int z=0;z<ndpps;++z)
1204     {
1205     FMin op;
1206     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1207     loffset+=psize;
1208     result++;
1209     }
1210     }
1211     break;
1212 jfenwick 2721 case MAXVAL:
1213 caltinay 5972 {
1214     for (unsigned int z=0;z<ndpps;++z)
1215     {
1216     FMax op;
1217     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1218     loffset+=psize;
1219     result++;
1220     }
1221     }
1222     break;
1223 jfenwick 2721 default:
1224 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1225 jfenwick 2721 }
1226     return &(m_samples);
1227     }
1228    
1229 jfenwick 5938 const DataTypes::RealVectorType*
1230 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1231 jfenwick 2500 {
1232 caltinay 5972 // we assume that any collapsing has been done before we get here
1233     // since we only have one argument we don't need to think about only
1234     // processing single points.
1235 jfenwick 2500 if (m_readytype!='E')
1236     {
1237     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1238     }
1239     if (m_op==IDENTITY)
1240     {
1241     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1242     }
1243     size_t subroffset;
1244 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1245 jfenwick 2500 roffset=m_samplesize*tid;
1246     size_t loop=0;
1247     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1248     size_t step=getNoValues();
1249     size_t offset=roffset;
1250     switch (m_op)
1251     {
1252     case SYM:
1253 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1254     {
1255     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1256     subroffset+=step;
1257     offset+=step;
1258     }
1259     break;
1260 jfenwick 2500 case NSYM:
1261 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1262     {
1263     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1264     subroffset+=step;
1265     offset+=step;
1266     }
1267     break;
1268 jfenwick 2500 default:
1269 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1270 jfenwick 2500 }
1271     return &m_samples;
1272     }
1273    
1274 jfenwick 5938 const DataTypes::RealVectorType*
1275 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1276 jfenwick 2500 {
1277 caltinay 5972 // we assume that any collapsing has been done before we get here
1278     // since we only have one argument we don't need to think about only
1279     // processing single points.
1280 jfenwick 2500 if (m_readytype!='E')
1281     {
1282     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1283     }
1284     if (m_op==IDENTITY)
1285     {
1286     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1287     }
1288     size_t subroffset;
1289     size_t offset;
1290 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1291 jfenwick 2500 roffset=m_samplesize*tid;
1292     offset=roffset;
1293     size_t loop=0;
1294     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1295     size_t outstep=getNoValues();
1296     size_t instep=m_left->getNoValues();
1297     switch (m_op)
1298     {
1299     case TRACE:
1300 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1301     {
1302 jfenwick 2500 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1303 caltinay 5972 subroffset+=instep;
1304     offset+=outstep;
1305     }
1306     break;
1307 jfenwick 2500 case TRANS:
1308 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1309     {
1310 jfenwick 2500 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1311 caltinay 5972 subroffset+=instep;
1312     offset+=outstep;
1313     }
1314     break;
1315 jfenwick 2500 default:
1316 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1317 jfenwick 2500 }
1318     return &m_samples;
1319     }
1320    
1321    
1322 jfenwick 5938 const DataTypes::RealVectorType*
1323 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1324 jfenwick 2500 {
1325     if (m_readytype!='E')
1326     {
1327     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1328     }
1329     if (m_op==IDENTITY)
1330     {
1331     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1332     }
1333     size_t subroffset;
1334     size_t offset;
1335 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1336 jfenwick 2500 roffset=m_samplesize*tid;
1337     offset=roffset;
1338     size_t loop=0;
1339     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1340     size_t outstep=getNoValues();
1341     size_t instep=m_left->getNoValues();
1342     switch (m_op)
1343     {
1344     case SWAP:
1345 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1346     {
1347 jfenwick 2500 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1348 caltinay 5972 subroffset+=instep;
1349     offset+=outstep;
1350     }
1351     break;
1352 jfenwick 2500 default:
1353 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1354 jfenwick 2500 }
1355     return &m_samples;
1356     }
1357    
1358 jfenwick 5938 const DataTypes::RealVectorType*
1359 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1360 jfenwick 3035 {
1361     if (m_readytype!='E')
1362     {
1363     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1364     }
1365     if (m_op!=CONDEVAL)
1366     {
1367     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1368     }
1369     size_t subroffset;
1370 jfenwick 2500
1371 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1372     const RealVectorType* srcres=0;
1373 jfenwick 3035 if ((*maskres)[subroffset]>0)
1374     {
1375 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1376 jfenwick 3035 }
1377     else
1378     {
1379 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1380 jfenwick 3035 }
1381    
1382     // Now we need to copy the result
1383    
1384     roffset=m_samplesize*tid;
1385     for (int i=0;i<m_samplesize;++i)
1386     {
1387 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1388 jfenwick 3035 }
1389    
1390     return &m_samples;
1391     }
1392    
1393 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1394     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1395     // If both children are expanded, then we can process them in a single operation (we treat
1396     // the whole sample as one big datapoint.
1397     // If one of the children is not expanded, then we need to treat each point in the sample
1398     // individually.
1399     // There is an additional complication when scalar operations are considered.
1400     // For example, 2+Vector.
1401     // In this case each double within the point is treated individually
1402 jfenwick 5938 const DataTypes::RealVectorType*
1403 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1404 jfenwick 2500 {
1405     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1406    
1407 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1408     // first work out which of the children are expanded
1409 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1410     bool rightExp=(m_right->m_readytype=='E');
1411     if (!leftExp && !rightExp)
1412     {
1413 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1414 jfenwick 2500 }
1415     bool leftScalar=(m_left->getRank()==0);
1416     bool rightScalar=(m_right->getRank()==0);
1417     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1418     {
1419 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1420 jfenwick 2500 }
1421     size_t leftsize=m_left->getNoValues();
1422     size_t rightsize=m_right->getNoValues();
1423 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1424     int leftstep=0; // how far should the left offset advance after each step
1425 jfenwick 2500 int rightstep=0;
1426 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1427     int oleftstep=0; // the o variables refer to the outer loop
1428     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1429 jfenwick 2500 int onumsteps=1;
1430    
1431 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1432 jfenwick 2500 bool RES=(rightExp && rightScalar);
1433 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1434 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1435 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1436 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1437 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1438 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1439    
1440 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1441 jfenwick 2500 {
1442 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1443     leftstep=0;
1444     rightstep=0;
1445     numsteps=1;
1446 jfenwick 2500 }
1447     else if (LES || RES)
1448     {
1449 caltinay 5972 chunksize=1;
1450     if (LES) // left is an expanded scalar
1451     {
1452     if (RS)
1453     {
1454     leftstep=1;
1455     rightstep=0;
1456     numsteps=m_left->getNumDPPSample();
1457     }
1458     else // RN or REN
1459     {
1460     leftstep=0;
1461     oleftstep=1;
1462     rightstep=1;
1463     orightstep=(RN ? -(int)rightsize : 0);
1464     numsteps=rightsize;
1465     onumsteps=m_left->getNumDPPSample();
1466     }
1467     }
1468     else // right is an expanded scalar
1469     {
1470     if (LS)
1471     {
1472     rightstep=1;
1473     leftstep=0;
1474     numsteps=m_right->getNumDPPSample();
1475     }
1476     else
1477     {
1478     rightstep=0;
1479     orightstep=1;
1480     leftstep=1;
1481     oleftstep=(LN ? -(int)leftsize : 0);
1482     numsteps=leftsize;
1483     onumsteps=m_right->getNumDPPSample();
1484     }
1485     }
1486 jfenwick 2500 }
1487 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1488 jfenwick 2500 {
1489 caltinay 5972 if (LEN) // and Right will be a single value
1490     {
1491     chunksize=rightsize;
1492     leftstep=rightsize;
1493     rightstep=0;
1494     numsteps=m_left->getNumDPPSample();
1495     if (RS)
1496     {
1497     numsteps*=leftsize;
1498     }
1499     }
1500     else // REN
1501     {
1502     chunksize=leftsize;
1503     rightstep=leftsize;
1504     leftstep=0;
1505     numsteps=m_right->getNumDPPSample();
1506     if (LS)
1507     {
1508     numsteps*=rightsize;
1509     }
1510     }
1511 jfenwick 2500 }
1512    
1513 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1514     // Get the values of sub-expressions
1515 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1516     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1517 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1518     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1519     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1520     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1521     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1522     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1523     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1524    
1525     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1526     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1527    
1528    
1529     roffset=m_samplesize*tid;
1530 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1531 jfenwick 2500 switch(m_op)
1532     {
1533     case ADD:
1534 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1535     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1536     &(*left)[0],
1537     &(*right)[0],
1538     chunksize,
1539     onumsteps,
1540     numsteps,
1541     resultStep,
1542     leftstep,
1543     rightstep,
1544     oleftstep,
1545     orightstep,
1546 jfenwick 6056 lroffset,
1547     rroffset,
1548 jfenwick 6042 escript::ESFunction::PLUSF);
1549 caltinay 5972 break;
1550 jfenwick 2500 case SUB:
1551 jfenwick 6057 DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1552     &(*left)[0],
1553     &(*right)[0],
1554     chunksize,
1555     onumsteps,
1556     numsteps,
1557     resultStep,
1558     leftstep,
1559     rightstep,
1560     oleftstep,
1561     orightstep,
1562     lroffset,
1563     rroffset,
1564     escript::ESFunction::MINUSF);
1565     //PROC_OP(NO_ARG,minus<double>());
1566 caltinay 5972 break;
1567 jfenwick 2500 case MUL:
1568 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1569     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1570     &(*left)[0],
1571     &(*right)[0],
1572     chunksize,
1573     onumsteps,
1574     numsteps,
1575     resultStep,
1576     leftstep,
1577     rightstep,
1578     oleftstep,
1579     orightstep,
1580     lroffset,
1581     rroffset,
1582     escript::ESFunction::MULTIPLIESF);
1583 caltinay 5972 break;
1584 jfenwick 2500 case DIV:
1585 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1586     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1587     &(*left)[0],
1588     &(*right)[0],
1589     chunksize,
1590     onumsteps,
1591     numsteps,
1592     resultStep,
1593     leftstep,
1594     rightstep,
1595     oleftstep,
1596     orightstep,
1597     lroffset,
1598     rroffset,
1599     escript::ESFunction::DIVIDESF);
1600 caltinay 5972 break;
1601 jfenwick 2500 case POW:
1602 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1603     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1604     &(*left)[0],
1605     &(*right)[0],
1606     chunksize,
1607     onumsteps,
1608     numsteps,
1609     resultStep,
1610     leftstep,
1611     rightstep,
1612     oleftstep,
1613     orightstep,
1614     lroffset,
1615     rroffset,
1616     escript::ESFunction::POWF);
1617 caltinay 5972 break;
1618 jfenwick 2500 default:
1619 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1620 jfenwick 2500 }
1621     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1622     return &m_samples;
1623     }
1624    
1625    
1626     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1627     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1628     // unlike the other resolve helpers, we must treat these datapoints separately.
1629 jfenwick 5938 const DataTypes::RealVectorType*
1630 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1631 jfenwick 2500 {
1632     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1633    
1634 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1635     // first work out which of the children are expanded
1636 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1637     bool rightExp=(m_right->m_readytype=='E');
1638     int steps=getNumDPPSample();
1639 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1640 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1641    
1642     int resultStep=getNoValues();
1643     roffset=m_samplesize*tid;
1644     size_t offset=roffset;
1645    
1646 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1647 jfenwick 2500
1648 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1649 jfenwick 2500
1650     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1651     cout << getNoValues() << endl;)
1652    
1653    
1654     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1655     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1656     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1657     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1658     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1659     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1660     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1661    
1662 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1663 jfenwick 2500 switch(m_op)
1664     {
1665     case PROD:
1666 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1667     {
1668     const double *ptr_0 = &((*left)[lroffset]);
1669     const double *ptr_1 = &((*right)[rroffset]);
1670 jfenwick 2500
1671     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1672     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1673    
1674 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1675 jfenwick 2500
1676 caltinay 5972 lroffset+=leftStep;
1677     rroffset+=rightStep;
1678     }
1679     break;
1680 jfenwick 2500 default:
1681 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1682 jfenwick 2500 }
1683     roffset=offset;
1684     return &m_samples;
1685     }
1686    
1687 jfenwick 1898
1688 jfenwick 5938 const DataTypes::RealVectorType*
1689 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1690 jfenwick 1879 {
1691 jfenwick 2271 #ifdef _OPENMP
1692 caltinay 5972 int tid=omp_get_thread_num();
1693 jfenwick 2271 #else
1694 caltinay 5972 int tid=0;
1695 jfenwick 2271 #endif
1696 jfenwick 2777
1697     #ifdef LAZY_STACK_PROF
1698 caltinay 5972 stackstart[tid]=&tid;
1699     stackend[tid]=&tid;
1700     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1701     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1702     #pragma omp critical
1703     if (d>maxstackuse)
1704     {
1705 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1706 caltinay 5972 maxstackuse=d;
1707     }
1708     return r;
1709 jfenwick 2777 #else
1710 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1711 jfenwick 2777 #endif
1712 jfenwick 2271 }
1713    
1714    
1715 jfenwick 2497 // This needs to do the work of the identity constructor
1716 jfenwick 2177 void
1717     DataLazy::resolveToIdentity()
1718     {
1719     if (m_op==IDENTITY)
1720 caltinay 5972 return;
1721 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1722 jfenwick 2177 makeIdentity(p);
1723     }
1724 jfenwick 1889
1725 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1726     {
1727     m_op=IDENTITY;
1728     m_axis_offset=0;
1729     m_transpose=0;
1730     m_SL=m_SM=m_SR=0;
1731     m_children=m_height=0;
1732     m_id=p;
1733     if(p->isConstant()) {m_readytype='C';}
1734     else if(p->isExpanded()) {m_readytype='E';}
1735     else if (p->isTagged()) {m_readytype='T';}
1736     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1737     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1738     m_left.reset();
1739     m_right.reset();
1740     }
1741    
1742 jfenwick 2497
1743     DataReady_ptr
1744     DataLazy::resolve()
1745     {
1746     resolveToIdentity();
1747     return m_id;
1748     }
1749    
1750 jfenwick 2799
1751     /* This is really a static method but I think that caused problems in windows */
1752     void
1753     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1754     {
1755     if (dats.empty())
1756     {
1757 caltinay 5972 return;
1758 jfenwick 2799 }
1759     vector<DataLazy*> work;
1760     FunctionSpace fs=dats[0]->getFunctionSpace();
1761     bool match=true;
1762 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1763 jfenwick 2799 {
1764 caltinay 5972 if (dats[i]->m_readytype!='E')
1765     {
1766     dats[i]->collapse();
1767     }
1768     if (dats[i]->m_op!=IDENTITY)
1769     {
1770     work.push_back(dats[i]);
1771     if (fs!=dats[i]->getFunctionSpace())
1772     {
1773     match=false;
1774     }
1775     }
1776 jfenwick 2799 }
1777     if (work.empty())
1778     {
1779 caltinay 5972 return; // no work to do
1780 jfenwick 2799 }
1781 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1782     { // it is possible that dats[0] is one of the objects which we discarded and
1783     // all the other functionspaces match.
1784     vector<DataExpanded*> dep;
1785 jfenwick 6042 vector<RealVectorType*> vecs;
1786 caltinay 5972 for (int i=0;i<work.size();++i)
1787     {
1788 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1789 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1790     }
1791     int totalsamples=work[0]->getNumSamples();
1792 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1793 caltinay 5972 int sample;
1794     #pragma omp parallel private(sample, res)
1795     {
1796     size_t roffset=0;
1797     #pragma omp for schedule(static)
1798     for (sample=0;sample<totalsamples;++sample)
1799     {
1800     roffset=0;
1801     int j;
1802     for (j=work.size()-1;j>=0;--j)
1803     {
1804 jfenwick 2799 #ifdef _OPENMP
1805 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1806 jfenwick 2799 #else
1807 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1808 jfenwick 2799 #endif
1809 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1810     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1811     }
1812     }
1813     }
1814     // Now we need to load the new results as identity ops into the lazy nodes
1815     for (int i=work.size()-1;i>=0;--i)
1816     {
1817 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1818 caltinay 5972 }
1819 jfenwick 2799 }
1820 caltinay 5972 else // functionspaces do not match
1821 jfenwick 2799 {
1822 caltinay 5972 for (int i=0;i<work.size();++i)
1823     {
1824     work[i]->resolveToIdentity();
1825     }
1826 jfenwick 2799 }
1827     }
1828    
1829    
1830    
1831 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1832     DataReady_ptr
1833     DataLazy::resolveNodeWorker()
1834     {
1835 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1836 jfenwick 2500 {
1837     collapse();
1838     }
1839 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1840 jfenwick 2500 {
1841     return m_id;
1842     }
1843 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1844 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1845     RealVectorType& resvec=result->getVectorRW();
1846 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1847    
1848     int sample;
1849     int totalsamples=getNumSamples();
1850 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1851 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1852 jfenwick 2777 #pragma omp parallel private(sample,res)
1853 jfenwick 2500 {
1854 caltinay 5972 size_t roffset=0;
1855 jfenwick 2777 #ifdef LAZY_STACK_PROF
1856 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1857     stackend[omp_get_thread_num()]=&roffset;
1858 jfenwick 2777 #endif
1859 caltinay 5972 #pragma omp for schedule(static)
1860     for (sample=0;sample<totalsamples;++sample)
1861     {
1862     roffset=0;
1863 jfenwick 2500 #ifdef _OPENMP
1864 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1865 jfenwick 2500 #else
1866 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1867 jfenwick 2500 #endif
1868     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1869     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1870 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1871     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1872     }
1873 jfenwick 2500 }
1874 jfenwick 2777 #ifdef LAZY_STACK_PROF
1875     for (int i=0;i<getNumberOfThreads();++i)
1876     {
1877 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1878     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1879     if (r>maxstackuse)
1880     {
1881     maxstackuse=r;
1882     }
1883 jfenwick 2777 }
1884     cout << "Max resolve Stack use=" << maxstackuse << endl;
1885     #endif
1886 jfenwick 2500 return resptr;
1887     }
1888    
1889 jfenwick 1865 std::string
1890     DataLazy::toString() const
1891     {
1892 jfenwick 1886 ostringstream oss;
1893 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1894 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1895 jfenwick 2737 {
1896 caltinay 5972 case 1: // tree format
1897     oss << endl;
1898     intoTreeString(oss,"");
1899     break;
1900     case 2: // just the depth
1901     break;
1902 jfenwick 2795 default:
1903 caltinay 5972 intoString(oss);
1904     break;
1905 jfenwick 2737 }
1906 jfenwick 1886 return oss.str();
1907 jfenwick 1865 }
1908    
1909 jfenwick 1899
1910 jfenwick 1886 void
1911     DataLazy::intoString(ostringstream& oss) const
1912     {
1913 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1914 jfenwick 1886 switch (getOpgroup(m_op))
1915     {
1916 jfenwick 1889 case G_IDENTITY:
1917 caltinay 5972 if (m_id->isExpanded())
1918     {
1919     oss << "E";
1920     }
1921     else if (m_id->isTagged())
1922     {
1923     oss << "T";
1924     }
1925     else if (m_id->isConstant())
1926     {
1927     oss << "C";
1928     }
1929     else
1930     {
1931     oss << "?";
1932     }
1933     oss << '@' << m_id.get();
1934     break;
1935 jfenwick 1886 case G_BINARY:
1936 caltinay 5972 oss << '(';
1937     m_left->intoString(oss);
1938     oss << ' ' << opToString(m_op) << ' ';
1939     m_right->intoString(oss);
1940     oss << ')';
1941     break;
1942 jfenwick 1886 case G_UNARY:
1943 jfenwick 2147 case G_UNARY_P:
1944 jfenwick 2037 case G_NP1OUT:
1945 jfenwick 2084 case G_NP1OUT_P:
1946 jfenwick 2721 case G_REDUCTION:
1947 caltinay 5972 oss << opToString(m_op) << '(';
1948     m_left->intoString(oss);
1949     oss << ')';
1950     break;
1951 jfenwick 2066 case G_TENSORPROD:
1952 caltinay 5972 oss << opToString(m_op) << '(';
1953     m_left->intoString(oss);
1954     oss << ", ";
1955     m_right->intoString(oss);
1956     oss << ')';
1957     break;
1958 jfenwick 2496 case G_NP1OUT_2P:
1959 caltinay 5972 oss << opToString(m_op) << '(';
1960     m_left->intoString(oss);
1961     oss << ", " << m_axis_offset << ", " << m_transpose;
1962     oss << ')';
1963     break;
1964 jfenwick 3035 case G_CONDEVAL:
1965 caltinay 5972 oss << opToString(m_op)<< '(' ;
1966     m_mask->intoString(oss);
1967     oss << " ? ";
1968     m_left->intoString(oss);
1969     oss << " : ";
1970     m_right->intoString(oss);
1971     oss << ')';
1972     break;
1973 jfenwick 1886 default:
1974 caltinay 5972 oss << "UNKNOWN";
1975 jfenwick 1886 }
1976     }
1977    
1978 jfenwick 2737
1979     void
1980     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1981     {
1982     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1983     switch (getOpgroup(m_op))
1984     {
1985     case G_IDENTITY:
1986 caltinay 5972 if (m_id->isExpanded())
1987     {
1988     oss << "E";
1989     }
1990     else if (m_id->isTagged())
1991     {
1992     oss << "T";
1993     }
1994     else if (m_id->isConstant())
1995     {
1996     oss << "C";
1997     }
1998     else
1999     {
2000     oss << "?";
2001     }
2002     oss << '@' << m_id.get() << endl;
2003     break;
2004 jfenwick 2737 case G_BINARY:
2005 caltinay 5972 oss << opToString(m_op) << endl;
2006     indent+='.';
2007     m_left->intoTreeString(oss, indent);
2008     m_right->intoTreeString(oss, indent);
2009     break;
2010 jfenwick 2737 case G_UNARY:
2011     case G_UNARY_P:
2012     case G_NP1OUT:
2013     case G_NP1OUT_P:
2014     case G_REDUCTION:
2015 caltinay 5972 oss << opToString(m_op) << endl;
2016     indent+='.';
2017     m_left->intoTreeString(oss, indent);
2018     break;
2019 jfenwick 2737 case G_TENSORPROD:
2020 caltinay 5972 oss << opToString(m_op) << endl;
2021     indent+='.';
2022     m_left->intoTreeString(oss, indent);
2023     m_right->intoTreeString(oss, indent);
2024     break;
2025 jfenwick 2737 case G_NP1OUT_2P:
2026 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2027     indent+='.';
2028     m_left->intoTreeString(oss, indent);
2029     break;
2030 jfenwick 2737 default:
2031 caltinay 5972 oss << "UNKNOWN";
2032 jfenwick 2737 }
2033     }
2034    
2035    
2036 jfenwick 1865 DataAbstract*
2037 jfenwick 5938 DataLazy::deepCopy() const
2038 jfenwick 1865 {
2039 jfenwick 1935 switch (getOpgroup(m_op))
2040 jfenwick 1865 {
2041 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
2042 caltinay 5972 case G_UNARY:
2043 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2044 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2045     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2046 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2047     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2048 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2049     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2050 jfenwick 1935 default:
2051 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2052 jfenwick 1865 }
2053     }
2054    
2055    
2056 jfenwick 2721
2057 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2058     // Instances of DataReady can look at the size of their vectors.
2059     // For lazy though, it could be the size the data would be if it were resolved;
2060     // or it could be some function of the lengths of the DataReady instances which
2061     // form part of the expression.
2062     // Rather than have people making assumptions, I have disabled the method.
2063 jfenwick 5938 DataTypes::RealVectorType::size_type
2064 jfenwick 1865 DataLazy::getLength() const
2065     {
2066 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2067 jfenwick 1865 }
2068    
2069    
2070     DataAbstract*
2071     DataLazy::getSlice(const DataTypes::RegionType& region) const
2072     {
2073 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2074 jfenwick 1865 }
2075    
2076 jfenwick 1935
2077     // To do this we need to rely on our child nodes
2078 jfenwick 5938 DataTypes::RealVectorType::size_type
2079 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2080 jfenwick 1935 int dataPointNo)
2081     {
2082     if (m_op==IDENTITY)
2083     {
2084 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2085 jfenwick 1935 }
2086     if (m_readytype!='E')
2087     {
2088 caltinay 5972 collapse();
2089     return m_id->getPointOffset(sampleNo,dataPointNo);
2090 jfenwick 1935 }
2091     // at this point we do not have an identity node and the expression will be Expanded
2092     // so we only need to know which child to ask
2093     if (m_left->m_readytype=='E')
2094     {
2095 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2096 jfenwick 1935 }
2097     else
2098     {
2099 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2100 jfenwick 1935 }
2101     }
2102    
2103     // To do this we need to rely on our child nodes
2104 jfenwick 5938 DataTypes::RealVectorType::size_type
2105 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2106 jfenwick 1865 int dataPointNo) const
2107     {
2108 jfenwick 1935 if (m_op==IDENTITY)
2109     {
2110 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2111 jfenwick 1935 }
2112     if (m_readytype=='E')
2113     {
2114     // at this point we do not have an identity node and the expression will be Expanded
2115     // so we only need to know which child to ask
2116     if (m_left->m_readytype=='E')
2117     {
2118 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2119 jfenwick 1935 }
2120     else
2121     {
2122 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2123 jfenwick 1935 }
2124     }
2125     if (m_readytype=='C')
2126     {
2127 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2128 jfenwick 1935 }
2129     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2130 jfenwick 1865 }
2131    
2132 jfenwick 2271
2133     // I have decided to let Data:: handle this issue.
2134 jfenwick 1901 void
2135     DataLazy::setToZero()
2136     {
2137 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2138 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2139     // m_op=IDENTITY;
2140     // m_right.reset();
2141     // m_left.reset();
2142     // m_readytype='C';
2143     // m_buffsRequired=1;
2144    
2145 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2146 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2147 jfenwick 1901 }
2148    
2149 jfenwick 2271 bool
2150     DataLazy::actsExpanded() const
2151     {
2152 caltinay 5972 return (m_readytype=='E');
2153 jfenwick 2271 }
2154    
2155 caltinay 5997 } // end namespace
2156 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26