/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6074 - (hide annotations)
Fri Mar 18 02:42:48 2016 UTC (3 years ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 66416 byte(s)
Starting to remove dead code.

DataAlgorithmAdapter is only used by the code which tests it.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6042 #include "DataMaths.h"
24 caltinay 5972
25 caltinay 3317 #ifdef USE_NETCDF
26     #include <netcdfcpp.h>
27     #endif
28    
29 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
30 jfenwick 2199
31 jfenwick 5938 using namespace escript::DataTypes;
32    
33 caltinay 5972 #define NO_ARG
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48 jfenwick 2472
49 jfenwick 2501
50 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52 jfenwick 1899 /*
53     How does DataLazy work?
54     ~~~~~~~~~~~~~~~~~~~~~~~
55    
56     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57     denoted left and right.
58    
59     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60     This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63 caltinay 4286 Note that IDENTITY is not considered a unary operation.
64 jfenwick 1899
65     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66     It must however form a DAG (directed acyclic graph).
67     I will refer to individual DataLazy objects with the structure as nodes.
68    
69     Each node also stores:
70     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71 caltinay 5972 evaluated.
72 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73 caltinay 5972 evaluate the expression.
74 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
75    
76     When a new node is created, the above values are computed based on the values in the child nodes.
77     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79     The resolve method, which produces a DataReady from a DataLazy, does the following:
80     1) Create a DataReady to hold the new result.
81     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86     resolveSample returns a Vector* and an offset within that vector where the result is stored.
87     Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93 jfenwick 2037
94 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 jfenwick 2037 1) Add to the ES_optype.
96     2) determine what opgroup your operation belongs to (X)
97     3) add a string for the op to the end of ES_opstrings
98     4) increase ES_opcount
99     5) add an entry (X) to opgroups
100     6) add an entry to the switch in collapseToReady
101     7) add an entry to resolveX
102 jfenwick 1899 */
103    
104    
105 jfenwick 1865 using namespace std;
106 jfenwick 1868 using namespace boost;
107 jfenwick 1865
108     namespace escript
109     {
110    
111     namespace
112     {
113    
114 jfenwick 2777
115     // enabling this will print out when ever the maximum stacksize used by resolve increases
116     // it assumes _OPENMP is also in use
117     //#define LAZY_STACK_PROF
118    
119    
120    
121     #ifndef _OPENMP
122     #ifdef LAZY_STACK_PROF
123     #undef LAZY_STACK_PROF
124     #endif
125     #endif
126    
127    
128     #ifdef LAZY_STACK_PROF
129     std::vector<void*> stackstart(getNumberOfThreads());
130     std::vector<void*> stackend(getNumberOfThreads());
131     size_t maxstackuse=0;
132     #endif
133    
134 jfenwick 1886 enum ES_opgroup
135     {
136     G_UNKNOWN,
137     G_IDENTITY,
138 caltinay 5972 G_BINARY, // pointwise operations with two arguments
139     G_UNARY, // pointwise operations with one argument
140     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141     G_NP1OUT, // non-pointwise op with one output
142     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD, // general tensor product
144     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145     G_REDUCTION, // non-pointwise unary op with a scalar output
146 jfenwick 3035 G_CONDEVAL
147 jfenwick 1886 };
148    
149    
150    
151    
152 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153 caltinay 5972 "sin","cos","tan",
154     "asin","acos","atan","sinh","cosh","tanh","erf",
155     "asinh","acosh","atanh",
156     "log10","log","sign","abs","neg","pos","exp","sqrt",
157     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158     "symmetric","nonsymmetric",
159     "prod",
160     "transpose", "trace",
161     "swapaxes",
162     "minval", "maxval",
163     "condEval"};
164 jfenwick 3035 int ES_opcount=44;
165 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
167     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
168     G_UNARY,G_UNARY,G_UNARY, // 20
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
171     G_NP1OUT,G_NP1OUT,
172     G_TENSORPROD,
173     G_NP1OUT_P, G_NP1OUT_P,
174     G_NP1OUT_2P,
175     G_REDUCTION, G_REDUCTION,
176     G_CONDEVAL};
177 jfenwick 1886 inline
178     ES_opgroup
179     getOpgroup(ES_optype op)
180     {
181     return opgroups[op];
182     }
183    
184 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
185     FunctionSpace
186     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 caltinay 5972 // perhaps this should call interpolate and throw or something?
189     // maybe we need an interpolate node -
190     // that way, if interpolate is required in any other op we can just throw a
191     // programming error exception.
192 jfenwick 1879
193 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
194     FunctionSpace r=right->getFunctionSpace();
195     if (l!=r)
196     {
197 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198     if (res==1)
199 jfenwick 1943 {
200 caltinay 5972 return l;
201 jfenwick 1943 }
202 jfenwick 4264 if (res==-1)
203 jfenwick 1943 {
204 caltinay 5972 return r;
205 jfenwick 1943 }
206     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207     }
208     return l;
209 jfenwick 1865 }
210    
211     // return the shape of the result of "left op right"
212 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213 jfenwick 1865 DataTypes::ShapeType
214     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215     {
216 caltinay 5972 if (left->getShape()!=right->getShape())
217     {
218     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219     {
220     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221     }
222 jfenwick 2721
223 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
224     {
225     return right->getShape();
226     }
227     if (right->getRank()==0)
228     {
229     return left->getShape();
230     }
231     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232     }
233     return left->getShape();
234 jfenwick 1865 }
235    
236 jfenwick 2084 // return the shape for "op left"
237    
238     DataTypes::ShapeType
239 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240 jfenwick 2084 {
241 caltinay 5972 switch(op)
242     {
243     case TRANS:
244     { // for the scoping of variables
245     const DataTypes::ShapeType& s=left->getShape();
246     DataTypes::ShapeType sh;
247     int rank=left->getRank();
248     if (axis_offset<0 || axis_offset>rank)
249     {
250 caltinay 2779 stringstream e;
251     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252     throw DataException(e.str());
253 caltinay 5972 }
254     for (int i=0; i<rank; i++)
255     {
256     int index = (axis_offset+i)%rank;
257     sh.push_back(s[index]); // Append to new shape
258     }
259     return sh;
260     }
261     break;
262     case TRACE:
263     {
264     int rank=left->getRank();
265     if (rank<2)
266     {
267     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268     }
269     if ((axis_offset>rank-2) || (axis_offset<0))
270     {
271     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272     }
273     if (rank==2)
274     {
275     return DataTypes::scalarShape;
276     }
277     else if (rank==3)
278     {
279     DataTypes::ShapeType sh;
280     if (axis_offset==0)
281     {
282     sh.push_back(left->getShape()[2]);
283     }
284     else // offset==1
285     {
286     sh.push_back(left->getShape()[0]);
287     }
288     return sh;
289     }
290     else if (rank==4)
291     {
292     DataTypes::ShapeType sh;
293     const DataTypes::ShapeType& s=left->getShape();
294     if (axis_offset==0)
295     {
296     sh.push_back(s[2]);
297     sh.push_back(s[3]);
298     }
299     else if (axis_offset==1)
300     {
301     sh.push_back(s[0]);
302     sh.push_back(s[3]);
303     }
304     else // offset==2
305     {
306     sh.push_back(s[0]);
307     sh.push_back(s[1]);
308     }
309     return sh;
310     }
311     else // unknown rank
312     {
313     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314     }
315     }
316     break;
317     default:
318     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319     }
320 jfenwick 2084 }
321    
322 jfenwick 2496 DataTypes::ShapeType
323     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324     {
325     // This code taken from the Data.cpp swapaxes() method
326     // Some of the checks are probably redundant here
327     int axis0_tmp,axis1_tmp;
328     const DataTypes::ShapeType& s=left->getShape();
329     DataTypes::ShapeType out_shape;
330     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     int rank=left->getRank();
333     if (rank<2) {
334     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335     }
336     if (axis0<0 || axis0>rank-1) {
337 caltinay 2779 stringstream e;
338     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339     throw DataException(e.str());
340 jfenwick 2496 }
341     if (axis1<0 || axis1>rank-1) {
342 caltinay 2782 stringstream e;
343 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344     throw DataException(e.str());
345 jfenwick 2496 }
346     if (axis0 == axis1) {
347     throw DataException("Error - Data::swapaxes: axis indices must be different.");
348     }
349     if (axis0 > axis1) {
350     axis0_tmp=axis1;
351     axis1_tmp=axis0;
352     } else {
353     axis0_tmp=axis0;
354     axis1_tmp=axis1;
355     }
356     for (int i=0; i<rank; i++) {
357     if (i == axis0_tmp) {
358     out_shape.push_back(s[axis1_tmp]);
359     } else if (i == axis1_tmp) {
360     out_shape.push_back(s[axis0_tmp]);
361     } else {
362     out_shape.push_back(s[i]);
363     }
364     }
365     return out_shape;
366     }
367    
368    
369 jfenwick 2066 // determine the output shape for the general tensor product operation
370     // the additional parameters return information required later for the product
371     // the majority of this code is copy pasted from C_General_Tensor_Product
372     DataTypes::ShapeType
373     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374 jfenwick 1865 {
375 caltinay 5972
376 jfenwick 2066 // Get rank and shape of inputs
377     int rank0 = left->getRank();
378     int rank1 = right->getRank();
379     const DataTypes::ShapeType& shape0 = left->getShape();
380     const DataTypes::ShapeType& shape1 = right->getShape();
381    
382     // Prepare for the loops of the product and verify compatibility of shapes
383     int start0=0, start1=0;
384 caltinay 5972 if (transpose == 0) {}
385     else if (transpose == 1) { start0 = axis_offset; }
386     else if (transpose == 2) { start1 = rank1-axis_offset; }
387     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388 jfenwick 2066
389 jfenwick 2085 if (rank0<axis_offset)
390     {
391 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392 jfenwick 2085 }
393 jfenwick 2066
394     // Adjust the shapes for transpose
395 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
396     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
397     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399 jfenwick 2066
400     // Prepare for the loops of the product
401     SL=1, SM=1, SR=1;
402 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
403 jfenwick 2066 SL *= tmpShape0[i];
404     }
405 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
406 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408     }
409     SM *= tmpShape0[i];
410     }
411 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
412 jfenwick 2066 SR *= tmpShape1[i];
413     }
414    
415     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
417     { // block to limit the scope of out_index
418 jfenwick 2066 int out_index=0;
419     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421     }
422 jfenwick 2086
423     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424     {
425     ostringstream os;
426     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427     throw DataException(os.str());
428     }
429    
430 jfenwick 2066 return shape2;
431 jfenwick 1865 }
432    
433 caltinay 5972 } // end anonymous namespace
434 jfenwick 1865
435    
436 jfenwick 1899
437     // Return a string representing the operation
438 jfenwick 1865 const std::string&
439     opToString(ES_optype op)
440     {
441     if (op<0 || op>=ES_opcount)
442     {
443     op=UNKNOWNOP;
444     }
445     return ES_opstrings[op];
446     }
447    
448 jfenwick 2500 void DataLazy::LazyNodeSetup()
449     {
450     #ifdef _OPENMP
451     int numthreads=omp_get_max_threads();
452     m_samples.resize(numthreads*m_samplesize);
453     m_sampleids=new int[numthreads];
454     for (int i=0;i<numthreads;++i)
455     {
456     m_sampleids[i]=-1;
457     }
458     #else
459     m_samples.resize(m_samplesize);
460     m_sampleids=new int[1];
461     m_sampleids[0]=-1;
462     #endif // _OPENMP
463     }
464 jfenwick 1865
465 jfenwick 2500
466     // Creates an identity node
467 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
468 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
469     ,m_sampleids(0),
470     m_samples(1)
471 jfenwick 1865 {
472 jfenwick 1879 if (p->isLazy())
473     {
474 caltinay 5972 // I don't want identity of Lazy.
475     // Question: Why would that be so bad?
476     // Answer: We assume that the child of ID is something we can call getVector on
477     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478 jfenwick 1879 }
479     else
480     {
481 caltinay 5972 p->makeLazyShared();
482     DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483     makeIdentity(dr);
484 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
485 jfenwick 1879 }
486 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
487 jfenwick 1865 }
488    
489 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491     m_op(op),
492     m_axis_offset(0),
493     m_transpose(0),
494     m_SL(0), m_SM(0), m_SR(0)
495 jfenwick 1886 {
496 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497 jfenwick 1886 {
498 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499 jfenwick 1886 }
500 jfenwick 2066
501 jfenwick 1886 DataLazy_ptr lleft;
502     if (!left->isLazy())
503     {
504 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
505 jfenwick 1886 }
506     else
507     {
508 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
509 jfenwick 1886 }
510 jfenwick 1889 m_readytype=lleft->m_readytype;
511 jfenwick 1886 m_left=lleft;
512     m_samplesize=getNumDPPSample()*getNoValues();
513 jfenwick 2177 m_children=m_left->m_children+1;
514     m_height=m_left->m_height+1;
515 jfenwick 2500 LazyNodeSetup();
516 jfenwick 2177 SIZELIMIT
517 jfenwick 1886 }
518    
519    
520 jfenwick 1943 // In this constructor we need to consider interpolation
521 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
523     m_op(op),
524     m_SL(0), m_SM(0), m_SR(0)
525 jfenwick 1879 {
526 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
528 jfenwick 1886 {
529 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530 jfenwick 1886 }
531 jfenwick 1943
532 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
533 jfenwick 1943 {
534 caltinay 5972 FunctionSpace fs=getFunctionSpace();
535     Data ltemp(left);
536     Data tmp(ltemp,fs);
537     left=tmp.borrowDataPtr();
538 jfenwick 1943 }
539 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
540 jfenwick 1943 {
541 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
542     right=tmp.borrowDataPtr();
543 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544 jfenwick 1943 }
545     left->operandCheck(*right);
546    
547 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
548 jfenwick 1879 {
549 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
550 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551 jfenwick 1879 }
552     else
553     {
554 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
555 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556 jfenwick 1879 }
557     if (right->isLazy())
558     {
559 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
560 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561 jfenwick 1879 }
562     else
563     {
564 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
565 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566 jfenwick 1879 }
567 jfenwick 1889 char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;
569     if (lt=='E' || rt=='E')
570     {
571 caltinay 5972 m_readytype='E';
572 jfenwick 1889 }
573     else if (lt=='T' || rt=='T')
574     {
575 caltinay 5972 m_readytype='T';
576 jfenwick 1889 }
577     else
578     {
579 caltinay 5972 m_readytype='C';
580 jfenwick 1889 }
581 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
582 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
583     m_height=max(m_left->m_height,m_right->m_height)+1;
584 jfenwick 2500 LazyNodeSetup();
585 jfenwick 2177 SIZELIMIT
586 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587 jfenwick 1879 }
588    
589 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591     m_op(op),
592     m_axis_offset(axis_offset),
593     m_transpose(transpose)
594 jfenwick 2066 {
595     if ((getOpgroup(op)!=G_TENSORPROD))
596     {
597 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598 jfenwick 2066 }
599     if ((transpose>2) || (transpose<0))
600     {
601 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602 jfenwick 2066 }
603 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
604 jfenwick 2066 {
605 caltinay 5972 FunctionSpace fs=getFunctionSpace();
606     Data ltemp(left);
607     Data tmp(ltemp,fs);
608     left=tmp.borrowDataPtr();
609 jfenwick 2066 }
610 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
611 jfenwick 2066 {
612 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
613     right=tmp.borrowDataPtr();
614 jfenwick 2066 }
615 jfenwick 2195 // left->operandCheck(*right);
616 jfenwick 1879
617 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
618 jfenwick 2066 {
619 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
620 jfenwick 2066 }
621     else
622     {
623 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
624 jfenwick 2066 }
625     if (right->isLazy())
626     {
627 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
628 jfenwick 2066 }
629     else
630     {
631 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
632 jfenwick 2066 }
633     char lt=m_left->m_readytype;
634     char rt=m_right->m_readytype;
635     if (lt=='E' || rt=='E')
636     {
637 caltinay 5972 m_readytype='E';
638 jfenwick 2066 }
639     else if (lt=='T' || rt=='T')
640     {
641 caltinay 5972 m_readytype='T';
642 jfenwick 2066 }
643     else
644     {
645 caltinay 5972 m_readytype='C';
646 jfenwick 2066 }
647     m_samplesize=getNumDPPSample()*getNoValues();
648 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
649     m_height=max(m_left->m_height,m_right->m_height)+1;
650 jfenwick 2500 LazyNodeSetup();
651 jfenwick 2177 SIZELIMIT
652 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653 jfenwick 2066 }
654    
655    
656 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658     m_op(op),
659     m_axis_offset(axis_offset),
660     m_transpose(0),
661     m_tol(0)
662 jfenwick 2084 {
663     if ((getOpgroup(op)!=G_NP1OUT_P))
664     {
665 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666 jfenwick 2084 }
667     DataLazy_ptr lleft;
668     if (!left->isLazy())
669     {
670 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
671 jfenwick 2084 }
672     else
673     {
674 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
675 jfenwick 2084 }
676     m_readytype=lleft->m_readytype;
677     m_left=lleft;
678     m_samplesize=getNumDPPSample()*getNoValues();
679 jfenwick 2177 m_children=m_left->m_children+1;
680     m_height=m_left->m_height+1;
681 jfenwick 2500 LazyNodeSetup();
682 jfenwick 2177 SIZELIMIT
683 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684 jfenwick 2084 }
685    
686 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
688     m_op(op),
689     m_axis_offset(0),
690     m_transpose(0),
691     m_tol(tol)
692 jfenwick 2147 {
693     if ((getOpgroup(op)!=G_UNARY_P))
694     {
695 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696 jfenwick 2147 }
697     DataLazy_ptr lleft;
698     if (!left->isLazy())
699     {
700 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
701 jfenwick 2147 }
702     else
703     {
704 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
705 jfenwick 2147 }
706     m_readytype=lleft->m_readytype;
707     m_left=lleft;
708     m_samplesize=getNumDPPSample()*getNoValues();
709 jfenwick 2177 m_children=m_left->m_children+1;
710     m_height=m_left->m_height+1;
711 jfenwick 2500 LazyNodeSetup();
712 jfenwick 2177 SIZELIMIT
713 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714     }
715 jfenwick 2084
716 jfenwick 2496
717     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719     m_op(op),
720     m_axis_offset(axis0),
721     m_transpose(axis1),
722     m_tol(0)
723 jfenwick 2496 {
724     if ((getOpgroup(op)!=G_NP1OUT_2P))
725     {
726 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727 jfenwick 2496 }
728     DataLazy_ptr lleft;
729     if (!left->isLazy())
730     {
731 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
732 jfenwick 2496 }
733     else
734     {
735 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
736 jfenwick 2496 }
737     m_readytype=lleft->m_readytype;
738     m_left=lleft;
739     m_samplesize=getNumDPPSample()*getNoValues();
740     m_children=m_left->m_children+1;
741     m_height=m_left->m_height+1;
742 jfenwick 2500 LazyNodeSetup();
743 jfenwick 2496 SIZELIMIT
744     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745     }
746    
747 jfenwick 3035
748     namespace
749     {
750    
751     inline int max3(int a, int b, int c)
752     {
753 caltinay 5972 int t=(a>b?a:b);
754     return (t>c?t:c);
755 jfenwick 3035
756     }
757     }
758    
759     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
761     m_op(CONDEVAL),
762     m_axis_offset(0),
763     m_transpose(0),
764     m_tol(0)
765 jfenwick 3035 {
766    
767     DataLazy_ptr lmask;
768     DataLazy_ptr lleft;
769     DataLazy_ptr lright;
770     if (!mask->isLazy())
771     {
772 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
773 jfenwick 3035 }
774     else
775     {
776 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
777 jfenwick 3035 }
778     if (!left->isLazy())
779     {
780 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
781 jfenwick 3035 }
782     else
783     {
784 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
785 jfenwick 3035 }
786     if (!right->isLazy())
787     {
788 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
789 jfenwick 3035 }
790     else
791     {
792 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
793 jfenwick 3035 }
794     m_readytype=lmask->m_readytype;
795     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796     {
797 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
798 jfenwick 3035 }
799     m_left=lleft;
800     m_right=lright;
801     m_mask=lmask;
802     m_samplesize=getNumDPPSample()*getNoValues();
803     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805     LazyNodeSetup();
806     SIZELIMIT
807     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808     }
809    
810    
811    
812 jfenwick 1865 DataLazy::~DataLazy()
813     {
814 jfenwick 2500 delete[] m_sampleids;
815 jfenwick 1865 }
816    
817 jfenwick 1879
818 jfenwick 1899 /*
819     \brief Evaluates the expression using methods on Data.
820     This does the work for the collapse method.
821     For reasons of efficiency do not call this method on DataExpanded nodes.
822     */
823 jfenwick 1889 DataReady_ptr
824 jfenwick 4621 DataLazy::collapseToReady() const
825 jfenwick 1889 {
826     if (m_readytype=='E')
827 caltinay 5972 { // this is more an efficiency concern than anything else
828 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
829     }
830     if (m_op==IDENTITY)
831     {
832     return m_id;
833     }
834     DataReady_ptr pleft=m_left->collapseToReady();
835     Data left(pleft);
836     Data right;
837 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
838 jfenwick 1889 {
839     right=Data(m_right->collapseToReady());
840     }
841     Data result;
842     switch(m_op)
843     {
844     case ADD:
845 caltinay 5972 result=left+right;
846     break;
847     case SUB:
848     result=left-right;
849     break;
850     case MUL:
851     result=left*right;
852     break;
853     case DIV:
854     result=left/right;
855     break;
856 jfenwick 1889 case SIN:
857 caltinay 5972 result=left.sin();
858     break;
859 jfenwick 1889 case COS:
860 caltinay 5972 result=left.cos();
861     break;
862 jfenwick 1889 case TAN:
863 caltinay 5972 result=left.tan();
864     break;
865 jfenwick 1889 case ASIN:
866 caltinay 5972 result=left.asin();
867     break;
868 jfenwick 1889 case ACOS:
869 caltinay 5972 result=left.acos();
870     break;
871 jfenwick 1889 case ATAN:
872 caltinay 5972 result=left.atan();
873     break;
874 jfenwick 1889 case SINH:
875 caltinay 5972 result=left.sinh();
876     break;
877 jfenwick 1889 case COSH:
878 caltinay 5972 result=left.cosh();
879     break;
880 jfenwick 1889 case TANH:
881 caltinay 5972 result=left.tanh();
882     break;
883 jfenwick 1889 case ERF:
884 caltinay 5972 result=left.erf();
885     break;
886 jfenwick 1889 case ASINH:
887 caltinay 5972 result=left.asinh();
888     break;
889 jfenwick 1889 case ACOSH:
890 caltinay 5972 result=left.acosh();
891     break;
892 jfenwick 1889 case ATANH:
893 caltinay 5972 result=left.atanh();
894     break;
895 jfenwick 1889 case LOG10:
896 caltinay 5972 result=left.log10();
897     break;
898 jfenwick 1889 case LOG:
899 caltinay 5972 result=left.log();
900     break;
901 jfenwick 1889 case SIGN:
902 caltinay 5972 result=left.sign();
903     break;
904 jfenwick 1889 case ABS:
905 caltinay 5972 result=left.abs();
906     break;
907 jfenwick 1889 case NEG:
908 caltinay 5972 result=left.neg();
909     break;
910 jfenwick 1889 case POS:
911 caltinay 5972 // it doesn't mean anything for delayed.
912     // it will just trigger a deep copy of the lazy object
913     throw DataException("Programmer error - POS not supported for lazy data.");
914     break;
915 jfenwick 1889 case EXP:
916 caltinay 5972 result=left.exp();
917     break;
918 jfenwick 1889 case SQRT:
919 caltinay 5972 result=left.sqrt();
920     break;
921 jfenwick 1889 case RECIP:
922 caltinay 5972 result=left.oneOver();
923     break;
924 jfenwick 1889 case GZ:
925 caltinay 5972 result=left.wherePositive();
926     break;
927 jfenwick 1889 case LZ:
928 caltinay 5972 result=left.whereNegative();
929     break;
930 jfenwick 1889 case GEZ:
931 caltinay 5972 result=left.whereNonNegative();
932     break;
933 jfenwick 1889 case LEZ:
934 caltinay 5972 result=left.whereNonPositive();
935     break;
936 jfenwick 2147 case NEZ:
937 caltinay 5972 result=left.whereNonZero(m_tol);
938     break;
939 jfenwick 2147 case EZ:
940 caltinay 5972 result=left.whereZero(m_tol);
941     break;
942 jfenwick 2037 case SYM:
943 caltinay 5972 result=left.symmetric();
944     break;
945 jfenwick 2037 case NSYM:
946 caltinay 5972 result=left.nonsymmetric();
947     break;
948 jfenwick 2066 case PROD:
949 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950     break;
951 jfenwick 2084 case TRANS:
952 caltinay 5972 result=left.transpose(m_axis_offset);
953     break;
954 jfenwick 2084 case TRACE:
955 caltinay 5972 result=left.trace(m_axis_offset);
956     break;
957 jfenwick 2496 case SWAP:
958 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
959     break;
960 jfenwick 2721 case MINVAL:
961 caltinay 5972 result=left.minval();
962     break;
963 jfenwick 2721 case MAXVAL:
964 caltinay 5972 result=left.minval();
965     break;
966 jfenwick 1889 default:
967 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968 jfenwick 1889 }
969     return result.borrowReadyPtr();
970     }
971    
972 jfenwick 1899 /*
973     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
974     This method uses the original methods on the Data class to evaluate the expressions.
975     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
976     the purpose of using DataLazy in the first place).
977     */
978 jfenwick 1889 void
979 jfenwick 4621 DataLazy::collapse() const
980 jfenwick 1889 {
981     if (m_op==IDENTITY)
982     {
983 caltinay 5972 return;
984 jfenwick 1889 }
985     if (m_readytype=='E')
986 caltinay 5972 { // this is more an efficiency concern than anything else
987 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
988     }
989     m_id=collapseToReady();
990     m_op=IDENTITY;
991     }
992    
993 jfenwick 2500 // The result will be stored in m_samples
994     // The return value is a pointer to the DataVector, offset is the offset within the return value
995 jfenwick 5938 const DataTypes::RealVectorType*
996 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
997 jfenwick 2500 {
998     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
999 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1000 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1001     {
1002 caltinay 5972 collapse();
1003 jfenwick 2500 }
1004 caltinay 5972 if (m_op==IDENTITY)
1005 jfenwick 2500 {
1006 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1007 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1008 jfenwick 2777 #ifdef LAZY_STACK_PROF
1009     int x;
1010     if (&x<stackend[omp_get_thread_num()])
1011     {
1012     stackend[omp_get_thread_num()]=&x;
1013     }
1014     #endif
1015 jfenwick 2500 return &(vec);
1016     }
1017     if (m_readytype!='E')
1018     {
1019     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1020     }
1021 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1022     {
1023 caltinay 5972 roffset=tid*m_samplesize;
1024     return &(m_samples); // sample is already resolved
1025 jfenwick 2501 }
1026     m_sampleids[tid]=sampleNo;
1027 jfenwick 3035
1028 jfenwick 2500 switch (getOpgroup(m_op))
1029     {
1030     case G_UNARY:
1031     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1032     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1033     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1034     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1035     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1036     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1037 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1038 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1039 jfenwick 2500 default:
1040     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1041     }
1042     }
1043    
1044 jfenwick 5938 const DataTypes::RealVectorType*
1045 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1046 jfenwick 2500 {
1047 caltinay 5972 // we assume that any collapsing has been done before we get here
1048     // since we only have one argument we don't need to think about only
1049     // processing single points.
1050     // we will also know we won't get identity nodes
1051 jfenwick 2500 if (m_readytype!='E')
1052     {
1053     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1054     }
1055     if (m_op==IDENTITY)
1056     {
1057     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1058     }
1059 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1060 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1061     roffset=m_samplesize*tid;
1062     double* result=&(m_samples[roffset]);
1063 jfenwick 6066 escript::ESFunction operation=SINF;
1064 jfenwick 2500 switch (m_op)
1065     {
1066 jfenwick 6066 case SIN:
1067     operation=SINF;
1068     break;
1069 jfenwick 2500 case COS:
1070 jfenwick 6066 operation=COSF;
1071     break;
1072 jfenwick 2500 case TAN:
1073 jfenwick 6066 operation=TANF;
1074     break;
1075 jfenwick 2500 case ASIN:
1076 jfenwick 6066 operation=ASINF;
1077     break;
1078 jfenwick 2500 case ACOS:
1079 jfenwick 6066 operation=ACOSF;
1080     break;
1081 jfenwick 2500 case ATAN:
1082 jfenwick 6066 operation=ATANF;
1083     break;
1084 jfenwick 2500 case SINH:
1085 jfenwick 6066 operation=SINHF;
1086     break;
1087 jfenwick 2500 case COSH:
1088 jfenwick 6066 operation=COSHF;
1089     break;
1090 jfenwick 2500 case TANH:
1091 jfenwick 6066 operation=TANHF;
1092     break;
1093 jfenwick 2500 case ERF:
1094 jfenwick 6066 operation=ERFF;
1095     break;
1096 jfenwick 2500 case ASINH:
1097 jfenwick 6066 operation=ASINHF;
1098     break;
1099 jfenwick 2500 case ACOSH:
1100 jfenwick 6066 operation=ACOSHF;
1101     break;
1102 jfenwick 2500 case ATANH:
1103 jfenwick 6066 operation=ATANHF;
1104     break;
1105 jfenwick 2500 case LOG10:
1106 jfenwick 6066 operation=LOG10F;
1107     break;
1108 jfenwick 2500 case LOG:
1109 jfenwick 6066 operation=LOGF;
1110     break;
1111 jfenwick 2500 case SIGN:
1112 jfenwick 6066 operation=SIGNF;
1113     break;
1114 jfenwick 2500 case ABS:
1115 jfenwick 6066 operation=ABSF;
1116     break;
1117 jfenwick 2500 case NEG:
1118 jfenwick 6066 operation=NEGF;
1119     break;
1120 jfenwick 2500 case POS:
1121 caltinay 5972 // it doesn't mean anything for delayed.
1122     // it will just trigger a deep copy of the lazy object
1123     throw DataException("Programmer error - POS not supported for lazy data.");
1124     break;
1125 jfenwick 2500 case EXP:
1126 jfenwick 6066 operation=EXPF;
1127     break;
1128 jfenwick 2500 case SQRT:
1129 jfenwick 6066 operation=SQRTF;
1130     break;
1131 jfenwick 2500 case RECIP:
1132 jfenwick 6066 operation=INVF;
1133     break;
1134 jfenwick 2500 case GZ:
1135 jfenwick 6066 operation=GTZEROF;
1136     break;
1137 jfenwick 2500 case LZ:
1138 jfenwick 6066 operation=LTZEROF;
1139     break;
1140 jfenwick 2500 case GEZ:
1141 jfenwick 6066 operation=GEZEROF;
1142     break;
1143 jfenwick 2500 case LEZ:
1144 jfenwick 6066 operation=LEZEROF;
1145     break;
1146 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1147     case NEZ:
1148 jfenwick 6066 operation=NEQZEROF;
1149     break;
1150 jfenwick 2500 case EZ:
1151 jfenwick 6066 operation=EQZEROF;
1152     break;
1153 jfenwick 2500 default:
1154 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1155 jfenwick 2500 }
1156 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1157     left,
1158     result,
1159     operation,
1160     m_tol);
1161 jfenwick 2500 return &(m_samples);
1162     }
1163    
1164    
1165 jfenwick 5938 const DataTypes::RealVectorType*
1166 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1167 jfenwick 2721 {
1168 caltinay 5972 // we assume that any collapsing has been done before we get here
1169     // since we only have one argument we don't need to think about only
1170     // processing single points.
1171     // we will also know we won't get identity nodes
1172 jfenwick 2721 if (m_readytype!='E')
1173     {
1174     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1175     }
1176     if (m_op==IDENTITY)
1177     {
1178     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1179     }
1180     size_t loffset=0;
1181 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1182 jfenwick 2721
1183     roffset=m_samplesize*tid;
1184 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1185 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1186 jfenwick 2721 double* result=&(m_samples[roffset]);
1187     switch (m_op)
1188     {
1189     case MINVAL:
1190 caltinay 5972 {
1191     for (unsigned int z=0;z<ndpps;++z)
1192     {
1193     FMin op;
1194     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1195     loffset+=psize;
1196     result++;
1197     }
1198     }
1199     break;
1200 jfenwick 2721 case MAXVAL:
1201 caltinay 5972 {
1202     for (unsigned int z=0;z<ndpps;++z)
1203     {
1204     FMax op;
1205     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1206     loffset+=psize;
1207     result++;
1208     }
1209     }
1210     break;
1211 jfenwick 2721 default:
1212 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1213 jfenwick 2721 }
1214     return &(m_samples);
1215     }
1216    
1217 jfenwick 5938 const DataTypes::RealVectorType*
1218 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1219 jfenwick 2500 {
1220 caltinay 5972 // we assume that any collapsing has been done before we get here
1221     // since we only have one argument we don't need to think about only
1222     // processing single points.
1223 jfenwick 2500 if (m_readytype!='E')
1224     {
1225     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1226     }
1227     if (m_op==IDENTITY)
1228     {
1229     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1230     }
1231     size_t subroffset;
1232 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233 jfenwick 2500 roffset=m_samplesize*tid;
1234     size_t loop=0;
1235     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1236     size_t step=getNoValues();
1237     size_t offset=roffset;
1238     switch (m_op)
1239     {
1240     case SYM:
1241 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1242     {
1243     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1244     subroffset+=step;
1245     offset+=step;
1246     }
1247     break;
1248 jfenwick 2500 case NSYM:
1249 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1250     {
1251     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1252     subroffset+=step;
1253     offset+=step;
1254     }
1255     break;
1256 jfenwick 2500 default:
1257 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1258 jfenwick 2500 }
1259     return &m_samples;
1260     }
1261    
1262 jfenwick 5938 const DataTypes::RealVectorType*
1263 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1264 jfenwick 2500 {
1265 caltinay 5972 // we assume that any collapsing has been done before we get here
1266     // since we only have one argument we don't need to think about only
1267     // processing single points.
1268 jfenwick 2500 if (m_readytype!='E')
1269     {
1270     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1271     }
1272     if (m_op==IDENTITY)
1273     {
1274     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1275     }
1276     size_t subroffset;
1277     size_t offset;
1278 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1279 jfenwick 2500 roffset=m_samplesize*tid;
1280     offset=roffset;
1281     size_t loop=0;
1282     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1283     size_t outstep=getNoValues();
1284     size_t instep=m_left->getNoValues();
1285     switch (m_op)
1286     {
1287     case TRACE:
1288 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1289     {
1290 jfenwick 2500 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1291 caltinay 5972 subroffset+=instep;
1292     offset+=outstep;
1293     }
1294     break;
1295 jfenwick 2500 case TRANS:
1296 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1297     {
1298 jfenwick 2500 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1299 caltinay 5972 subroffset+=instep;
1300     offset+=outstep;
1301     }
1302     break;
1303 jfenwick 2500 default:
1304 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1305 jfenwick 2500 }
1306     return &m_samples;
1307     }
1308    
1309    
1310 jfenwick 5938 const DataTypes::RealVectorType*
1311 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1312 jfenwick 2500 {
1313     if (m_readytype!='E')
1314     {
1315     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1316     }
1317     if (m_op==IDENTITY)
1318     {
1319     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1320     }
1321     size_t subroffset;
1322     size_t offset;
1323 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1324 jfenwick 2500 roffset=m_samplesize*tid;
1325     offset=roffset;
1326     size_t loop=0;
1327     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1328     size_t outstep=getNoValues();
1329     size_t instep=m_left->getNoValues();
1330     switch (m_op)
1331     {
1332     case SWAP:
1333 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1334     {
1335 jfenwick 2500 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1336 caltinay 5972 subroffset+=instep;
1337     offset+=outstep;
1338     }
1339     break;
1340 jfenwick 2500 default:
1341 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342 jfenwick 2500 }
1343     return &m_samples;
1344     }
1345    
1346 jfenwick 5938 const DataTypes::RealVectorType*
1347 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1348 jfenwick 3035 {
1349     if (m_readytype!='E')
1350     {
1351     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1352     }
1353     if (m_op!=CONDEVAL)
1354     {
1355     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1356     }
1357     size_t subroffset;
1358 jfenwick 2500
1359 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1360     const RealVectorType* srcres=0;
1361 jfenwick 3035 if ((*maskres)[subroffset]>0)
1362     {
1363 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1364 jfenwick 3035 }
1365     else
1366     {
1367 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1368 jfenwick 3035 }
1369    
1370     // Now we need to copy the result
1371    
1372     roffset=m_samplesize*tid;
1373     for (int i=0;i<m_samplesize;++i)
1374     {
1375 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1376 jfenwick 3035 }
1377    
1378     return &m_samples;
1379     }
1380    
1381 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1382     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1383     // If both children are expanded, then we can process them in a single operation (we treat
1384     // the whole sample as one big datapoint.
1385     // If one of the children is not expanded, then we need to treat each point in the sample
1386     // individually.
1387     // There is an additional complication when scalar operations are considered.
1388     // For example, 2+Vector.
1389     // In this case each double within the point is treated individually
1390 jfenwick 5938 const DataTypes::RealVectorType*
1391 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1392 jfenwick 2500 {
1393     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1394    
1395 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1396     // first work out which of the children are expanded
1397 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1398     bool rightExp=(m_right->m_readytype=='E');
1399     if (!leftExp && !rightExp)
1400     {
1401 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1402 jfenwick 2500 }
1403     bool leftScalar=(m_left->getRank()==0);
1404     bool rightScalar=(m_right->getRank()==0);
1405     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1406     {
1407 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1408 jfenwick 2500 }
1409     size_t leftsize=m_left->getNoValues();
1410     size_t rightsize=m_right->getNoValues();
1411 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1412     int leftstep=0; // how far should the left offset advance after each step
1413 jfenwick 2500 int rightstep=0;
1414 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1415     int oleftstep=0; // the o variables refer to the outer loop
1416     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1417 jfenwick 2500 int onumsteps=1;
1418    
1419 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1420 jfenwick 2500 bool RES=(rightExp && rightScalar);
1421 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1422 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1423 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1424 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1425 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1426 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1427    
1428 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1429 jfenwick 2500 {
1430 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1431     leftstep=0;
1432     rightstep=0;
1433     numsteps=1;
1434 jfenwick 2500 }
1435     else if (LES || RES)
1436     {
1437 caltinay 5972 chunksize=1;
1438     if (LES) // left is an expanded scalar
1439     {
1440     if (RS)
1441     {
1442     leftstep=1;
1443     rightstep=0;
1444     numsteps=m_left->getNumDPPSample();
1445     }
1446     else // RN or REN
1447     {
1448     leftstep=0;
1449     oleftstep=1;
1450     rightstep=1;
1451     orightstep=(RN ? -(int)rightsize : 0);
1452     numsteps=rightsize;
1453     onumsteps=m_left->getNumDPPSample();
1454     }
1455     }
1456     else // right is an expanded scalar
1457     {
1458     if (LS)
1459     {
1460     rightstep=1;
1461     leftstep=0;
1462     numsteps=m_right->getNumDPPSample();
1463     }
1464     else
1465     {
1466     rightstep=0;
1467     orightstep=1;
1468     leftstep=1;
1469     oleftstep=(LN ? -(int)leftsize : 0);
1470     numsteps=leftsize;
1471     onumsteps=m_right->getNumDPPSample();
1472     }
1473     }
1474 jfenwick 2500 }
1475 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1476 jfenwick 2500 {
1477 caltinay 5972 if (LEN) // and Right will be a single value
1478     {
1479     chunksize=rightsize;
1480     leftstep=rightsize;
1481     rightstep=0;
1482     numsteps=m_left->getNumDPPSample();
1483     if (RS)
1484     {
1485     numsteps*=leftsize;
1486     }
1487     }
1488     else // REN
1489     {
1490     chunksize=leftsize;
1491     rightstep=leftsize;
1492     leftstep=0;
1493     numsteps=m_right->getNumDPPSample();
1494     if (LS)
1495     {
1496     numsteps*=rightsize;
1497     }
1498     }
1499 jfenwick 2500 }
1500    
1501 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1502     // Get the values of sub-expressions
1503 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1504     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1505 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1506     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1507     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1508     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1509     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1510     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1511     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1512    
1513     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1514     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1515    
1516    
1517     roffset=m_samplesize*tid;
1518 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1519 jfenwick 2500 switch(m_op)
1520     {
1521     case ADD:
1522 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1523     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1524     &(*left)[0],
1525     &(*right)[0],
1526     chunksize,
1527     onumsteps,
1528     numsteps,
1529     resultStep,
1530     leftstep,
1531     rightstep,
1532     oleftstep,
1533     orightstep,
1534 jfenwick 6056 lroffset,
1535     rroffset,
1536 jfenwick 6042 escript::ESFunction::PLUSF);
1537 caltinay 5972 break;
1538 jfenwick 2500 case SUB:
1539 jfenwick 6057 DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1540     &(*left)[0],
1541     &(*right)[0],
1542     chunksize,
1543     onumsteps,
1544     numsteps,
1545     resultStep,
1546     leftstep,
1547     rightstep,
1548     oleftstep,
1549     orightstep,
1550     lroffset,
1551     rroffset,
1552     escript::ESFunction::MINUSF);
1553     //PROC_OP(NO_ARG,minus<double>());
1554 caltinay 5972 break;
1555 jfenwick 2500 case MUL:
1556 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1557     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1558     &(*left)[0],
1559     &(*right)[0],
1560     chunksize,
1561     onumsteps,
1562     numsteps,
1563     resultStep,
1564     leftstep,
1565     rightstep,
1566     oleftstep,
1567     orightstep,
1568     lroffset,
1569     rroffset,
1570     escript::ESFunction::MULTIPLIESF);
1571 caltinay 5972 break;
1572 jfenwick 2500 case DIV:
1573 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1574     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1575     &(*left)[0],
1576     &(*right)[0],
1577     chunksize,
1578     onumsteps,
1579     numsteps,
1580     resultStep,
1581     leftstep,
1582     rightstep,
1583     oleftstep,
1584     orightstep,
1585     lroffset,
1586     rroffset,
1587     escript::ESFunction::DIVIDESF);
1588 caltinay 5972 break;
1589 jfenwick 2500 case POW:
1590 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1591     DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1592     &(*left)[0],
1593     &(*right)[0],
1594     chunksize,
1595     onumsteps,
1596     numsteps,
1597     resultStep,
1598     leftstep,
1599     rightstep,
1600     oleftstep,
1601     orightstep,
1602     lroffset,
1603     rroffset,
1604     escript::ESFunction::POWF);
1605 caltinay 5972 break;
1606 jfenwick 2500 default:
1607 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1608 jfenwick 2500 }
1609     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1610     return &m_samples;
1611     }
1612    
1613    
1614     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1615     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1616     // unlike the other resolve helpers, we must treat these datapoints separately.
1617 jfenwick 5938 const DataTypes::RealVectorType*
1618 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1619 jfenwick 2500 {
1620     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1621    
1622 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1623     // first work out which of the children are expanded
1624 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1625     bool rightExp=(m_right->m_readytype=='E');
1626     int steps=getNumDPPSample();
1627 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1628 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1629    
1630     int resultStep=getNoValues();
1631     roffset=m_samplesize*tid;
1632     size_t offset=roffset;
1633    
1634 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1635 jfenwick 2500
1636 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1637 jfenwick 2500
1638     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1639     cout << getNoValues() << endl;)
1640    
1641    
1642     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1643     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1644     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1645     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1646     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1647     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1648     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1649    
1650 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1651 jfenwick 2500 switch(m_op)
1652     {
1653     case PROD:
1654 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1655     {
1656     const double *ptr_0 = &((*left)[lroffset]);
1657     const double *ptr_1 = &((*right)[rroffset]);
1658 jfenwick 2500
1659     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1660     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1661    
1662 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1663 jfenwick 2500
1664 caltinay 5972 lroffset+=leftStep;
1665     rroffset+=rightStep;
1666     }
1667     break;
1668 jfenwick 2500 default:
1669 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1670 jfenwick 2500 }
1671     roffset=offset;
1672     return &m_samples;
1673     }
1674    
1675 jfenwick 1898
1676 jfenwick 5938 const DataTypes::RealVectorType*
1677 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1678 jfenwick 1879 {
1679 jfenwick 2271 #ifdef _OPENMP
1680 caltinay 5972 int tid=omp_get_thread_num();
1681 jfenwick 2271 #else
1682 caltinay 5972 int tid=0;
1683 jfenwick 2271 #endif
1684 jfenwick 2777
1685     #ifdef LAZY_STACK_PROF
1686 caltinay 5972 stackstart[tid]=&tid;
1687     stackend[tid]=&tid;
1688     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1689     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1690     #pragma omp critical
1691     if (d>maxstackuse)
1692     {
1693 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1694 caltinay 5972 maxstackuse=d;
1695     }
1696     return r;
1697 jfenwick 2777 #else
1698 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1699 jfenwick 2777 #endif
1700 jfenwick 2271 }
1701    
1702    
1703 jfenwick 2497 // This needs to do the work of the identity constructor
1704 jfenwick 2177 void
1705     DataLazy::resolveToIdentity()
1706     {
1707     if (m_op==IDENTITY)
1708 caltinay 5972 return;
1709 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1710 jfenwick 2177 makeIdentity(p);
1711     }
1712 jfenwick 1889
1713 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1714     {
1715     m_op=IDENTITY;
1716     m_axis_offset=0;
1717     m_transpose=0;
1718     m_SL=m_SM=m_SR=0;
1719     m_children=m_height=0;
1720     m_id=p;
1721     if(p->isConstant()) {m_readytype='C';}
1722     else if(p->isExpanded()) {m_readytype='E';}
1723     else if (p->isTagged()) {m_readytype='T';}
1724     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1725     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1726     m_left.reset();
1727     m_right.reset();
1728     }
1729    
1730 jfenwick 2497
1731     DataReady_ptr
1732     DataLazy::resolve()
1733     {
1734     resolveToIdentity();
1735     return m_id;
1736     }
1737    
1738 jfenwick 2799
1739     /* This is really a static method but I think that caused problems in windows */
1740     void
1741     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1742     {
1743     if (dats.empty())
1744     {
1745 caltinay 5972 return;
1746 jfenwick 2799 }
1747     vector<DataLazy*> work;
1748     FunctionSpace fs=dats[0]->getFunctionSpace();
1749     bool match=true;
1750 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1751 jfenwick 2799 {
1752 caltinay 5972 if (dats[i]->m_readytype!='E')
1753     {
1754     dats[i]->collapse();
1755     }
1756     if (dats[i]->m_op!=IDENTITY)
1757     {
1758     work.push_back(dats[i]);
1759     if (fs!=dats[i]->getFunctionSpace())
1760     {
1761     match=false;
1762     }
1763     }
1764 jfenwick 2799 }
1765     if (work.empty())
1766     {
1767 caltinay 5972 return; // no work to do
1768 jfenwick 2799 }
1769 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1770     { // it is possible that dats[0] is one of the objects which we discarded and
1771     // all the other functionspaces match.
1772     vector<DataExpanded*> dep;
1773 jfenwick 6042 vector<RealVectorType*> vecs;
1774 caltinay 5972 for (int i=0;i<work.size();++i)
1775     {
1776 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1777 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1778     }
1779     int totalsamples=work[0]->getNumSamples();
1780 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1781 caltinay 5972 int sample;
1782     #pragma omp parallel private(sample, res)
1783     {
1784     size_t roffset=0;
1785     #pragma omp for schedule(static)
1786     for (sample=0;sample<totalsamples;++sample)
1787     {
1788     roffset=0;
1789     int j;
1790     for (j=work.size()-1;j>=0;--j)
1791     {
1792 jfenwick 2799 #ifdef _OPENMP
1793 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1794 jfenwick 2799 #else
1795 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1796 jfenwick 2799 #endif
1797 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1798     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1799     }
1800     }
1801     }
1802     // Now we need to load the new results as identity ops into the lazy nodes
1803     for (int i=work.size()-1;i>=0;--i)
1804     {
1805 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1806 caltinay 5972 }
1807 jfenwick 2799 }
1808 caltinay 5972 else // functionspaces do not match
1809 jfenwick 2799 {
1810 caltinay 5972 for (int i=0;i<work.size();++i)
1811     {
1812     work[i]->resolveToIdentity();
1813     }
1814 jfenwick 2799 }
1815     }
1816    
1817    
1818    
1819 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1820     DataReady_ptr
1821     DataLazy::resolveNodeWorker()
1822     {
1823 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1824 jfenwick 2500 {
1825     collapse();
1826     }
1827 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1828 jfenwick 2500 {
1829     return m_id;
1830     }
1831 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1832 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1833     RealVectorType& resvec=result->getVectorRW();
1834 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1835    
1836     int sample;
1837     int totalsamples=getNumSamples();
1838 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1839 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1840 jfenwick 2777 #pragma omp parallel private(sample,res)
1841 jfenwick 2500 {
1842 caltinay 5972 size_t roffset=0;
1843 jfenwick 2777 #ifdef LAZY_STACK_PROF
1844 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1845     stackend[omp_get_thread_num()]=&roffset;
1846 jfenwick 2777 #endif
1847 caltinay 5972 #pragma omp for schedule(static)
1848     for (sample=0;sample<totalsamples;++sample)
1849     {
1850     roffset=0;
1851 jfenwick 2500 #ifdef _OPENMP
1852 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1853 jfenwick 2500 #else
1854 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1855 jfenwick 2500 #endif
1856     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1857     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1858 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1859     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1860     }
1861 jfenwick 2500 }
1862 jfenwick 2777 #ifdef LAZY_STACK_PROF
1863     for (int i=0;i<getNumberOfThreads();++i)
1864     {
1865 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1866     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1867     if (r>maxstackuse)
1868     {
1869     maxstackuse=r;
1870     }
1871 jfenwick 2777 }
1872     cout << "Max resolve Stack use=" << maxstackuse << endl;
1873     #endif
1874 jfenwick 2500 return resptr;
1875     }
1876    
1877 jfenwick 1865 std::string
1878     DataLazy::toString() const
1879     {
1880 jfenwick 1886 ostringstream oss;
1881 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1882 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1883 jfenwick 2737 {
1884 caltinay 5972 case 1: // tree format
1885     oss << endl;
1886     intoTreeString(oss,"");
1887     break;
1888     case 2: // just the depth
1889     break;
1890 jfenwick 2795 default:
1891 caltinay 5972 intoString(oss);
1892     break;
1893 jfenwick 2737 }
1894 jfenwick 1886 return oss.str();
1895 jfenwick 1865 }
1896    
1897 jfenwick 1899
1898 jfenwick 1886 void
1899     DataLazy::intoString(ostringstream& oss) const
1900     {
1901 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1902 jfenwick 1886 switch (getOpgroup(m_op))
1903     {
1904 jfenwick 1889 case G_IDENTITY:
1905 caltinay 5972 if (m_id->isExpanded())
1906     {
1907     oss << "E";
1908     }
1909     else if (m_id->isTagged())
1910     {
1911     oss << "T";
1912     }
1913     else if (m_id->isConstant())
1914     {
1915     oss << "C";
1916     }
1917     else
1918     {
1919     oss << "?";
1920     }
1921     oss << '@' << m_id.get();
1922     break;
1923 jfenwick 1886 case G_BINARY:
1924 caltinay 5972 oss << '(';
1925     m_left->intoString(oss);
1926     oss << ' ' << opToString(m_op) << ' ';
1927     m_right->intoString(oss);
1928     oss << ')';
1929     break;
1930 jfenwick 1886 case G_UNARY:
1931 jfenwick 2147 case G_UNARY_P:
1932 jfenwick 2037 case G_NP1OUT:
1933 jfenwick 2084 case G_NP1OUT_P:
1934 jfenwick 2721 case G_REDUCTION:
1935 caltinay 5972 oss << opToString(m_op) << '(';
1936     m_left->intoString(oss);
1937     oss << ')';
1938     break;
1939 jfenwick 2066 case G_TENSORPROD:
1940 caltinay 5972 oss << opToString(m_op) << '(';
1941     m_left->intoString(oss);
1942     oss << ", ";
1943     m_right->intoString(oss);
1944     oss << ')';
1945     break;
1946 jfenwick 2496 case G_NP1OUT_2P:
1947 caltinay 5972 oss << opToString(m_op) << '(';
1948     m_left->intoString(oss);
1949     oss << ", " << m_axis_offset << ", " << m_transpose;
1950     oss << ')';
1951     break;
1952 jfenwick 3035 case G_CONDEVAL:
1953 caltinay 5972 oss << opToString(m_op)<< '(' ;
1954     m_mask->intoString(oss);
1955     oss << " ? ";
1956     m_left->intoString(oss);
1957     oss << " : ";
1958     m_right->intoString(oss);
1959     oss << ')';
1960     break;
1961 jfenwick 1886 default:
1962 caltinay 5972 oss << "UNKNOWN";
1963 jfenwick 1886 }
1964     }
1965    
1966 jfenwick 2737
1967     void
1968     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1969     {
1970     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1971     switch (getOpgroup(m_op))
1972     {
1973     case G_IDENTITY:
1974 caltinay 5972 if (m_id->isExpanded())
1975     {
1976     oss << "E";
1977     }
1978     else if (m_id->isTagged())
1979     {
1980     oss << "T";
1981     }
1982     else if (m_id->isConstant())
1983     {
1984     oss << "C";
1985     }
1986     else
1987     {
1988     oss << "?";
1989     }
1990     oss << '@' << m_id.get() << endl;
1991     break;
1992 jfenwick 2737 case G_BINARY:
1993 caltinay 5972 oss << opToString(m_op) << endl;
1994     indent+='.';
1995     m_left->intoTreeString(oss, indent);
1996     m_right->intoTreeString(oss, indent);
1997     break;
1998 jfenwick 2737 case G_UNARY:
1999     case G_UNARY_P:
2000     case G_NP1OUT:
2001     case G_NP1OUT_P:
2002     case G_REDUCTION:
2003 caltinay 5972 oss << opToString(m_op) << endl;
2004     indent+='.';
2005     m_left->intoTreeString(oss, indent);
2006     break;
2007 jfenwick 2737 case G_TENSORPROD:
2008 caltinay 5972 oss << opToString(m_op) << endl;
2009     indent+='.';
2010     m_left->intoTreeString(oss, indent);
2011     m_right->intoTreeString(oss, indent);
2012     break;
2013 jfenwick 2737 case G_NP1OUT_2P:
2014 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2015     indent+='.';
2016     m_left->intoTreeString(oss, indent);
2017     break;
2018 jfenwick 2737 default:
2019 caltinay 5972 oss << "UNKNOWN";
2020 jfenwick 2737 }
2021     }
2022    
2023    
2024 jfenwick 1865 DataAbstract*
2025 jfenwick 5938 DataLazy::deepCopy() const
2026 jfenwick 1865 {
2027 jfenwick 1935 switch (getOpgroup(m_op))
2028 jfenwick 1865 {
2029 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
2030 caltinay 5972 case G_UNARY:
2031 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2032 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2033     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2034 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2035     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2036 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2037     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2038 jfenwick 1935 default:
2039 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2040 jfenwick 1865 }
2041     }
2042    
2043    
2044 jfenwick 2721
2045 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2046     // Instances of DataReady can look at the size of their vectors.
2047     // For lazy though, it could be the size the data would be if it were resolved;
2048     // or it could be some function of the lengths of the DataReady instances which
2049     // form part of the expression.
2050     // Rather than have people making assumptions, I have disabled the method.
2051 jfenwick 5938 DataTypes::RealVectorType::size_type
2052 jfenwick 1865 DataLazy::getLength() const
2053     {
2054 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2055 jfenwick 1865 }
2056    
2057    
2058     DataAbstract*
2059     DataLazy::getSlice(const DataTypes::RegionType& region) const
2060     {
2061 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2062 jfenwick 1865 }
2063    
2064 jfenwick 1935
2065     // To do this we need to rely on our child nodes
2066 jfenwick 5938 DataTypes::RealVectorType::size_type
2067 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2068 jfenwick 1935 int dataPointNo)
2069     {
2070     if (m_op==IDENTITY)
2071     {
2072 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2073 jfenwick 1935 }
2074     if (m_readytype!='E')
2075     {
2076 caltinay 5972 collapse();
2077     return m_id->getPointOffset(sampleNo,dataPointNo);
2078 jfenwick 1935 }
2079     // at this point we do not have an identity node and the expression will be Expanded
2080     // so we only need to know which child to ask
2081     if (m_left->m_readytype=='E')
2082     {
2083 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2084 jfenwick 1935 }
2085     else
2086     {
2087 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2088 jfenwick 1935 }
2089     }
2090    
2091     // To do this we need to rely on our child nodes
2092 jfenwick 5938 DataTypes::RealVectorType::size_type
2093 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2094 jfenwick 1865 int dataPointNo) const
2095     {
2096 jfenwick 1935 if (m_op==IDENTITY)
2097     {
2098 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2099 jfenwick 1935 }
2100     if (m_readytype=='E')
2101     {
2102     // at this point we do not have an identity node and the expression will be Expanded
2103     // so we only need to know which child to ask
2104     if (m_left->m_readytype=='E')
2105     {
2106 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2107 jfenwick 1935 }
2108     else
2109     {
2110 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2111 jfenwick 1935 }
2112     }
2113     if (m_readytype=='C')
2114     {
2115 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2116 jfenwick 1935 }
2117     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2118 jfenwick 1865 }
2119    
2120 jfenwick 2271
2121     // I have decided to let Data:: handle this issue.
2122 jfenwick 1901 void
2123     DataLazy::setToZero()
2124     {
2125 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2126 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2127     // m_op=IDENTITY;
2128     // m_right.reset();
2129     // m_left.reset();
2130     // m_readytype='C';
2131     // m_buffsRequired=1;
2132    
2133 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2134 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2135 jfenwick 1901 }
2136    
2137 jfenwick 2271 bool
2138     DataLazy::actsExpanded() const
2139     {
2140 caltinay 5972 return (m_readytype=='E');
2141 jfenwick 2271 }
2142    
2143 caltinay 5997 } // end namespace
2144 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26