/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6114 - (hide annotations)
Fri Apr 1 03:37:52 2016 UTC (2 years, 11 months ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 66372 byte(s)
symmetric and antisymmetric now support complex

Also a number of methods are renamed from nonsymmetric

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 3317 #ifdef USE_NETCDF
26     #include <netcdfcpp.h>
27     #endif
28    
29 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
30 jfenwick 2199
31 jfenwick 5938 using namespace escript::DataTypes;
32    
33 caltinay 5972 #define NO_ARG
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48 jfenwick 2472
49 jfenwick 2501
50 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52 jfenwick 1899 /*
53     How does DataLazy work?
54     ~~~~~~~~~~~~~~~~~~~~~~~
55    
56     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57     denoted left and right.
58    
59     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60     This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63 caltinay 4286 Note that IDENTITY is not considered a unary operation.
64 jfenwick 1899
65     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66     It must however form a DAG (directed acyclic graph).
67     I will refer to individual DataLazy objects with the structure as nodes.
68    
69     Each node also stores:
70     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71 caltinay 5972 evaluated.
72 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73 caltinay 5972 evaluate the expression.
74 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
75    
76     When a new node is created, the above values are computed based on the values in the child nodes.
77     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79     The resolve method, which produces a DataReady from a DataLazy, does the following:
80     1) Create a DataReady to hold the new result.
81     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86     resolveSample returns a Vector* and an offset within that vector where the result is stored.
87     Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93 jfenwick 2037
94 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 jfenwick 2037 1) Add to the ES_optype.
96     2) determine what opgroup your operation belongs to (X)
97     3) add a string for the op to the end of ES_opstrings
98     4) increase ES_opcount
99     5) add an entry (X) to opgroups
100     6) add an entry to the switch in collapseToReady
101     7) add an entry to resolveX
102 jfenwick 1899 */
103    
104    
105 jfenwick 1865 using namespace std;
106 jfenwick 1868 using namespace boost;
107 jfenwick 1865
108     namespace escript
109     {
110    
111     namespace
112     {
113    
114 jfenwick 2777
115     // enabling this will print out when ever the maximum stacksize used by resolve increases
116     // it assumes _OPENMP is also in use
117     //#define LAZY_STACK_PROF
118    
119    
120    
121     #ifndef _OPENMP
122     #ifdef LAZY_STACK_PROF
123     #undef LAZY_STACK_PROF
124     #endif
125     #endif
126    
127    
128     #ifdef LAZY_STACK_PROF
129     std::vector<void*> stackstart(getNumberOfThreads());
130     std::vector<void*> stackend(getNumberOfThreads());
131     size_t maxstackuse=0;
132     #endif
133    
134 jfenwick 1886 enum ES_opgroup
135     {
136     G_UNKNOWN,
137     G_IDENTITY,
138 caltinay 5972 G_BINARY, // pointwise operations with two arguments
139     G_UNARY, // pointwise operations with one argument
140     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141     G_NP1OUT, // non-pointwise op with one output
142     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD, // general tensor product
144     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145     G_REDUCTION, // non-pointwise unary op with a scalar output
146 jfenwick 3035 G_CONDEVAL
147 jfenwick 1886 };
148    
149    
150    
151    
152 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153 caltinay 5972 "sin","cos","tan",
154     "asin","acos","atan","sinh","cosh","tanh","erf",
155     "asinh","acosh","atanh",
156     "log10","log","sign","abs","neg","pos","exp","sqrt",
157     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158 jfenwick 6114 "symmetric","antisymmetric",
159 caltinay 5972 "prod",
160     "transpose", "trace",
161     "swapaxes",
162     "minval", "maxval",
163     "condEval"};
164 jfenwick 3035 int ES_opcount=44;
165 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
167     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
168     G_UNARY,G_UNARY,G_UNARY, // 20
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
171     G_NP1OUT,G_NP1OUT,
172     G_TENSORPROD,
173     G_NP1OUT_P, G_NP1OUT_P,
174     G_NP1OUT_2P,
175     G_REDUCTION, G_REDUCTION,
176     G_CONDEVAL};
177 jfenwick 1886 inline
178     ES_opgroup
179     getOpgroup(ES_optype op)
180     {
181     return opgroups[op];
182     }
183    
184 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
185     FunctionSpace
186     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 caltinay 5972 // perhaps this should call interpolate and throw or something?
189     // maybe we need an interpolate node -
190     // that way, if interpolate is required in any other op we can just throw a
191     // programming error exception.
192 jfenwick 1879
193 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
194     FunctionSpace r=right->getFunctionSpace();
195     if (l!=r)
196     {
197 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198     if (res==1)
199 jfenwick 1943 {
200 caltinay 5972 return l;
201 jfenwick 1943 }
202 jfenwick 4264 if (res==-1)
203 jfenwick 1943 {
204 caltinay 5972 return r;
205 jfenwick 1943 }
206     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207     }
208     return l;
209 jfenwick 1865 }
210    
211     // return the shape of the result of "left op right"
212 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213 jfenwick 1865 DataTypes::ShapeType
214     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215     {
216 caltinay 5972 if (left->getShape()!=right->getShape())
217     {
218     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219     {
220     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221     }
222 jfenwick 2721
223 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
224     {
225     return right->getShape();
226     }
227     if (right->getRank()==0)
228     {
229     return left->getShape();
230     }
231     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232     }
233     return left->getShape();
234 jfenwick 1865 }
235    
236 jfenwick 2084 // return the shape for "op left"
237    
238     DataTypes::ShapeType
239 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240 jfenwick 2084 {
241 caltinay 5972 switch(op)
242     {
243     case TRANS:
244     { // for the scoping of variables
245     const DataTypes::ShapeType& s=left->getShape();
246     DataTypes::ShapeType sh;
247     int rank=left->getRank();
248     if (axis_offset<0 || axis_offset>rank)
249     {
250 caltinay 2779 stringstream e;
251     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252     throw DataException(e.str());
253 caltinay 5972 }
254     for (int i=0; i<rank; i++)
255     {
256     int index = (axis_offset+i)%rank;
257     sh.push_back(s[index]); // Append to new shape
258     }
259     return sh;
260     }
261     break;
262     case TRACE:
263     {
264     int rank=left->getRank();
265     if (rank<2)
266     {
267     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268     }
269     if ((axis_offset>rank-2) || (axis_offset<0))
270     {
271     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272     }
273     if (rank==2)
274     {
275     return DataTypes::scalarShape;
276     }
277     else if (rank==3)
278     {
279     DataTypes::ShapeType sh;
280     if (axis_offset==0)
281     {
282     sh.push_back(left->getShape()[2]);
283     }
284     else // offset==1
285     {
286     sh.push_back(left->getShape()[0]);
287     }
288     return sh;
289     }
290     else if (rank==4)
291     {
292     DataTypes::ShapeType sh;
293     const DataTypes::ShapeType& s=left->getShape();
294     if (axis_offset==0)
295     {
296     sh.push_back(s[2]);
297     sh.push_back(s[3]);
298     }
299     else if (axis_offset==1)
300     {
301     sh.push_back(s[0]);
302     sh.push_back(s[3]);
303     }
304     else // offset==2
305     {
306     sh.push_back(s[0]);
307     sh.push_back(s[1]);
308     }
309     return sh;
310     }
311     else // unknown rank
312     {
313     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314     }
315     }
316     break;
317     default:
318     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319     }
320 jfenwick 2084 }
321    
322 jfenwick 2496 DataTypes::ShapeType
323     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324     {
325     // This code taken from the Data.cpp swapaxes() method
326     // Some of the checks are probably redundant here
327     int axis0_tmp,axis1_tmp;
328     const DataTypes::ShapeType& s=left->getShape();
329     DataTypes::ShapeType out_shape;
330     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     int rank=left->getRank();
333     if (rank<2) {
334     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335     }
336     if (axis0<0 || axis0>rank-1) {
337 caltinay 2779 stringstream e;
338     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339     throw DataException(e.str());
340 jfenwick 2496 }
341     if (axis1<0 || axis1>rank-1) {
342 caltinay 2782 stringstream e;
343 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344     throw DataException(e.str());
345 jfenwick 2496 }
346     if (axis0 == axis1) {
347     throw DataException("Error - Data::swapaxes: axis indices must be different.");
348     }
349     if (axis0 > axis1) {
350     axis0_tmp=axis1;
351     axis1_tmp=axis0;
352     } else {
353     axis0_tmp=axis0;
354     axis1_tmp=axis1;
355     }
356     for (int i=0; i<rank; i++) {
357     if (i == axis0_tmp) {
358     out_shape.push_back(s[axis1_tmp]);
359     } else if (i == axis1_tmp) {
360     out_shape.push_back(s[axis0_tmp]);
361     } else {
362     out_shape.push_back(s[i]);
363     }
364     }
365     return out_shape;
366     }
367    
368    
369 jfenwick 2066 // determine the output shape for the general tensor product operation
370     // the additional parameters return information required later for the product
371     // the majority of this code is copy pasted from C_General_Tensor_Product
372     DataTypes::ShapeType
373     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374 jfenwick 1865 {
375 caltinay 5972
376 jfenwick 2066 // Get rank and shape of inputs
377     int rank0 = left->getRank();
378     int rank1 = right->getRank();
379     const DataTypes::ShapeType& shape0 = left->getShape();
380     const DataTypes::ShapeType& shape1 = right->getShape();
381    
382     // Prepare for the loops of the product and verify compatibility of shapes
383     int start0=0, start1=0;
384 caltinay 5972 if (transpose == 0) {}
385     else if (transpose == 1) { start0 = axis_offset; }
386     else if (transpose == 2) { start1 = rank1-axis_offset; }
387     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388 jfenwick 2066
389 jfenwick 2085 if (rank0<axis_offset)
390     {
391 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392 jfenwick 2085 }
393 jfenwick 2066
394     // Adjust the shapes for transpose
395 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
396     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
397     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399 jfenwick 2066
400     // Prepare for the loops of the product
401     SL=1, SM=1, SR=1;
402 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
403 jfenwick 2066 SL *= tmpShape0[i];
404     }
405 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
406 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408     }
409     SM *= tmpShape0[i];
410     }
411 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
412 jfenwick 2066 SR *= tmpShape1[i];
413     }
414    
415     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
417     { // block to limit the scope of out_index
418 jfenwick 2066 int out_index=0;
419     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421     }
422 jfenwick 2086
423     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424     {
425     ostringstream os;
426     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427     throw DataException(os.str());
428     }
429    
430 jfenwick 2066 return shape2;
431 jfenwick 1865 }
432    
433 caltinay 5972 } // end anonymous namespace
434 jfenwick 1865
435    
436 jfenwick 1899
437     // Return a string representing the operation
438 jfenwick 1865 const std::string&
439     opToString(ES_optype op)
440     {
441     if (op<0 || op>=ES_opcount)
442     {
443     op=UNKNOWNOP;
444     }
445     return ES_opstrings[op];
446     }
447    
448 jfenwick 2500 void DataLazy::LazyNodeSetup()
449     {
450     #ifdef _OPENMP
451     int numthreads=omp_get_max_threads();
452     m_samples.resize(numthreads*m_samplesize);
453     m_sampleids=new int[numthreads];
454     for (int i=0;i<numthreads;++i)
455     {
456     m_sampleids[i]=-1;
457     }
458     #else
459     m_samples.resize(m_samplesize);
460     m_sampleids=new int[1];
461     m_sampleids[0]=-1;
462     #endif // _OPENMP
463     }
464 jfenwick 1865
465 jfenwick 2500
466     // Creates an identity node
467 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
468 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
469     ,m_sampleids(0),
470     m_samples(1)
471 jfenwick 1865 {
472 jfenwick 1879 if (p->isLazy())
473     {
474 caltinay 5972 // I don't want identity of Lazy.
475     // Question: Why would that be so bad?
476     // Answer: We assume that the child of ID is something we can call getVector on
477     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478 jfenwick 1879 }
479     else
480     {
481 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
482     makeIdentity(dr);
483 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
484 jfenwick 1879 }
485 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
486 jfenwick 1865 }
487    
488 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
489 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
490     m_op(op),
491     m_axis_offset(0),
492     m_transpose(0),
493     m_SL(0), m_SM(0), m_SR(0)
494 jfenwick 1886 {
495 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
496 jfenwick 1886 {
497 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
498 jfenwick 1886 }
499 jfenwick 2066
500 jfenwick 1886 DataLazy_ptr lleft;
501     if (!left->isLazy())
502     {
503 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
504 jfenwick 1886 }
505     else
506     {
507 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
508 jfenwick 1886 }
509 jfenwick 1889 m_readytype=lleft->m_readytype;
510 jfenwick 1886 m_left=lleft;
511     m_samplesize=getNumDPPSample()*getNoValues();
512 jfenwick 2177 m_children=m_left->m_children+1;
513     m_height=m_left->m_height+1;
514 jfenwick 2500 LazyNodeSetup();
515 jfenwick 2177 SIZELIMIT
516 jfenwick 1886 }
517    
518    
519 jfenwick 1943 // In this constructor we need to consider interpolation
520 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
521 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
522     m_op(op),
523     m_SL(0), m_SM(0), m_SR(0)
524 jfenwick 1879 {
525 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
526 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
527 jfenwick 1886 {
528 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
529 jfenwick 1886 }
530 jfenwick 1943
531 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
532 jfenwick 1943 {
533 caltinay 5972 FunctionSpace fs=getFunctionSpace();
534     Data ltemp(left);
535     Data tmp(ltemp,fs);
536     left=tmp.borrowDataPtr();
537 jfenwick 1943 }
538 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
539 jfenwick 1943 {
540 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
541     right=tmp.borrowDataPtr();
542 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
543 jfenwick 1943 }
544     left->operandCheck(*right);
545    
546 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
547 jfenwick 1879 {
548 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
549 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
550 jfenwick 1879 }
551     else
552     {
553 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
554 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
555 jfenwick 1879 }
556     if (right->isLazy())
557     {
558 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
559 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
560 jfenwick 1879 }
561     else
562     {
563 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
564 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
565 jfenwick 1879 }
566 jfenwick 1889 char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')
569     {
570 caltinay 5972 m_readytype='E';
571 jfenwick 1889 }
572     else if (lt=='T' || rt=='T')
573     {
574 caltinay 5972 m_readytype='T';
575 jfenwick 1889 }
576     else
577     {
578 caltinay 5972 m_readytype='C';
579 jfenwick 1889 }
580 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
581 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;
583 jfenwick 2500 LazyNodeSetup();
584 jfenwick 2177 SIZELIMIT
585 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
586 jfenwick 1879 }
587    
588 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
589 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
590     m_op(op),
591     m_axis_offset(axis_offset),
592     m_transpose(transpose)
593 jfenwick 2066 {
594     if ((getOpgroup(op)!=G_TENSORPROD))
595     {
596 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
597 jfenwick 2066 }
598     if ((transpose>2) || (transpose<0))
599     {
600 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
601 jfenwick 2066 }
602 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
603 jfenwick 2066 {
604 caltinay 5972 FunctionSpace fs=getFunctionSpace();
605     Data ltemp(left);
606     Data tmp(ltemp,fs);
607     left=tmp.borrowDataPtr();
608 jfenwick 2066 }
609 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
610 jfenwick 2066 {
611 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
612     right=tmp.borrowDataPtr();
613 jfenwick 2066 }
614 jfenwick 2195 // left->operandCheck(*right);
615 jfenwick 1879
616 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
617 jfenwick 2066 {
618 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
619 jfenwick 2066 }
620     else
621     {
622 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
623 jfenwick 2066 }
624     if (right->isLazy())
625     {
626 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
627 jfenwick 2066 }
628     else
629     {
630 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
631 jfenwick 2066 }
632     char lt=m_left->m_readytype;
633     char rt=m_right->m_readytype;
634     if (lt=='E' || rt=='E')
635     {
636 caltinay 5972 m_readytype='E';
637 jfenwick 2066 }
638     else if (lt=='T' || rt=='T')
639     {
640 caltinay 5972 m_readytype='T';
641 jfenwick 2066 }
642     else
643     {
644 caltinay 5972 m_readytype='C';
645 jfenwick 2066 }
646     m_samplesize=getNumDPPSample()*getNoValues();
647 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
648     m_height=max(m_left->m_height,m_right->m_height)+1;
649 jfenwick 2500 LazyNodeSetup();
650 jfenwick 2177 SIZELIMIT
651 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
652 jfenwick 2066 }
653    
654    
655 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
656 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
657     m_op(op),
658     m_axis_offset(axis_offset),
659     m_transpose(0),
660     m_tol(0)
661 jfenwick 2084 {
662     if ((getOpgroup(op)!=G_NP1OUT_P))
663     {
664 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
665 jfenwick 2084 }
666     DataLazy_ptr lleft;
667     if (!left->isLazy())
668     {
669 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
670 jfenwick 2084 }
671     else
672     {
673 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
674 jfenwick 2084 }
675     m_readytype=lleft->m_readytype;
676     m_left=lleft;
677     m_samplesize=getNumDPPSample()*getNoValues();
678 jfenwick 2177 m_children=m_left->m_children+1;
679     m_height=m_left->m_height+1;
680 jfenwick 2500 LazyNodeSetup();
681 jfenwick 2177 SIZELIMIT
682 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
683 jfenwick 2084 }
684    
685 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
686 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
687     m_op(op),
688     m_axis_offset(0),
689     m_transpose(0),
690     m_tol(tol)
691 jfenwick 2147 {
692     if ((getOpgroup(op)!=G_UNARY_P))
693     {
694 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
695 jfenwick 2147 }
696     DataLazy_ptr lleft;
697     if (!left->isLazy())
698     {
699 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
700 jfenwick 2147 }
701     else
702     {
703 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
704 jfenwick 2147 }
705     m_readytype=lleft->m_readytype;
706     m_left=lleft;
707     m_samplesize=getNumDPPSample()*getNoValues();
708 jfenwick 2177 m_children=m_left->m_children+1;
709     m_height=m_left->m_height+1;
710 jfenwick 2500 LazyNodeSetup();
711 jfenwick 2177 SIZELIMIT
712 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
713     }
714 jfenwick 2084
715 jfenwick 2496
716     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
717 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
718     m_op(op),
719     m_axis_offset(axis0),
720     m_transpose(axis1),
721     m_tol(0)
722 jfenwick 2496 {
723     if ((getOpgroup(op)!=G_NP1OUT_2P))
724     {
725 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
726 jfenwick 2496 }
727     DataLazy_ptr lleft;
728     if (!left->isLazy())
729     {
730 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
731 jfenwick 2496 }
732     else
733     {
734 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
735 jfenwick 2496 }
736     m_readytype=lleft->m_readytype;
737     m_left=lleft;
738     m_samplesize=getNumDPPSample()*getNoValues();
739     m_children=m_left->m_children+1;
740     m_height=m_left->m_height+1;
741 jfenwick 2500 LazyNodeSetup();
742 jfenwick 2496 SIZELIMIT
743     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
744     }
745    
746 jfenwick 3035
747     namespace
748     {
749    
750     inline int max3(int a, int b, int c)
751     {
752 caltinay 5972 int t=(a>b?a:b);
753     return (t>c?t:c);
754 jfenwick 3035
755     }
756     }
757    
758     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
759 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
760     m_op(CONDEVAL),
761     m_axis_offset(0),
762     m_transpose(0),
763     m_tol(0)
764 jfenwick 3035 {
765    
766     DataLazy_ptr lmask;
767     DataLazy_ptr lleft;
768     DataLazy_ptr lright;
769     if (!mask->isLazy())
770     {
771 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
772 jfenwick 3035 }
773     else
774     {
775 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
776 jfenwick 3035 }
777     if (!left->isLazy())
778     {
779 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
780 jfenwick 3035 }
781     else
782     {
783 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
784 jfenwick 3035 }
785     if (!right->isLazy())
786     {
787 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
788 jfenwick 3035 }
789     else
790     {
791 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
792 jfenwick 3035 }
793     m_readytype=lmask->m_readytype;
794     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
795     {
796 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
797 jfenwick 3035 }
798     m_left=lleft;
799     m_right=lright;
800     m_mask=lmask;
801     m_samplesize=getNumDPPSample()*getNoValues();
802     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
803     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
804     LazyNodeSetup();
805     SIZELIMIT
806     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
807     }
808    
809    
810    
811 jfenwick 1865 DataLazy::~DataLazy()
812     {
813 jfenwick 2500 delete[] m_sampleids;
814 jfenwick 1865 }
815    
816 jfenwick 1879
817 jfenwick 1899 /*
818     \brief Evaluates the expression using methods on Data.
819     This does the work for the collapse method.
820     For reasons of efficiency do not call this method on DataExpanded nodes.
821     */
822 jfenwick 1889 DataReady_ptr
823 jfenwick 4621 DataLazy::collapseToReady() const
824 jfenwick 1889 {
825     if (m_readytype=='E')
826 caltinay 5972 { // this is more an efficiency concern than anything else
827 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
828     }
829     if (m_op==IDENTITY)
830     {
831     return m_id;
832     }
833     DataReady_ptr pleft=m_left->collapseToReady();
834     Data left(pleft);
835     Data right;
836 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
837 jfenwick 1889 {
838     right=Data(m_right->collapseToReady());
839     }
840     Data result;
841     switch(m_op)
842     {
843     case ADD:
844 caltinay 5972 result=left+right;
845     break;
846     case SUB:
847     result=left-right;
848     break;
849     case MUL:
850     result=left*right;
851     break;
852     case DIV:
853     result=left/right;
854     break;
855 jfenwick 1889 case SIN:
856 caltinay 5972 result=left.sin();
857     break;
858 jfenwick 1889 case COS:
859 caltinay 5972 result=left.cos();
860     break;
861 jfenwick 1889 case TAN:
862 caltinay 5972 result=left.tan();
863     break;
864 jfenwick 1889 case ASIN:
865 caltinay 5972 result=left.asin();
866     break;
867 jfenwick 1889 case ACOS:
868 caltinay 5972 result=left.acos();
869     break;
870 jfenwick 1889 case ATAN:
871 caltinay 5972 result=left.atan();
872     break;
873 jfenwick 1889 case SINH:
874 caltinay 5972 result=left.sinh();
875     break;
876 jfenwick 1889 case COSH:
877 caltinay 5972 result=left.cosh();
878     break;
879 jfenwick 1889 case TANH:
880 caltinay 5972 result=left.tanh();
881     break;
882 jfenwick 1889 case ERF:
883 caltinay 5972 result=left.erf();
884     break;
885 jfenwick 1889 case ASINH:
886 caltinay 5972 result=left.asinh();
887     break;
888 jfenwick 1889 case ACOSH:
889 caltinay 5972 result=left.acosh();
890     break;
891 jfenwick 1889 case ATANH:
892 caltinay 5972 result=left.atanh();
893     break;
894 jfenwick 1889 case LOG10:
895 caltinay 5972 result=left.log10();
896     break;
897 jfenwick 1889 case LOG:
898 caltinay 5972 result=left.log();
899     break;
900 jfenwick 1889 case SIGN:
901 caltinay 5972 result=left.sign();
902     break;
903 jfenwick 1889 case ABS:
904 caltinay 5972 result=left.abs();
905     break;
906 jfenwick 1889 case NEG:
907 caltinay 5972 result=left.neg();
908     break;
909 jfenwick 1889 case POS:
910 caltinay 5972 // it doesn't mean anything for delayed.
911     // it will just trigger a deep copy of the lazy object
912     throw DataException("Programmer error - POS not supported for lazy data.");
913     break;
914 jfenwick 1889 case EXP:
915 caltinay 5972 result=left.exp();
916     break;
917 jfenwick 1889 case SQRT:
918 caltinay 5972 result=left.sqrt();
919     break;
920 jfenwick 1889 case RECIP:
921 caltinay 5972 result=left.oneOver();
922     break;
923 jfenwick 1889 case GZ:
924 caltinay 5972 result=left.wherePositive();
925     break;
926 jfenwick 1889 case LZ:
927 caltinay 5972 result=left.whereNegative();
928     break;
929 jfenwick 1889 case GEZ:
930 caltinay 5972 result=left.whereNonNegative();
931     break;
932 jfenwick 1889 case LEZ:
933 caltinay 5972 result=left.whereNonPositive();
934     break;
935 jfenwick 2147 case NEZ:
936 caltinay 5972 result=left.whereNonZero(m_tol);
937     break;
938 jfenwick 2147 case EZ:
939 caltinay 5972 result=left.whereZero(m_tol);
940     break;
941 jfenwick 2037 case SYM:
942 caltinay 5972 result=left.symmetric();
943     break;
944 jfenwick 2037 case NSYM:
945 jfenwick 6110 result=left.antisymmetric();
946 caltinay 5972 break;
947 jfenwick 2066 case PROD:
948 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
949     break;
950 jfenwick 2084 case TRANS:
951 caltinay 5972 result=left.transpose(m_axis_offset);
952     break;
953 jfenwick 2084 case TRACE:
954 caltinay 5972 result=left.trace(m_axis_offset);
955     break;
956 jfenwick 2496 case SWAP:
957 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
958     break;
959 jfenwick 2721 case MINVAL:
960 caltinay 5972 result=left.minval();
961     break;
962 jfenwick 2721 case MAXVAL:
963 caltinay 5972 result=left.minval();
964     break;
965 jfenwick 1889 default:
966 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
967 jfenwick 1889 }
968     return result.borrowReadyPtr();
969     }
970    
971 jfenwick 1899 /*
972     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
973     This method uses the original methods on the Data class to evaluate the expressions.
974     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
975     the purpose of using DataLazy in the first place).
976     */
977 jfenwick 1889 void
978 jfenwick 4621 DataLazy::collapse() const
979 jfenwick 1889 {
980     if (m_op==IDENTITY)
981     {
982 caltinay 5972 return;
983 jfenwick 1889 }
984     if (m_readytype=='E')
985 caltinay 5972 { // this is more an efficiency concern than anything else
986 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
987     }
988     m_id=collapseToReady();
989     m_op=IDENTITY;
990     }
991    
992 jfenwick 2500 // The result will be stored in m_samples
993     // The return value is a pointer to the DataVector, offset is the offset within the return value
994 jfenwick 5938 const DataTypes::RealVectorType*
995 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
996 jfenwick 2500 {
997     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
998 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
999 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1000     {
1001 caltinay 5972 collapse();
1002 jfenwick 2500 }
1003 caltinay 5972 if (m_op==IDENTITY)
1004 jfenwick 2500 {
1005 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1006 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1007 jfenwick 2777 #ifdef LAZY_STACK_PROF
1008     int x;
1009     if (&x<stackend[omp_get_thread_num()])
1010     {
1011     stackend[omp_get_thread_num()]=&x;
1012     }
1013     #endif
1014 jfenwick 2500 return &(vec);
1015     }
1016     if (m_readytype!='E')
1017     {
1018     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1019     }
1020 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1021     {
1022 caltinay 5972 roffset=tid*m_samplesize;
1023     return &(m_samples); // sample is already resolved
1024 jfenwick 2501 }
1025     m_sampleids[tid]=sampleNo;
1026 jfenwick 3035
1027 jfenwick 2500 switch (getOpgroup(m_op))
1028     {
1029     case G_UNARY:
1030     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1031     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1032     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1033     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1034     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1035     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1036 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1037 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1038 jfenwick 2500 default:
1039     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1040     }
1041     }
1042    
1043 jfenwick 5938 const DataTypes::RealVectorType*
1044 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1045 jfenwick 2500 {
1046 caltinay 5972 // we assume that any collapsing has been done before we get here
1047     // since we only have one argument we don't need to think about only
1048     // processing single points.
1049     // we will also know we won't get identity nodes
1050 jfenwick 2500 if (m_readytype!='E')
1051     {
1052     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1053     }
1054     if (m_op==IDENTITY)
1055     {
1056     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1057     }
1058 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1059 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1060     roffset=m_samplesize*tid;
1061     double* result=&(m_samples[roffset]);
1062 jfenwick 6066 escript::ESFunction operation=SINF;
1063 jfenwick 2500 switch (m_op)
1064     {
1065 jfenwick 6066 case SIN:
1066     operation=SINF;
1067     break;
1068 jfenwick 2500 case COS:
1069 jfenwick 6066 operation=COSF;
1070     break;
1071 jfenwick 2500 case TAN:
1072 jfenwick 6066 operation=TANF;
1073     break;
1074 jfenwick 2500 case ASIN:
1075 jfenwick 6066 operation=ASINF;
1076     break;
1077 jfenwick 2500 case ACOS:
1078 jfenwick 6066 operation=ACOSF;
1079     break;
1080 jfenwick 2500 case ATAN:
1081 jfenwick 6066 operation=ATANF;
1082     break;
1083 jfenwick 2500 case SINH:
1084 jfenwick 6066 operation=SINHF;
1085     break;
1086 jfenwick 2500 case COSH:
1087 jfenwick 6066 operation=COSHF;
1088     break;
1089 jfenwick 2500 case TANH:
1090 jfenwick 6066 operation=TANHF;
1091     break;
1092 jfenwick 2500 case ERF:
1093 jfenwick 6066 operation=ERFF;
1094     break;
1095 jfenwick 2500 case ASINH:
1096 jfenwick 6066 operation=ASINHF;
1097     break;
1098 jfenwick 2500 case ACOSH:
1099 jfenwick 6066 operation=ACOSHF;
1100     break;
1101 jfenwick 2500 case ATANH:
1102 jfenwick 6066 operation=ATANHF;
1103     break;
1104 jfenwick 2500 case LOG10:
1105 jfenwick 6066 operation=LOG10F;
1106     break;
1107 jfenwick 2500 case LOG:
1108 jfenwick 6066 operation=LOGF;
1109     break;
1110 jfenwick 2500 case SIGN:
1111 jfenwick 6066 operation=SIGNF;
1112     break;
1113 jfenwick 2500 case ABS:
1114 jfenwick 6066 operation=ABSF;
1115     break;
1116 jfenwick 2500 case NEG:
1117 jfenwick 6066 operation=NEGF;
1118     break;
1119 jfenwick 2500 case POS:
1120 caltinay 5972 // it doesn't mean anything for delayed.
1121     // it will just trigger a deep copy of the lazy object
1122     throw DataException("Programmer error - POS not supported for lazy data.");
1123     break;
1124 jfenwick 2500 case EXP:
1125 jfenwick 6066 operation=EXPF;
1126     break;
1127 jfenwick 2500 case SQRT:
1128 jfenwick 6066 operation=SQRTF;
1129     break;
1130 jfenwick 2500 case RECIP:
1131 jfenwick 6066 operation=INVF;
1132     break;
1133 jfenwick 2500 case GZ:
1134 jfenwick 6066 operation=GTZEROF;
1135     break;
1136 jfenwick 2500 case LZ:
1137 jfenwick 6066 operation=LTZEROF;
1138     break;
1139 jfenwick 2500 case GEZ:
1140 jfenwick 6066 operation=GEZEROF;
1141     break;
1142 jfenwick 2500 case LEZ:
1143 jfenwick 6066 operation=LEZEROF;
1144     break;
1145 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1146     case NEZ:
1147 jfenwick 6066 operation=NEQZEROF;
1148     break;
1149 jfenwick 2500 case EZ:
1150 jfenwick 6066 operation=EQZEROF;
1151     break;
1152 jfenwick 2500 default:
1153 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1154 jfenwick 2500 }
1155 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1156     left,
1157     result,
1158     operation,
1159     m_tol);
1160 jfenwick 2500 return &(m_samples);
1161     }
1162    
1163    
1164 jfenwick 5938 const DataTypes::RealVectorType*
1165 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1166 jfenwick 2721 {
1167 caltinay 5972 // we assume that any collapsing has been done before we get here
1168     // since we only have one argument we don't need to think about only
1169     // processing single points.
1170     // we will also know we won't get identity nodes
1171 jfenwick 2721 if (m_readytype!='E')
1172     {
1173     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1174     }
1175     if (m_op==IDENTITY)
1176     {
1177     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1178     }
1179     size_t loffset=0;
1180 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1181 jfenwick 2721
1182     roffset=m_samplesize*tid;
1183 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1184 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1185 jfenwick 2721 double* result=&(m_samples[roffset]);
1186     switch (m_op)
1187     {
1188     case MINVAL:
1189 caltinay 5972 {
1190     for (unsigned int z=0;z<ndpps;++z)
1191     {
1192     FMin op;
1193 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1194 caltinay 5972 loffset+=psize;
1195     result++;
1196     }
1197     }
1198     break;
1199 jfenwick 2721 case MAXVAL:
1200 caltinay 5972 {
1201     for (unsigned int z=0;z<ndpps;++z)
1202     {
1203     FMax op;
1204 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1205 caltinay 5972 loffset+=psize;
1206     result++;
1207     }
1208     }
1209     break;
1210 jfenwick 2721 default:
1211 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1212 jfenwick 2721 }
1213     return &(m_samples);
1214     }
1215    
1216 jfenwick 5938 const DataTypes::RealVectorType*
1217 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1218 jfenwick 2500 {
1219 caltinay 5972 // we assume that any collapsing has been done before we get here
1220     // since we only have one argument we don't need to think about only
1221     // processing single points.
1222 jfenwick 2500 if (m_readytype!='E')
1223     {
1224     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1225     }
1226     if (m_op==IDENTITY)
1227     {
1228     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1229     }
1230     size_t subroffset;
1231 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1232 jfenwick 2500 roffset=m_samplesize*tid;
1233     size_t loop=0;
1234     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1235     size_t step=getNoValues();
1236     size_t offset=roffset;
1237     switch (m_op)
1238     {
1239     case SYM:
1240 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1241     {
1242 jfenwick 6083 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1243 caltinay 5972 subroffset+=step;
1244     offset+=step;
1245     }
1246     break;
1247 jfenwick 2500 case NSYM:
1248 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1249     {
1250 jfenwick 6114 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1251 caltinay 5972 subroffset+=step;
1252     offset+=step;
1253     }
1254     break;
1255 jfenwick 2500 default:
1256 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1257 jfenwick 2500 }
1258     return &m_samples;
1259     }
1260    
1261 jfenwick 5938 const DataTypes::RealVectorType*
1262 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1263 jfenwick 2500 {
1264 caltinay 5972 // we assume that any collapsing has been done before we get here
1265     // since we only have one argument we don't need to think about only
1266     // processing single points.
1267 jfenwick 2500 if (m_readytype!='E')
1268     {
1269     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1270     }
1271     if (m_op==IDENTITY)
1272     {
1273     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1274     }
1275     size_t subroffset;
1276     size_t offset;
1277 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1278 jfenwick 2500 roffset=m_samplesize*tid;
1279     offset=roffset;
1280     size_t loop=0;
1281     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1282     size_t outstep=getNoValues();
1283     size_t instep=m_left->getNoValues();
1284     switch (m_op)
1285     {
1286     case TRACE:
1287 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1288     {
1289 jfenwick 6083 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1290 caltinay 5972 subroffset+=instep;
1291     offset+=outstep;
1292     }
1293     break;
1294 jfenwick 2500 case TRANS:
1295 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1296     {
1297 jfenwick 6083 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1298 caltinay 5972 subroffset+=instep;
1299     offset+=outstep;
1300     }
1301     break;
1302 jfenwick 2500 default:
1303 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1304 jfenwick 2500 }
1305     return &m_samples;
1306     }
1307    
1308    
1309 jfenwick 5938 const DataTypes::RealVectorType*
1310 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1311 jfenwick 2500 {
1312     if (m_readytype!='E')
1313     {
1314     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1315     }
1316     if (m_op==IDENTITY)
1317     {
1318     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1319     }
1320     size_t subroffset;
1321     size_t offset;
1322 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1323 jfenwick 2500 roffset=m_samplesize*tid;
1324     offset=roffset;
1325     size_t loop=0;
1326     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1327     size_t outstep=getNoValues();
1328     size_t instep=m_left->getNoValues();
1329     switch (m_op)
1330     {
1331     case SWAP:
1332 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1333     {
1334 jfenwick 6083 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1335 caltinay 5972 subroffset+=instep;
1336     offset+=outstep;
1337     }
1338     break;
1339 jfenwick 2500 default:
1340 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1341 jfenwick 2500 }
1342     return &m_samples;
1343     }
1344    
1345 jfenwick 5938 const DataTypes::RealVectorType*
1346 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1347 jfenwick 3035 {
1348     if (m_readytype!='E')
1349     {
1350     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1351     }
1352     if (m_op!=CONDEVAL)
1353     {
1354     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1355     }
1356     size_t subroffset;
1357 jfenwick 2500
1358 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1359     const RealVectorType* srcres=0;
1360 jfenwick 3035 if ((*maskres)[subroffset]>0)
1361     {
1362 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1363 jfenwick 3035 }
1364     else
1365     {
1366 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1367 jfenwick 3035 }
1368    
1369     // Now we need to copy the result
1370    
1371     roffset=m_samplesize*tid;
1372     for (int i=0;i<m_samplesize;++i)
1373     {
1374 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1375 jfenwick 3035 }
1376    
1377     return &m_samples;
1378     }
1379    
1380 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1381     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1382     // If both children are expanded, then we can process them in a single operation (we treat
1383     // the whole sample as one big datapoint.
1384     // If one of the children is not expanded, then we need to treat each point in the sample
1385     // individually.
1386     // There is an additional complication when scalar operations are considered.
1387     // For example, 2+Vector.
1388     // In this case each double within the point is treated individually
1389 jfenwick 5938 const DataTypes::RealVectorType*
1390 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1391 jfenwick 2500 {
1392     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1393    
1394 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1395     // first work out which of the children are expanded
1396 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1397     bool rightExp=(m_right->m_readytype=='E');
1398     if (!leftExp && !rightExp)
1399     {
1400 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1401 jfenwick 2500 }
1402     bool leftScalar=(m_left->getRank()==0);
1403     bool rightScalar=(m_right->getRank()==0);
1404     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1405     {
1406 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1407 jfenwick 2500 }
1408     size_t leftsize=m_left->getNoValues();
1409     size_t rightsize=m_right->getNoValues();
1410 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1411     int leftstep=0; // how far should the left offset advance after each step
1412 jfenwick 2500 int rightstep=0;
1413 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1414     int oleftstep=0; // the o variables refer to the outer loop
1415     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1416 jfenwick 2500 int onumsteps=1;
1417    
1418 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1419 jfenwick 2500 bool RES=(rightExp && rightScalar);
1420 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1421 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1422 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1423 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1424 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1425 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1426    
1427 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1428 jfenwick 2500 {
1429 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1430     leftstep=0;
1431     rightstep=0;
1432     numsteps=1;
1433 jfenwick 2500 }
1434     else if (LES || RES)
1435     {
1436 caltinay 5972 chunksize=1;
1437     if (LES) // left is an expanded scalar
1438     {
1439     if (RS)
1440     {
1441     leftstep=1;
1442     rightstep=0;
1443     numsteps=m_left->getNumDPPSample();
1444     }
1445     else // RN or REN
1446     {
1447     leftstep=0;
1448     oleftstep=1;
1449     rightstep=1;
1450     orightstep=(RN ? -(int)rightsize : 0);
1451     numsteps=rightsize;
1452     onumsteps=m_left->getNumDPPSample();
1453     }
1454     }
1455     else // right is an expanded scalar
1456     {
1457     if (LS)
1458     {
1459     rightstep=1;
1460     leftstep=0;
1461     numsteps=m_right->getNumDPPSample();
1462     }
1463     else
1464     {
1465     rightstep=0;
1466     orightstep=1;
1467     leftstep=1;
1468     oleftstep=(LN ? -(int)leftsize : 0);
1469     numsteps=leftsize;
1470     onumsteps=m_right->getNumDPPSample();
1471     }
1472     }
1473 jfenwick 2500 }
1474 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1475 jfenwick 2500 {
1476 caltinay 5972 if (LEN) // and Right will be a single value
1477     {
1478     chunksize=rightsize;
1479     leftstep=rightsize;
1480     rightstep=0;
1481     numsteps=m_left->getNumDPPSample();
1482     if (RS)
1483     {
1484     numsteps*=leftsize;
1485     }
1486     }
1487     else // REN
1488     {
1489     chunksize=leftsize;
1490     rightstep=leftsize;
1491     leftstep=0;
1492     numsteps=m_right->getNumDPPSample();
1493     if (LS)
1494     {
1495     numsteps*=rightsize;
1496     }
1497     }
1498 jfenwick 2500 }
1499    
1500 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1501     // Get the values of sub-expressions
1502 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1503     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1504 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1505     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1506     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1507     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1508     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1509     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1510     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1511    
1512     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1513     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1514    
1515    
1516     roffset=m_samplesize*tid;
1517 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1518 jfenwick 2500 switch(m_op)
1519     {
1520     case ADD:
1521 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1522 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1523 jfenwick 6042 &(*left)[0],
1524     &(*right)[0],
1525     chunksize,
1526     onumsteps,
1527     numsteps,
1528     resultStep,
1529     leftstep,
1530     rightstep,
1531     oleftstep,
1532     orightstep,
1533 jfenwick 6056 lroffset,
1534     rroffset,
1535 jfenwick 6042 escript::ESFunction::PLUSF);
1536 caltinay 5972 break;
1537 jfenwick 2500 case SUB:
1538 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1539 jfenwick 6057 &(*left)[0],
1540     &(*right)[0],
1541     chunksize,
1542     onumsteps,
1543     numsteps,
1544     resultStep,
1545     leftstep,
1546     rightstep,
1547     oleftstep,
1548     orightstep,
1549     lroffset,
1550     rroffset,
1551     escript::ESFunction::MINUSF);
1552     //PROC_OP(NO_ARG,minus<double>());
1553 caltinay 5972 break;
1554 jfenwick 2500 case MUL:
1555 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1556 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1557 jfenwick 6057 &(*left)[0],
1558     &(*right)[0],
1559     chunksize,
1560     onumsteps,
1561     numsteps,
1562     resultStep,
1563     leftstep,
1564     rightstep,
1565     oleftstep,
1566     orightstep,
1567     lroffset,
1568     rroffset,
1569     escript::ESFunction::MULTIPLIESF);
1570 caltinay 5972 break;
1571 jfenwick 2500 case DIV:
1572 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1573 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1574 jfenwick 6057 &(*left)[0],
1575     &(*right)[0],
1576     chunksize,
1577     onumsteps,
1578     numsteps,
1579     resultStep,
1580     leftstep,
1581     rightstep,
1582     oleftstep,
1583     orightstep,
1584     lroffset,
1585     rroffset,
1586     escript::ESFunction::DIVIDESF);
1587 caltinay 5972 break;
1588 jfenwick 2500 case POW:
1589 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1590 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1591 jfenwick 6057 &(*left)[0],
1592     &(*right)[0],
1593     chunksize,
1594     onumsteps,
1595     numsteps,
1596     resultStep,
1597     leftstep,
1598     rightstep,
1599     oleftstep,
1600     orightstep,
1601     lroffset,
1602     rroffset,
1603     escript::ESFunction::POWF);
1604 caltinay 5972 break;
1605 jfenwick 2500 default:
1606 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1607 jfenwick 2500 }
1608     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1609     return &m_samples;
1610     }
1611    
1612    
1613     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1614     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1615     // unlike the other resolve helpers, we must treat these datapoints separately.
1616 jfenwick 5938 const DataTypes::RealVectorType*
1617 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1618 jfenwick 2500 {
1619     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1620    
1621 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1622     // first work out which of the children are expanded
1623 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1624     bool rightExp=(m_right->m_readytype=='E');
1625     int steps=getNumDPPSample();
1626 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1627 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1628    
1629     int resultStep=getNoValues();
1630     roffset=m_samplesize*tid;
1631     size_t offset=roffset;
1632    
1633 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1634 jfenwick 2500
1635 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1636 jfenwick 2500
1637     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1638     cout << getNoValues() << endl;)
1639    
1640    
1641     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1642     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1643     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1644     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1645     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1646     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1647     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1648    
1649 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1650 jfenwick 2500 switch(m_op)
1651     {
1652     case PROD:
1653 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1654     {
1655     const double *ptr_0 = &((*left)[lroffset]);
1656     const double *ptr_1 = &((*right)[rroffset]);
1657 jfenwick 2500
1658     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1659     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1660    
1661 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1662 jfenwick 2500
1663 caltinay 5972 lroffset+=leftStep;
1664     rroffset+=rightStep;
1665     }
1666     break;
1667 jfenwick 2500 default:
1668 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1669 jfenwick 2500 }
1670     roffset=offset;
1671     return &m_samples;
1672     }
1673    
1674 jfenwick 1898
1675 jfenwick 5938 const DataTypes::RealVectorType*
1676 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1677 jfenwick 1879 {
1678 jfenwick 2271 #ifdef _OPENMP
1679 caltinay 5972 int tid=omp_get_thread_num();
1680 jfenwick 2271 #else
1681 caltinay 5972 int tid=0;
1682 jfenwick 2271 #endif
1683 jfenwick 2777
1684     #ifdef LAZY_STACK_PROF
1685 caltinay 5972 stackstart[tid]=&tid;
1686     stackend[tid]=&tid;
1687     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1688     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1689     #pragma omp critical
1690     if (d>maxstackuse)
1691     {
1692 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1693 caltinay 5972 maxstackuse=d;
1694     }
1695     return r;
1696 jfenwick 2777 #else
1697 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1698 jfenwick 2777 #endif
1699 jfenwick 2271 }
1700    
1701    
1702 jfenwick 2497 // This needs to do the work of the identity constructor
1703 jfenwick 2177 void
1704     DataLazy::resolveToIdentity()
1705     {
1706     if (m_op==IDENTITY)
1707 caltinay 5972 return;
1708 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1709 jfenwick 2177 makeIdentity(p);
1710     }
1711 jfenwick 1889
1712 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1713     {
1714     m_op=IDENTITY;
1715     m_axis_offset=0;
1716     m_transpose=0;
1717     m_SL=m_SM=m_SR=0;
1718     m_children=m_height=0;
1719     m_id=p;
1720     if(p->isConstant()) {m_readytype='C';}
1721     else if(p->isExpanded()) {m_readytype='E';}
1722     else if (p->isTagged()) {m_readytype='T';}
1723     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1724     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1725     m_left.reset();
1726     m_right.reset();
1727     }
1728    
1729 jfenwick 2497
1730     DataReady_ptr
1731     DataLazy::resolve()
1732     {
1733     resolveToIdentity();
1734     return m_id;
1735     }
1736    
1737 jfenwick 2799
1738     /* This is really a static method but I think that caused problems in windows */
1739     void
1740     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1741     {
1742     if (dats.empty())
1743     {
1744 caltinay 5972 return;
1745 jfenwick 2799 }
1746     vector<DataLazy*> work;
1747     FunctionSpace fs=dats[0]->getFunctionSpace();
1748     bool match=true;
1749 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1750 jfenwick 2799 {
1751 caltinay 5972 if (dats[i]->m_readytype!='E')
1752     {
1753     dats[i]->collapse();
1754     }
1755     if (dats[i]->m_op!=IDENTITY)
1756     {
1757     work.push_back(dats[i]);
1758     if (fs!=dats[i]->getFunctionSpace())
1759     {
1760     match=false;
1761     }
1762     }
1763 jfenwick 2799 }
1764     if (work.empty())
1765     {
1766 caltinay 5972 return; // no work to do
1767 jfenwick 2799 }
1768 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1769     { // it is possible that dats[0] is one of the objects which we discarded and
1770     // all the other functionspaces match.
1771     vector<DataExpanded*> dep;
1772 jfenwick 6042 vector<RealVectorType*> vecs;
1773 caltinay 5972 for (int i=0;i<work.size();++i)
1774     {
1775 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1776 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1777     }
1778     int totalsamples=work[0]->getNumSamples();
1779 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1780 caltinay 5972 int sample;
1781     #pragma omp parallel private(sample, res)
1782     {
1783     size_t roffset=0;
1784     #pragma omp for schedule(static)
1785     for (sample=0;sample<totalsamples;++sample)
1786     {
1787     roffset=0;
1788     int j;
1789     for (j=work.size()-1;j>=0;--j)
1790     {
1791 jfenwick 2799 #ifdef _OPENMP
1792 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1793 jfenwick 2799 #else
1794 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1795 jfenwick 2799 #endif
1796 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1797     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1798     }
1799     }
1800     }
1801     // Now we need to load the new results as identity ops into the lazy nodes
1802     for (int i=work.size()-1;i>=0;--i)
1803     {
1804 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1805 caltinay 5972 }
1806 jfenwick 2799 }
1807 caltinay 5972 else // functionspaces do not match
1808 jfenwick 2799 {
1809 caltinay 5972 for (int i=0;i<work.size();++i)
1810     {
1811     work[i]->resolveToIdentity();
1812     }
1813 jfenwick 2799 }
1814     }
1815    
1816    
1817    
1818 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1819     DataReady_ptr
1820     DataLazy::resolveNodeWorker()
1821     {
1822 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1823 jfenwick 2500 {
1824     collapse();
1825     }
1826 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1827 jfenwick 2500 {
1828     return m_id;
1829     }
1830 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1831 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1832     RealVectorType& resvec=result->getVectorRW();
1833 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1834    
1835     int sample;
1836     int totalsamples=getNumSamples();
1837 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1838 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1839 jfenwick 2777 #pragma omp parallel private(sample,res)
1840 jfenwick 2500 {
1841 caltinay 5972 size_t roffset=0;
1842 jfenwick 2777 #ifdef LAZY_STACK_PROF
1843 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1844     stackend[omp_get_thread_num()]=&roffset;
1845 jfenwick 2777 #endif
1846 caltinay 5972 #pragma omp for schedule(static)
1847     for (sample=0;sample<totalsamples;++sample)
1848     {
1849     roffset=0;
1850 jfenwick 2500 #ifdef _OPENMP
1851 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1852 jfenwick 2500 #else
1853 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1854 jfenwick 2500 #endif
1855     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1856     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1857 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1858     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1859     }
1860 jfenwick 2500 }
1861 jfenwick 2777 #ifdef LAZY_STACK_PROF
1862     for (int i=0;i<getNumberOfThreads();++i)
1863     {
1864 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1865     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1866     if (r>maxstackuse)
1867     {
1868     maxstackuse=r;
1869     }
1870 jfenwick 2777 }
1871     cout << "Max resolve Stack use=" << maxstackuse << endl;
1872     #endif
1873 jfenwick 2500 return resptr;
1874     }
1875    
1876 jfenwick 1865 std::string
1877     DataLazy::toString() const
1878     {
1879 jfenwick 1886 ostringstream oss;
1880 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1881 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1882 jfenwick 2737 {
1883 caltinay 5972 case 1: // tree format
1884     oss << endl;
1885     intoTreeString(oss,"");
1886     break;
1887     case 2: // just the depth
1888     break;
1889 jfenwick 2795 default:
1890 caltinay 5972 intoString(oss);
1891     break;
1892 jfenwick 2737 }
1893 jfenwick 1886 return oss.str();
1894 jfenwick 1865 }
1895    
1896 jfenwick 1899
1897 jfenwick 1886 void
1898     DataLazy::intoString(ostringstream& oss) const
1899     {
1900 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1901 jfenwick 1886 switch (getOpgroup(m_op))
1902     {
1903 jfenwick 1889 case G_IDENTITY:
1904 caltinay 5972 if (m_id->isExpanded())
1905     {
1906     oss << "E";
1907     }
1908     else if (m_id->isTagged())
1909     {
1910     oss << "T";
1911     }
1912     else if (m_id->isConstant())
1913     {
1914     oss << "C";
1915     }
1916     else
1917     {
1918     oss << "?";
1919     }
1920     oss << '@' << m_id.get();
1921     break;
1922 jfenwick 1886 case G_BINARY:
1923 caltinay 5972 oss << '(';
1924     m_left->intoString(oss);
1925     oss << ' ' << opToString(m_op) << ' ';
1926     m_right->intoString(oss);
1927     oss << ')';
1928     break;
1929 jfenwick 1886 case G_UNARY:
1930 jfenwick 2147 case G_UNARY_P:
1931 jfenwick 2037 case G_NP1OUT:
1932 jfenwick 2084 case G_NP1OUT_P:
1933 jfenwick 2721 case G_REDUCTION:
1934 caltinay 5972 oss << opToString(m_op) << '(';
1935     m_left->intoString(oss);
1936     oss << ')';
1937     break;
1938 jfenwick 2066 case G_TENSORPROD:
1939 caltinay 5972 oss << opToString(m_op) << '(';
1940     m_left->intoString(oss);
1941     oss << ", ";
1942     m_right->intoString(oss);
1943     oss << ')';
1944     break;
1945 jfenwick 2496 case G_NP1OUT_2P:
1946 caltinay 5972 oss << opToString(m_op) << '(';
1947     m_left->intoString(oss);
1948     oss << ", " << m_axis_offset << ", " << m_transpose;
1949     oss << ')';
1950     break;
1951 jfenwick 3035 case G_CONDEVAL:
1952 caltinay 5972 oss << opToString(m_op)<< '(' ;
1953     m_mask->intoString(oss);
1954     oss << " ? ";
1955     m_left->intoString(oss);
1956     oss << " : ";
1957     m_right->intoString(oss);
1958     oss << ')';
1959     break;
1960 jfenwick 1886 default:
1961 caltinay 5972 oss << "UNKNOWN";
1962 jfenwick 1886 }
1963     }
1964    
1965 jfenwick 2737
1966     void
1967     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1968     {
1969     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1970     switch (getOpgroup(m_op))
1971     {
1972     case G_IDENTITY:
1973 caltinay 5972 if (m_id->isExpanded())
1974     {
1975     oss << "E";
1976     }
1977     else if (m_id->isTagged())
1978     {
1979     oss << "T";
1980     }
1981     else if (m_id->isConstant())
1982     {
1983     oss << "C";
1984     }
1985     else
1986     {
1987     oss << "?";
1988     }
1989     oss << '@' << m_id.get() << endl;
1990     break;
1991 jfenwick 2737 case G_BINARY:
1992 caltinay 5972 oss << opToString(m_op) << endl;
1993     indent+='.';
1994     m_left->intoTreeString(oss, indent);
1995     m_right->intoTreeString(oss, indent);
1996     break;
1997 jfenwick 2737 case G_UNARY:
1998     case G_UNARY_P:
1999     case G_NP1OUT:
2000     case G_NP1OUT_P:
2001     case G_REDUCTION:
2002 caltinay 5972 oss << opToString(m_op) << endl;
2003     indent+='.';
2004     m_left->intoTreeString(oss, indent);
2005     break;
2006 jfenwick 2737 case G_TENSORPROD:
2007 caltinay 5972 oss << opToString(m_op) << endl;
2008     indent+='.';
2009     m_left->intoTreeString(oss, indent);
2010     m_right->intoTreeString(oss, indent);
2011     break;
2012 jfenwick 2737 case G_NP1OUT_2P:
2013 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2014     indent+='.';
2015     m_left->intoTreeString(oss, indent);
2016     break;
2017 jfenwick 2737 default:
2018 caltinay 5972 oss << "UNKNOWN";
2019 jfenwick 2737 }
2020     }
2021    
2022    
2023 jfenwick 1865 DataAbstract*
2024 jfenwick 5938 DataLazy::deepCopy() const
2025 jfenwick 1865 {
2026 jfenwick 1935 switch (getOpgroup(m_op))
2027 jfenwick 1865 {
2028 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
2029 caltinay 5972 case G_UNARY:
2030 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2031 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2032     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2033 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2034     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2035 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2036     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2037 jfenwick 1935 default:
2038 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2039 jfenwick 1865 }
2040     }
2041    
2042    
2043 jfenwick 2721
2044 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2045     // Instances of DataReady can look at the size of their vectors.
2046     // For lazy though, it could be the size the data would be if it were resolved;
2047     // or it could be some function of the lengths of the DataReady instances which
2048     // form part of the expression.
2049     // Rather than have people making assumptions, I have disabled the method.
2050 jfenwick 5938 DataTypes::RealVectorType::size_type
2051 jfenwick 1865 DataLazy::getLength() const
2052     {
2053 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2054 jfenwick 1865 }
2055    
2056    
2057     DataAbstract*
2058     DataLazy::getSlice(const DataTypes::RegionType& region) const
2059     {
2060 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2061 jfenwick 1865 }
2062    
2063 jfenwick 1935
2064     // To do this we need to rely on our child nodes
2065 jfenwick 5938 DataTypes::RealVectorType::size_type
2066 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2067 jfenwick 1935 int dataPointNo)
2068     {
2069     if (m_op==IDENTITY)
2070     {
2071 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2072 jfenwick 1935 }
2073     if (m_readytype!='E')
2074     {
2075 caltinay 5972 collapse();
2076     return m_id->getPointOffset(sampleNo,dataPointNo);
2077 jfenwick 1935 }
2078     // at this point we do not have an identity node and the expression will be Expanded
2079     // so we only need to know which child to ask
2080     if (m_left->m_readytype=='E')
2081     {
2082 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2083 jfenwick 1935 }
2084     else
2085     {
2086 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2087 jfenwick 1935 }
2088     }
2089    
2090     // To do this we need to rely on our child nodes
2091 jfenwick 5938 DataTypes::RealVectorType::size_type
2092 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2093 jfenwick 1865 int dataPointNo) const
2094     {
2095 jfenwick 1935 if (m_op==IDENTITY)
2096     {
2097 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2098 jfenwick 1935 }
2099     if (m_readytype=='E')
2100     {
2101     // at this point we do not have an identity node and the expression will be Expanded
2102     // so we only need to know which child to ask
2103     if (m_left->m_readytype=='E')
2104     {
2105 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2106 jfenwick 1935 }
2107     else
2108     {
2109 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2110 jfenwick 1935 }
2111     }
2112     if (m_readytype=='C')
2113     {
2114 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2115 jfenwick 1935 }
2116     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2117 jfenwick 1865 }
2118    
2119 jfenwick 2271
2120     // I have decided to let Data:: handle this issue.
2121 jfenwick 1901 void
2122     DataLazy::setToZero()
2123     {
2124 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2125 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2126     // m_op=IDENTITY;
2127     // m_right.reset();
2128     // m_left.reset();
2129     // m_readytype='C';
2130     // m_buffsRequired=1;
2131    
2132 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2133 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2134 jfenwick 1901 }
2135    
2136 jfenwick 2271 bool
2137     DataLazy::actsExpanded() const
2138     {
2139 caltinay 5972 return (m_readytype=='E');
2140 jfenwick 2271 }
2141    
2142 caltinay 5997 } // end namespace
2143 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26