/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3035 - (hide annotations)
Thu Jun 10 01:48:41 2010 UTC (8 years, 10 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 60569 byte(s)
Lazy node support for conditional evaluation.
It should be a complete operation now.

1 jfenwick 1865
2     /*******************************************************
3     *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 jfenwick 1865 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48 jfenwick 2472
49 jfenwick 2501
50 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52 jfenwick 1899 /*
53     How does DataLazy work?
54     ~~~~~~~~~~~~~~~~~~~~~~~
55    
56     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57     denoted left and right.
58    
59     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60     This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63     Note that IDENITY is not considered a unary operation.
64    
65     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66     It must however form a DAG (directed acyclic graph).
67     I will refer to individual DataLazy objects with the structure as nodes.
68    
69     Each node also stores:
70     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71     evaluated.
72     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
73     evaluate the expression.
74     - m_samplesize ~ the number of doubles stored in a sample.
75    
76     When a new node is created, the above values are computed based on the values in the child nodes.
77     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79     The resolve method, which produces a DataReady from a DataLazy, does the following:
80     1) Create a DataReady to hold the new result.
81     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86     resolveSample returns a Vector* and an offset within that vector where the result is stored.
87     Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93 jfenwick 2037
94 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 jfenwick 2037 1) Add to the ES_optype.
96     2) determine what opgroup your operation belongs to (X)
97     3) add a string for the op to the end of ES_opstrings
98     4) increase ES_opcount
99     5) add an entry (X) to opgroups
100     6) add an entry to the switch in collapseToReady
101     7) add an entry to resolveX
102 jfenwick 1899 */
103    
104    
105 jfenwick 1865 using namespace std;
106 jfenwick 1868 using namespace boost;
107 jfenwick 1865
108     namespace escript
109     {
110    
111     namespace
112     {
113    
114 jfenwick 2777
115     // enabling this will print out when ever the maximum stacksize used by resolve increases
116     // it assumes _OPENMP is also in use
117     //#define LAZY_STACK_PROF
118    
119    
120    
121     #ifndef _OPENMP
122     #ifdef LAZY_STACK_PROF
123     #undef LAZY_STACK_PROF
124     #endif
125     #endif
126    
127    
128     #ifdef LAZY_STACK_PROF
129     std::vector<void*> stackstart(getNumberOfThreads());
130     std::vector<void*> stackend(getNumberOfThreads());
131     size_t maxstackuse=0;
132     #endif
133    
134 jfenwick 1886 enum ES_opgroup
135     {
136     G_UNKNOWN,
137     G_IDENTITY,
138 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
139 jfenwick 2037 G_UNARY, // pointwise operations with one argument
140 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
142 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143 jfenwick 2496 G_TENSORPROD, // general tensor product
144 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
146     G_CONDEVAL
147 jfenwick 1886 };
148    
149    
150    
151    
152 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153     "sin","cos","tan",
154 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
155     "asinh","acosh","atanh",
156 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
157 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158 jfenwick 2066 "symmetric","nonsymmetric",
159 jfenwick 2084 "prod",
160 jfenwick 2496 "transpose", "trace",
161 jfenwick 2721 "swapaxes",
162 jfenwick 3035 "minval", "maxval",
163     "condEval"};
164     int ES_opcount=44;
165 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166     G_UNARY,G_UNARY,G_UNARY, //10
167     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
168     G_UNARY,G_UNARY,G_UNARY, // 20
169 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
171 jfenwick 2066 G_NP1OUT,G_NP1OUT,
172 jfenwick 2084 G_TENSORPROD,
173 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
174 jfenwick 2721 G_NP1OUT_2P,
175 jfenwick 3035 G_REDUCTION, G_REDUCTION,
176     G_CONDEVAL};
177 jfenwick 1886 inline
178     ES_opgroup
179     getOpgroup(ES_optype op)
180     {
181     return opgroups[op];
182     }
183    
184 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
185     FunctionSpace
186     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188     // perhaps this should call interpolate and throw or something?
189     // maybe we need an interpolate node -
190     // that way, if interpolate is required in any other op we can just throw a
191     // programming error exception.
192 jfenwick 1879
193 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
194     FunctionSpace r=right->getFunctionSpace();
195     if (l!=r)
196     {
197     if (r.probeInterpolation(l))
198     {
199     return l;
200     }
201     if (l.probeInterpolation(r))
202     {
203     return r;
204     }
205     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
206     }
207     return l;
208 jfenwick 1865 }
209    
210     // return the shape of the result of "left op right"
211 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
212 jfenwick 1865 DataTypes::ShapeType
213     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
214     {
215 jfenwick 1879 if (left->getShape()!=right->getShape())
216     {
217 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
218 jfenwick 1908 {
219     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
220     }
221 jfenwick 2721
222 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
223     {
224     return right->getShape();
225     }
226     if (right->getRank()==0)
227     {
228     return left->getShape();
229     }
230     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
231 jfenwick 1879 }
232     return left->getShape();
233 jfenwick 1865 }
234    
235 jfenwick 2084 // return the shape for "op left"
236    
237     DataTypes::ShapeType
238 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
239 jfenwick 2084 {
240     switch(op)
241     {
242     case TRANS:
243 jfenwick 2199 { // for the scoping of variables
244     const DataTypes::ShapeType& s=left->getShape();
245     DataTypes::ShapeType sh;
246     int rank=left->getRank();
247     if (axis_offset<0 || axis_offset>rank)
248     {
249 caltinay 2779 stringstream e;
250     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
251     throw DataException(e.str());
252     }
253     for (int i=0; i<rank; i++)
254 jfenwick 2199 {
255     int index = (axis_offset+i)%rank;
256 caltinay 2779 sh.push_back(s[index]); // Append to new shape
257     }
258 jfenwick 2199 return sh;
259     }
260 jfenwick 2084 break;
261     case TRACE:
262 jfenwick 2199 {
263     int rank=left->getRank();
264     if (rank<2)
265     {
266     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
267     }
268     if ((axis_offset>rank-2) || (axis_offset<0))
269     {
270     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
271     }
272     if (rank==2)
273     {
274     return DataTypes::scalarShape;
275     }
276     else if (rank==3)
277     {
278     DataTypes::ShapeType sh;
279     if (axis_offset==0)
280     {
281     sh.push_back(left->getShape()[2]);
282     }
283     else // offset==1
284     {
285     sh.push_back(left->getShape()[0]);
286     }
287     return sh;
288     }
289     else if (rank==4)
290     {
291     DataTypes::ShapeType sh;
292     const DataTypes::ShapeType& s=left->getShape();
293     if (axis_offset==0)
294     {
295     sh.push_back(s[2]);
296     sh.push_back(s[3]);
297     }
298     else if (axis_offset==1)
299     {
300     sh.push_back(s[0]);
301     sh.push_back(s[3]);
302     }
303     else // offset==2
304     {
305     sh.push_back(s[0]);
306     sh.push_back(s[1]);
307     }
308     return sh;
309     }
310     else // unknown rank
311     {
312     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
313     }
314     }
315 jfenwick 2084 break;
316     default:
317     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
318     }
319     }
320    
321 jfenwick 2496 DataTypes::ShapeType
322     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
323     {
324     // This code taken from the Data.cpp swapaxes() method
325     // Some of the checks are probably redundant here
326     int axis0_tmp,axis1_tmp;
327     const DataTypes::ShapeType& s=left->getShape();
328     DataTypes::ShapeType out_shape;
329     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
330     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
331     int rank=left->getRank();
332     if (rank<2) {
333     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
334     }
335     if (axis0<0 || axis0>rank-1) {
336 caltinay 2779 stringstream e;
337     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
338     throw DataException(e.str());
339 jfenwick 2496 }
340     if (axis1<0 || axis1>rank-1) {
341 caltinay 2782 stringstream e;
342 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
343     throw DataException(e.str());
344 jfenwick 2496 }
345     if (axis0 == axis1) {
346     throw DataException("Error - Data::swapaxes: axis indices must be different.");
347     }
348     if (axis0 > axis1) {
349     axis0_tmp=axis1;
350     axis1_tmp=axis0;
351     } else {
352     axis0_tmp=axis0;
353     axis1_tmp=axis1;
354     }
355     for (int i=0; i<rank; i++) {
356     if (i == axis0_tmp) {
357     out_shape.push_back(s[axis1_tmp]);
358     } else if (i == axis1_tmp) {
359     out_shape.push_back(s[axis0_tmp]);
360     } else {
361     out_shape.push_back(s[i]);
362     }
363     }
364     return out_shape;
365     }
366    
367    
368 jfenwick 2066 // determine the output shape for the general tensor product operation
369     // the additional parameters return information required later for the product
370     // the majority of this code is copy pasted from C_General_Tensor_Product
371     DataTypes::ShapeType
372     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
373 jfenwick 1865 {
374 jfenwick 2066
375     // Get rank and shape of inputs
376     int rank0 = left->getRank();
377     int rank1 = right->getRank();
378     const DataTypes::ShapeType& shape0 = left->getShape();
379     const DataTypes::ShapeType& shape1 = right->getShape();
380    
381     // Prepare for the loops of the product and verify compatibility of shapes
382     int start0=0, start1=0;
383     if (transpose == 0) {}
384     else if (transpose == 1) { start0 = axis_offset; }
385     else if (transpose == 2) { start1 = rank1-axis_offset; }
386     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
387    
388 jfenwick 2085 if (rank0<axis_offset)
389     {
390     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
391     }
392 jfenwick 2066
393     // Adjust the shapes for transpose
394     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
395     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
396     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
397     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
398    
399     // Prepare for the loops of the product
400     SL=1, SM=1, SR=1;
401     for (int i=0; i<rank0-axis_offset; i++) {
402     SL *= tmpShape0[i];
403     }
404     for (int i=rank0-axis_offset; i<rank0; i++) {
405     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
406     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
407     }
408     SM *= tmpShape0[i];
409     }
410     for (int i=axis_offset; i<rank1; i++) {
411     SR *= tmpShape1[i];
412     }
413    
414     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
415     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
416     { // block to limit the scope of out_index
417     int out_index=0;
418     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
419     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
420     }
421 jfenwick 2086
422     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
423     {
424     ostringstream os;
425     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
426     throw DataException(os.str());
427     }
428    
429 jfenwick 2066 return shape2;
430 jfenwick 1865 }
431    
432     } // end anonymous namespace
433    
434    
435 jfenwick 1899
436     // Return a string representing the operation
437 jfenwick 1865 const std::string&
438     opToString(ES_optype op)
439     {
440     if (op<0 || op>=ES_opcount)
441     {
442     op=UNKNOWNOP;
443     }
444     return ES_opstrings[op];
445     }
446    
447 jfenwick 2500 void DataLazy::LazyNodeSetup()
448     {
449     #ifdef _OPENMP
450     int numthreads=omp_get_max_threads();
451     m_samples.resize(numthreads*m_samplesize);
452     m_sampleids=new int[numthreads];
453     for (int i=0;i<numthreads;++i)
454     {
455     m_sampleids[i]=-1;
456     }
457     #else
458     m_samples.resize(m_samplesize);
459     m_sampleids=new int[1];
460     m_sampleids[0]=-1;
461     #endif // _OPENMP
462     }
463 jfenwick 1865
464 jfenwick 2500
465     // Creates an identity node
466 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
467 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
468 jfenwick 2500 ,m_sampleids(0),
469     m_samples(1)
470 jfenwick 1865 {
471 jfenwick 1879 if (p->isLazy())
472     {
473     // I don't want identity of Lazy.
474     // Question: Why would that be so bad?
475     // Answer: We assume that the child of ID is something we can call getVector on
476     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
477     }
478     else
479     {
480 jfenwick 2271 p->makeLazyShared();
481 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
482     makeIdentity(dr);
483 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
484 jfenwick 1879 }
485 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
486 jfenwick 1865 }
487    
488 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
489 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
490 jfenwick 2066 m_op(op),
491     m_axis_offset(0),
492     m_transpose(0),
493     m_SL(0), m_SM(0), m_SR(0)
494 jfenwick 1886 {
495 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
496 jfenwick 1886 {
497     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
498     }
499 jfenwick 2066
500 jfenwick 1886 DataLazy_ptr lleft;
501     if (!left->isLazy())
502     {
503     lleft=DataLazy_ptr(new DataLazy(left));
504     }
505     else
506     {
507     lleft=dynamic_pointer_cast<DataLazy>(left);
508     }
509 jfenwick 1889 m_readytype=lleft->m_readytype;
510 jfenwick 1886 m_left=lleft;
511     m_samplesize=getNumDPPSample()*getNoValues();
512 jfenwick 2177 m_children=m_left->m_children+1;
513     m_height=m_left->m_height+1;
514 jfenwick 2500 LazyNodeSetup();
515 jfenwick 2177 SIZELIMIT
516 jfenwick 1886 }
517    
518    
519 jfenwick 1943 // In this constructor we need to consider interpolation
520 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
521     : parent(resultFS(left,right,op), resultShape(left,right,op)),
522 jfenwick 2066 m_op(op),
523     m_SL(0), m_SM(0), m_SR(0)
524 jfenwick 1879 {
525 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
526 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
527 jfenwick 1886 {
528 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
529 jfenwick 1886 }
530 jfenwick 1943
531     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
532     {
533     FunctionSpace fs=getFunctionSpace();
534     Data ltemp(left);
535     Data tmp(ltemp,fs);
536     left=tmp.borrowDataPtr();
537     }
538 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
539 jfenwick 1943 {
540     Data tmp(Data(right),getFunctionSpace());
541     right=tmp.borrowDataPtr();
542 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
543 jfenwick 1943 }
544     left->operandCheck(*right);
545    
546 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
547 jfenwick 1879 {
548     m_left=dynamic_pointer_cast<DataLazy>(left);
549 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
550 jfenwick 1879 }
551     else
552     {
553     m_left=DataLazy_ptr(new DataLazy(left));
554 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
555 jfenwick 1879 }
556     if (right->isLazy())
557     {
558     m_right=dynamic_pointer_cast<DataLazy>(right);
559 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
560 jfenwick 1879 }
561     else
562     {
563     m_right=DataLazy_ptr(new DataLazy(right));
564 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
565 jfenwick 1879 }
566 jfenwick 1889 char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')
569     {
570     m_readytype='E';
571     }
572     else if (lt=='T' || rt=='T')
573     {
574     m_readytype='T';
575     }
576     else
577     {
578     m_readytype='C';
579     }
580 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
581 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;
583 jfenwick 2500 LazyNodeSetup();
584 jfenwick 2177 SIZELIMIT
585 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
586 jfenwick 1879 }
587    
588 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
589     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
590     m_op(op),
591     m_axis_offset(axis_offset),
592     m_transpose(transpose)
593     {
594     if ((getOpgroup(op)!=G_TENSORPROD))
595     {
596     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
597     }
598     if ((transpose>2) || (transpose<0))
599     {
600     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
601     }
602     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
603     {
604     FunctionSpace fs=getFunctionSpace();
605     Data ltemp(left);
606     Data tmp(ltemp,fs);
607     left=tmp.borrowDataPtr();
608     }
609     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
610     {
611     Data tmp(Data(right),getFunctionSpace());
612     right=tmp.borrowDataPtr();
613     }
614 jfenwick 2195 // left->operandCheck(*right);
615 jfenwick 1879
616 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
617     {
618     m_left=dynamic_pointer_cast<DataLazy>(left);
619     }
620     else
621     {
622     m_left=DataLazy_ptr(new DataLazy(left));
623     }
624     if (right->isLazy())
625     {
626     m_right=dynamic_pointer_cast<DataLazy>(right);
627     }
628     else
629     {
630     m_right=DataLazy_ptr(new DataLazy(right));
631     }
632     char lt=m_left->m_readytype;
633     char rt=m_right->m_readytype;
634     if (lt=='E' || rt=='E')
635     {
636     m_readytype='E';
637     }
638     else if (lt=='T' || rt=='T')
639     {
640     m_readytype='T';
641     }
642     else
643     {
644     m_readytype='C';
645     }
646     m_samplesize=getNumDPPSample()*getNoValues();
647 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
648     m_height=max(m_left->m_height,m_right->m_height)+1;
649 jfenwick 2500 LazyNodeSetup();
650 jfenwick 2177 SIZELIMIT
651 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
652 jfenwick 2066 }
653    
654    
655 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
656 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
657 jfenwick 2084 m_op(op),
658     m_axis_offset(axis_offset),
659 jfenwick 2147 m_transpose(0),
660     m_tol(0)
661 jfenwick 2084 {
662     if ((getOpgroup(op)!=G_NP1OUT_P))
663     {
664     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
665     }
666     DataLazy_ptr lleft;
667     if (!left->isLazy())
668     {
669     lleft=DataLazy_ptr(new DataLazy(left));
670     }
671     else
672     {
673     lleft=dynamic_pointer_cast<DataLazy>(left);
674     }
675     m_readytype=lleft->m_readytype;
676     m_left=lleft;
677     m_samplesize=getNumDPPSample()*getNoValues();
678 jfenwick 2177 m_children=m_left->m_children+1;
679     m_height=m_left->m_height+1;
680 jfenwick 2500 LazyNodeSetup();
681 jfenwick 2177 SIZELIMIT
682 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
683 jfenwick 2084 }
684    
685 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
686     : parent(left->getFunctionSpace(), left->getShape()),
687     m_op(op),
688     m_axis_offset(0),
689     m_transpose(0),
690     m_tol(tol)
691     {
692     if ((getOpgroup(op)!=G_UNARY_P))
693     {
694     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
695     }
696     DataLazy_ptr lleft;
697     if (!left->isLazy())
698     {
699     lleft=DataLazy_ptr(new DataLazy(left));
700     }
701     else
702     {
703     lleft=dynamic_pointer_cast<DataLazy>(left);
704     }
705     m_readytype=lleft->m_readytype;
706     m_left=lleft;
707     m_samplesize=getNumDPPSample()*getNoValues();
708 jfenwick 2177 m_children=m_left->m_children+1;
709     m_height=m_left->m_height+1;
710 jfenwick 2500 LazyNodeSetup();
711 jfenwick 2177 SIZELIMIT
712 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
713     }
714 jfenwick 2084
715 jfenwick 2496
716     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
717     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
718     m_op(op),
719     m_axis_offset(axis0),
720     m_transpose(axis1),
721     m_tol(0)
722     {
723     if ((getOpgroup(op)!=G_NP1OUT_2P))
724     {
725     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
726     }
727     DataLazy_ptr lleft;
728     if (!left->isLazy())
729     {
730     lleft=DataLazy_ptr(new DataLazy(left));
731     }
732     else
733     {
734     lleft=dynamic_pointer_cast<DataLazy>(left);
735     }
736     m_readytype=lleft->m_readytype;
737     m_left=lleft;
738     m_samplesize=getNumDPPSample()*getNoValues();
739     m_children=m_left->m_children+1;
740     m_height=m_left->m_height+1;
741 jfenwick 2500 LazyNodeSetup();
742 jfenwick 2496 SIZELIMIT
743     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
744     }
745    
746 jfenwick 3035
747     namespace
748     {
749    
750     inline int max3(int a, int b, int c)
751     {
752     int t=(a>b?a:b);
753     return (t>c?t:c);
754    
755     }
756     }
757    
758     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
759     : parent(left->getFunctionSpace(), left->getShape()),
760     m_op(CONDEVAL),
761     m_axis_offset(0),
762     m_transpose(0),
763     m_tol(0)
764     {
765    
766     DataLazy_ptr lmask;
767     DataLazy_ptr lleft;
768     DataLazy_ptr lright;
769     if (!mask->isLazy())
770     {
771     lmask=DataLazy_ptr(new DataLazy(mask));
772     }
773     else
774     {
775     lmask=dynamic_pointer_cast<DataLazy>(mask);
776     }
777     if (!left->isLazy())
778     {
779     lleft=DataLazy_ptr(new DataLazy(left));
780     }
781     else
782     {
783     lleft=dynamic_pointer_cast<DataLazy>(left);
784     }
785     if (!right->isLazy())
786     {
787     lright=DataLazy_ptr(new DataLazy(right));
788     }
789     else
790     {
791     lright=dynamic_pointer_cast<DataLazy>(right);
792     }
793     m_readytype=lmask->m_readytype;
794     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
795     {
796     throw DataException("Programmer Error - condEval arguments must have the same readytype");
797     }
798     m_left=lleft;
799     m_right=lright;
800     m_mask=lmask;
801     m_samplesize=getNumDPPSample()*getNoValues();
802     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
803     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
804     LazyNodeSetup();
805     SIZELIMIT
806     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
807     }
808    
809    
810    
811 jfenwick 1865 DataLazy::~DataLazy()
812     {
813 jfenwick 2500 delete[] m_sampleids;
814 jfenwick 1865 }
815    
816 jfenwick 1879
817 jfenwick 1899 /*
818     \brief Evaluates the expression using methods on Data.
819     This does the work for the collapse method.
820     For reasons of efficiency do not call this method on DataExpanded nodes.
821     */
822 jfenwick 1889 DataReady_ptr
823     DataLazy::collapseToReady()
824     {
825     if (m_readytype=='E')
826     { // this is more an efficiency concern than anything else
827     throw DataException("Programmer Error - do not use collapse on Expanded data.");
828     }
829     if (m_op==IDENTITY)
830     {
831     return m_id;
832     }
833     DataReady_ptr pleft=m_left->collapseToReady();
834     Data left(pleft);
835     Data right;
836 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
837 jfenwick 1889 {
838     right=Data(m_right->collapseToReady());
839     }
840     Data result;
841     switch(m_op)
842     {
843     case ADD:
844     result=left+right;
845     break;
846     case SUB:
847     result=left-right;
848     break;
849     case MUL:
850     result=left*right;
851     break;
852     case DIV:
853     result=left/right;
854     break;
855     case SIN:
856     result=left.sin();
857     break;
858     case COS:
859     result=left.cos();
860     break;
861     case TAN:
862     result=left.tan();
863     break;
864     case ASIN:
865     result=left.asin();
866     break;
867     case ACOS:
868     result=left.acos();
869     break;
870     case ATAN:
871     result=left.atan();
872     break;
873     case SINH:
874     result=left.sinh();
875     break;
876     case COSH:
877     result=left.cosh();
878     break;
879     case TANH:
880     result=left.tanh();
881     break;
882     case ERF:
883     result=left.erf();
884     break;
885     case ASINH:
886     result=left.asinh();
887     break;
888     case ACOSH:
889     result=left.acosh();
890     break;
891     case ATANH:
892     result=left.atanh();
893     break;
894     case LOG10:
895     result=left.log10();
896     break;
897     case LOG:
898     result=left.log();
899     break;
900     case SIGN:
901     result=left.sign();
902     break;
903     case ABS:
904     result=left.abs();
905     break;
906     case NEG:
907     result=left.neg();
908     break;
909     case POS:
910     // it doesn't mean anything for delayed.
911     // it will just trigger a deep copy of the lazy object
912     throw DataException("Programmer error - POS not supported for lazy data.");
913     break;
914     case EXP:
915     result=left.exp();
916     break;
917     case SQRT:
918     result=left.sqrt();
919     break;
920     case RECIP:
921     result=left.oneOver();
922     break;
923     case GZ:
924     result=left.wherePositive();
925     break;
926     case LZ:
927     result=left.whereNegative();
928     break;
929     case GEZ:
930     result=left.whereNonNegative();
931     break;
932     case LEZ:
933     result=left.whereNonPositive();
934     break;
935 jfenwick 2147 case NEZ:
936     result=left.whereNonZero(m_tol);
937     break;
938     case EZ:
939     result=left.whereZero(m_tol);
940     break;
941 jfenwick 2037 case SYM:
942     result=left.symmetric();
943     break;
944     case NSYM:
945     result=left.nonsymmetric();
946     break;
947 jfenwick 2066 case PROD:
948     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
949     break;
950 jfenwick 2084 case TRANS:
951     result=left.transpose(m_axis_offset);
952     break;
953     case TRACE:
954     result=left.trace(m_axis_offset);
955     break;
956 jfenwick 2496 case SWAP:
957     result=left.swapaxes(m_axis_offset, m_transpose);
958     break;
959 jfenwick 2721 case MINVAL:
960     result=left.minval();
961     break;
962     case MAXVAL:
963     result=left.minval();
964     break;
965 jfenwick 1889 default:
966 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
967 jfenwick 1889 }
968     return result.borrowReadyPtr();
969     }
970    
971 jfenwick 1899 /*
972     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
973     This method uses the original methods on the Data class to evaluate the expressions.
974     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
975     the purpose of using DataLazy in the first place).
976     */
977 jfenwick 1889 void
978     DataLazy::collapse()
979     {
980     if (m_op==IDENTITY)
981     {
982     return;
983     }
984     if (m_readytype=='E')
985     { // this is more an efficiency concern than anything else
986     throw DataException("Programmer Error - do not use collapse on Expanded data.");
987     }
988     m_id=collapseToReady();
989     m_op=IDENTITY;
990     }
991    
992 jfenwick 1899
993 jfenwick 1889
994    
995    
996 jfenwick 2147
997 phornby 1987 #define PROC_OP(TYPE,X) \
998 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
999     {\
1000     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1001     { \
1002     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1003 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1004 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1005 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1006 jfenwick 2152 lroffset+=leftstep; \
1007     rroffset+=rightstep; \
1008     }\
1009     lroffset+=oleftstep;\
1010     rroffset+=orightstep;\
1011 jfenwick 1889 }
1012    
1013 jfenwick 1899
1014 jfenwick 2500 // The result will be stored in m_samples
1015     // The return value is a pointer to the DataVector, offset is the offset within the return value
1016     const DataTypes::ValueType*
1017     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1018     {
1019     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1020     // collapse so we have a 'E' node or an IDENTITY for some other type
1021     if (m_readytype!='E' && m_op!=IDENTITY)
1022     {
1023     collapse();
1024     }
1025     if (m_op==IDENTITY)
1026     {
1027     const ValueType& vec=m_id->getVectorRO();
1028     roffset=m_id->getPointOffset(sampleNo, 0);
1029 jfenwick 2777 #ifdef LAZY_STACK_PROF
1030     int x;
1031     if (&x<stackend[omp_get_thread_num()])
1032     {
1033     stackend[omp_get_thread_num()]=&x;
1034     }
1035     #endif
1036 jfenwick 2500 return &(vec);
1037     }
1038     if (m_readytype!='E')
1039     {
1040     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1041     }
1042 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1043     {
1044     roffset=tid*m_samplesize;
1045     return &(m_samples); // sample is already resolved
1046     }
1047     m_sampleids[tid]=sampleNo;
1048 jfenwick 3035
1049 jfenwick 2500 switch (getOpgroup(m_op))
1050     {
1051     case G_UNARY:
1052     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1053     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1054     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1055     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1056     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1057     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1058 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1059 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1060 jfenwick 2500 default:
1061     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1062     }
1063     }
1064    
1065     const DataTypes::ValueType*
1066     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1067     {
1068     // we assume that any collapsing has been done before we get here
1069     // since we only have one argument we don't need to think about only
1070     // processing single points.
1071     // we will also know we won't get identity nodes
1072     if (m_readytype!='E')
1073     {
1074     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1075     }
1076     if (m_op==IDENTITY)
1077     {
1078     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1079     }
1080     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1081     const double* left=&((*leftres)[roffset]);
1082     roffset=m_samplesize*tid;
1083     double* result=&(m_samples[roffset]);
1084     switch (m_op)
1085     {
1086     case SIN:
1087     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1088     break;
1089     case COS:
1090     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1091     break;
1092     case TAN:
1093     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1094     break;
1095     case ASIN:
1096     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1097     break;
1098     case ACOS:
1099     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1100     break;
1101     case ATAN:
1102     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1103     break;
1104     case SINH:
1105     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1106     break;
1107     case COSH:
1108     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1109     break;
1110     case TANH:
1111     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1112     break;
1113     case ERF:
1114     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1115     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1116     #else
1117     tensor_unary_operation(m_samplesize, left, result, ::erf);
1118     break;
1119     #endif
1120     case ASINH:
1121     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1122     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1123     #else
1124     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1125     #endif
1126     break;
1127     case ACOSH:
1128     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1129     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1130     #else
1131     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1132     #endif
1133     break;
1134     case ATANH:
1135     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1136     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1137     #else
1138     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1139     #endif
1140     break;
1141     case LOG10:
1142     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1143     break;
1144     case LOG:
1145     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1146     break;
1147     case SIGN:
1148     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1149     break;
1150     case ABS:
1151     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1152     break;
1153     case NEG:
1154     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1155     break;
1156     case POS:
1157     // it doesn't mean anything for delayed.
1158     // it will just trigger a deep copy of the lazy object
1159     throw DataException("Programmer error - POS not supported for lazy data.");
1160     break;
1161     case EXP:
1162     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1163     break;
1164     case SQRT:
1165     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1166     break;
1167     case RECIP:
1168     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1169     break;
1170     case GZ:
1171     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1172     break;
1173     case LZ:
1174     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1175     break;
1176     case GEZ:
1177     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1178     break;
1179     case LEZ:
1180     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1181     break;
1182     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1183     case NEZ:
1184     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1185     break;
1186     case EZ:
1187     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1188     break;
1189    
1190     default:
1191     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1192     }
1193     return &(m_samples);
1194     }
1195    
1196    
1197     const DataTypes::ValueType*
1198 jfenwick 2721 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1199     {
1200     // we assume that any collapsing has been done before we get here
1201     // since we only have one argument we don't need to think about only
1202     // processing single points.
1203     // we will also know we won't get identity nodes
1204     if (m_readytype!='E')
1205     {
1206     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1207     }
1208     if (m_op==IDENTITY)
1209     {
1210     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1211     }
1212     size_t loffset=0;
1213     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1214    
1215     roffset=m_samplesize*tid;
1216 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1217     unsigned int psize=DataTypes::noValues(getShape());
1218 jfenwick 2721 double* result=&(m_samples[roffset]);
1219     switch (m_op)
1220     {
1221     case MINVAL:
1222     {
1223 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1224     {
1225     FMin op;
1226     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1227     loffset+=psize;
1228     result++;
1229     }
1230 jfenwick 2721 }
1231     break;
1232     case MAXVAL:
1233     {
1234 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1235     {
1236 jfenwick 2721 FMax op;
1237     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1238 jfenwick 2734 loffset+=psize;
1239     result++;
1240     }
1241 jfenwick 2721 }
1242     break;
1243     default:
1244     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1245     }
1246     return &(m_samples);
1247     }
1248    
1249     const DataTypes::ValueType*
1250 jfenwick 2500 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1251     {
1252     // we assume that any collapsing has been done before we get here
1253     // since we only have one argument we don't need to think about only
1254     // processing single points.
1255     if (m_readytype!='E')
1256     {
1257     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1258     }
1259     if (m_op==IDENTITY)
1260     {
1261     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1262     }
1263     size_t subroffset;
1264     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1265     roffset=m_samplesize*tid;
1266     size_t loop=0;
1267     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1268     size_t step=getNoValues();
1269     size_t offset=roffset;
1270     switch (m_op)
1271     {
1272     case SYM:
1273     for (loop=0;loop<numsteps;++loop)
1274     {
1275     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1276     subroffset+=step;
1277     offset+=step;
1278     }
1279     break;
1280     case NSYM:
1281     for (loop=0;loop<numsteps;++loop)
1282     {
1283     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1284     subroffset+=step;
1285     offset+=step;
1286     }
1287     break;
1288     default:
1289     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1290     }
1291     return &m_samples;
1292     }
1293    
1294     const DataTypes::ValueType*
1295     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1296     {
1297     // we assume that any collapsing has been done before we get here
1298     // since we only have one argument we don't need to think about only
1299     // processing single points.
1300     if (m_readytype!='E')
1301     {
1302     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1303     }
1304     if (m_op==IDENTITY)
1305     {
1306     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1307     }
1308     size_t subroffset;
1309     size_t offset;
1310     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1311     roffset=m_samplesize*tid;
1312     offset=roffset;
1313     size_t loop=0;
1314     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1315     size_t outstep=getNoValues();
1316     size_t instep=m_left->getNoValues();
1317     switch (m_op)
1318     {
1319     case TRACE:
1320     for (loop=0;loop<numsteps;++loop)
1321     {
1322     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1323     subroffset+=instep;
1324     offset+=outstep;
1325     }
1326     break;
1327     case TRANS:
1328     for (loop=0;loop<numsteps;++loop)
1329     {
1330     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1331     subroffset+=instep;
1332     offset+=outstep;
1333     }
1334     break;
1335     default:
1336     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1337     }
1338     return &m_samples;
1339     }
1340    
1341    
1342     const DataTypes::ValueType*
1343     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1344     {
1345     if (m_readytype!='E')
1346     {
1347     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1348     }
1349     if (m_op==IDENTITY)
1350     {
1351     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1352     }
1353     size_t subroffset;
1354     size_t offset;
1355     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1356     roffset=m_samplesize*tid;
1357     offset=roffset;
1358     size_t loop=0;
1359     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1360     size_t outstep=getNoValues();
1361     size_t instep=m_left->getNoValues();
1362     switch (m_op)
1363     {
1364     case SWAP:
1365     for (loop=0;loop<numsteps;++loop)
1366     {
1367     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1368     subroffset+=instep;
1369     offset+=outstep;
1370     }
1371     break;
1372     default:
1373     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1374     }
1375     return &m_samples;
1376     }
1377    
1378 jfenwick 3035 const DataTypes::ValueType*
1379     DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1380     {
1381     if (m_readytype!='E')
1382     {
1383     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1384     }
1385     if (m_op!=CONDEVAL)
1386     {
1387     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1388     }
1389     size_t subroffset;
1390     size_t offset;
1391 jfenwick 2500
1392    
1393 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394     const ValueType* srcres=0;
1395     if ((*maskres)[subroffset]>0)
1396     {
1397     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398     }
1399     else
1400     {
1401     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402     }
1403    
1404     // Now we need to copy the result
1405    
1406     roffset=m_samplesize*tid;
1407     offset=roffset;
1408     for (int i=0;i<m_samplesize;++i)
1409     {
1410     m_samples[roffset+i]=(*srcres)[subroffset+i];
1411     }
1412    
1413     return &m_samples;
1414     }
1415    
1416 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1417     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1418     // If both children are expanded, then we can process them in a single operation (we treat
1419     // the whole sample as one big datapoint.
1420     // If one of the children is not expanded, then we need to treat each point in the sample
1421     // individually.
1422     // There is an additional complication when scalar operations are considered.
1423     // For example, 2+Vector.
1424     // In this case each double within the point is treated individually
1425     const DataTypes::ValueType*
1426     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1427     {
1428     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1429    
1430     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1431     // first work out which of the children are expanded
1432     bool leftExp=(m_left->m_readytype=='E');
1433     bool rightExp=(m_right->m_readytype=='E');
1434     if (!leftExp && !rightExp)
1435     {
1436     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1437     }
1438     bool leftScalar=(m_left->getRank()==0);
1439     bool rightScalar=(m_right->getRank()==0);
1440     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1441     {
1442     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1443     }
1444     size_t leftsize=m_left->getNoValues();
1445     size_t rightsize=m_right->getNoValues();
1446     size_t chunksize=1; // how many doubles will be processed in one go
1447     int leftstep=0; // how far should the left offset advance after each step
1448     int rightstep=0;
1449     int numsteps=0; // total number of steps for the inner loop
1450     int oleftstep=0; // the o variables refer to the outer loop
1451     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1452     int onumsteps=1;
1453    
1454     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1455     bool RES=(rightExp && rightScalar);
1456     bool LS=(!leftExp && leftScalar); // left is a single scalar
1457     bool RS=(!rightExp && rightScalar);
1458     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1459     bool RN=(!rightExp && !rightScalar);
1460     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1461     bool REN=(rightExp && !rightScalar);
1462    
1463     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1464     {
1465     chunksize=m_left->getNumDPPSample()*leftsize;
1466     leftstep=0;
1467     rightstep=0;
1468     numsteps=1;
1469     }
1470     else if (LES || RES)
1471     {
1472     chunksize=1;
1473     if (LES) // left is an expanded scalar
1474     {
1475     if (RS)
1476     {
1477     leftstep=1;
1478     rightstep=0;
1479     numsteps=m_left->getNumDPPSample();
1480     }
1481     else // RN or REN
1482     {
1483     leftstep=0;
1484     oleftstep=1;
1485     rightstep=1;
1486     orightstep=(RN ? -(int)rightsize : 0);
1487     numsteps=rightsize;
1488     onumsteps=m_left->getNumDPPSample();
1489     }
1490     }
1491     else // right is an expanded scalar
1492     {
1493     if (LS)
1494     {
1495     rightstep=1;
1496     leftstep=0;
1497     numsteps=m_right->getNumDPPSample();
1498     }
1499     else
1500     {
1501     rightstep=0;
1502     orightstep=1;
1503     leftstep=1;
1504     oleftstep=(LN ? -(int)leftsize : 0);
1505     numsteps=leftsize;
1506     onumsteps=m_right->getNumDPPSample();
1507     }
1508     }
1509     }
1510     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1511     {
1512     if (LEN) // and Right will be a single value
1513     {
1514     chunksize=rightsize;
1515     leftstep=rightsize;
1516     rightstep=0;
1517     numsteps=m_left->getNumDPPSample();
1518     if (RS)
1519     {
1520     numsteps*=leftsize;
1521     }
1522     }
1523     else // REN
1524     {
1525     chunksize=leftsize;
1526     rightstep=leftsize;
1527     leftstep=0;
1528     numsteps=m_right->getNumDPPSample();
1529     if (LS)
1530     {
1531     numsteps*=rightsize;
1532     }
1533     }
1534     }
1535    
1536     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1537     // Get the values of sub-expressions
1538     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1539     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1540     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1541     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1542     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1543     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1544     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1545     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1546     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1547    
1548     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1549     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1550    
1551    
1552     roffset=m_samplesize*tid;
1553     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we recieved
1554     switch(m_op)
1555     {
1556     case ADD:
1557     PROC_OP(NO_ARG,plus<double>());
1558     break;
1559     case SUB:
1560     PROC_OP(NO_ARG,minus<double>());
1561     break;
1562     case MUL:
1563     PROC_OP(NO_ARG,multiplies<double>());
1564     break;
1565     case DIV:
1566     PROC_OP(NO_ARG,divides<double>());
1567     break;
1568     case POW:
1569     PROC_OP(double (double,double),::pow);
1570     break;
1571     default:
1572     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1573     }
1574     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1575     return &m_samples;
1576     }
1577    
1578    
1579     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1580     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1581     // unlike the other resolve helpers, we must treat these datapoints separately.
1582     const DataTypes::ValueType*
1583     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1584     {
1585     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1586    
1587     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1588     // first work out which of the children are expanded
1589     bool leftExp=(m_left->m_readytype=='E');
1590     bool rightExp=(m_right->m_readytype=='E');
1591     int steps=getNumDPPSample();
1592     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1593     int rightStep=(rightExp?m_right->getNoValues() : 0);
1594    
1595     int resultStep=getNoValues();
1596     roffset=m_samplesize*tid;
1597     size_t offset=roffset;
1598    
1599     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1600    
1601     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1602    
1603     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1604     cout << getNoValues() << endl;)
1605    
1606    
1607     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1608     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1609     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1610     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1611     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1612     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1613     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1614    
1615     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we recieved
1616     switch(m_op)
1617     {
1618     case PROD:
1619     for (int i=0;i<steps;++i,resultp+=resultStep)
1620     {
1621     const double *ptr_0 = &((*left)[lroffset]);
1622     const double *ptr_1 = &((*right)[rroffset]);
1623    
1624     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1625     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1626    
1627     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628    
1629     lroffset+=leftStep;
1630     rroffset+=rightStep;
1631     }
1632     break;
1633     default:
1634     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1635     }
1636     roffset=offset;
1637     return &m_samples;
1638     }
1639    
1640 jfenwick 1898
1641     const DataTypes::ValueType*
1642 jfenwick 2770 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1643 jfenwick 1879 {
1644 jfenwick 2271 #ifdef _OPENMP
1645     int tid=omp_get_thread_num();
1646     #else
1647     int tid=0;
1648     #endif
1649 jfenwick 2777
1650     #ifdef LAZY_STACK_PROF
1651     stackstart[tid]=&tid;
1652     stackend[tid]=&tid;
1653     const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1654     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1655     #pragma omp critical
1656     if (d>maxstackuse)
1657     {
1658     cout << "Max resolve Stack use " << d << endl;
1659     maxstackuse=d;
1660     }
1661     return r;
1662     #else
1663 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1664 jfenwick 2777 #endif
1665 jfenwick 2271 }
1666    
1667    
1668 jfenwick 2497 // This needs to do the work of the identity constructor
1669 jfenwick 2177 void
1670     DataLazy::resolveToIdentity()
1671     {
1672     if (m_op==IDENTITY)
1673     return;
1674 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1675 jfenwick 2177 makeIdentity(p);
1676     }
1677 jfenwick 1889
1678 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1679     {
1680     m_op=IDENTITY;
1681     m_axis_offset=0;
1682     m_transpose=0;
1683     m_SL=m_SM=m_SR=0;
1684     m_children=m_height=0;
1685     m_id=p;
1686     if(p->isConstant()) {m_readytype='C';}
1687     else if(p->isExpanded()) {m_readytype='E';}
1688     else if (p->isTagged()) {m_readytype='T';}
1689     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1690     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1691     m_left.reset();
1692     m_right.reset();
1693     }
1694    
1695 jfenwick 2497
1696     DataReady_ptr
1697     DataLazy::resolve()
1698     {
1699     resolveToIdentity();
1700     return m_id;
1701     }
1702    
1703 jfenwick 2799
1704     /* This is really a static method but I think that caused problems in windows */
1705     void
1706     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1707     {
1708     if (dats.empty())
1709     {
1710     return;
1711     }
1712     vector<DataLazy*> work;
1713     FunctionSpace fs=dats[0]->getFunctionSpace();
1714     bool match=true;
1715 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1716 jfenwick 2799 {
1717     if (dats[i]->m_readytype!='E')
1718     {
1719     dats[i]->collapse();
1720     }
1721     if (dats[i]->m_op!=IDENTITY)
1722     {
1723     work.push_back(dats[i]);
1724     if (fs!=dats[i]->getFunctionSpace())
1725     {
1726     match=false;
1727     }
1728     }
1729     }
1730     if (work.empty())
1731     {
1732     return; // no work to do
1733     }
1734     if (match) // all functionspaces match. Yes I realise this is overly strict
1735     { // it is possible that dats[0] is one of the objects which we discarded and
1736     // all the other functionspaces match.
1737     vector<DataExpanded*> dep;
1738     vector<ValueType*> vecs;
1739     for (int i=0;i<work.size();++i)
1740     {
1741     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1742     vecs.push_back(&(dep[i]->getVectorRW()));
1743     }
1744     int totalsamples=work[0]->getNumSamples();
1745     const ValueType* res=0; // Storage for answer
1746     int sample;
1747     #pragma omp parallel private(sample, res)
1748     {
1749     size_t roffset=0;
1750     #pragma omp for schedule(static)
1751     for (sample=0;sample<totalsamples;++sample)
1752     {
1753     roffset=0;
1754     int j;
1755 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1756 jfenwick 2799 {
1757     #ifdef _OPENMP
1758     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1759     #else
1760     res=work[j]->resolveNodeSample(0,sample,roffset);
1761     #endif
1762     DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1763     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1764     }
1765     }
1766     }
1767     // Now we need to load the new results as identity ops into the lazy nodes
1768 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1769 jfenwick 2799 {
1770     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1771     }
1772     }
1773     else // functionspaces do not match
1774     {
1775     for (int i=0;i<work.size();++i)
1776     {
1777     work[i]->resolveToIdentity();
1778     }
1779     }
1780     }
1781    
1782    
1783    
1784 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1785     DataReady_ptr
1786     DataLazy::resolveNodeWorker()
1787     {
1788     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1789     {
1790     collapse();
1791     }
1792     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1793     {
1794     return m_id;
1795     }
1796     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1797     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1798     ValueType& resvec=result->getVectorRW();
1799     DataReady_ptr resptr=DataReady_ptr(result);
1800    
1801     int sample;
1802     int totalsamples=getNumSamples();
1803     const ValueType* res=0; // Storage for answer
1804     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1805 jfenwick 2777 #pragma omp parallel private(sample,res)
1806 jfenwick 2500 {
1807 jfenwick 2777 size_t roffset=0;
1808     #ifdef LAZY_STACK_PROF
1809     stackstart[omp_get_thread_num()]=&roffset;
1810     stackend[omp_get_thread_num()]=&roffset;
1811     #endif
1812     #pragma omp for schedule(static)
1813     for (sample=0;sample<totalsamples;++sample)
1814     {
1815     roffset=0;
1816 jfenwick 2500 #ifdef _OPENMP
1817 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1818 jfenwick 2500 #else
1819 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1820 jfenwick 2500 #endif
1821     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1822     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1823 jfenwick 2777 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1824     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1825     }
1826 jfenwick 2500 }
1827 jfenwick 2777 #ifdef LAZY_STACK_PROF
1828     for (int i=0;i<getNumberOfThreads();++i)
1829     {
1830     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1831     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1832     if (r>maxstackuse)
1833     {
1834     maxstackuse=r;
1835     }
1836     }
1837     cout << "Max resolve Stack use=" << maxstackuse << endl;
1838     #endif
1839 jfenwick 2500 return resptr;
1840     }
1841    
1842 jfenwick 1865 std::string
1843     DataLazy::toString() const
1844     {
1845 jfenwick 1886 ostringstream oss;
1846 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1847 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1848 jfenwick 2737 {
1849 jfenwick 2795 case 1: // tree format
1850 jfenwick 2737 oss << endl;
1851 jfenwick 2795 intoTreeString(oss,"");
1852     break;
1853     case 2: // just the depth
1854     break;
1855     default:
1856     intoString(oss);
1857     break;
1858 jfenwick 2737 }
1859 jfenwick 1886 return oss.str();
1860 jfenwick 1865 }
1861    
1862 jfenwick 1899
1863 jfenwick 1886 void
1864     DataLazy::intoString(ostringstream& oss) const
1865     {
1866 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1867 jfenwick 1886 switch (getOpgroup(m_op))
1868     {
1869 jfenwick 1889 case G_IDENTITY:
1870     if (m_id->isExpanded())
1871     {
1872     oss << "E";
1873     }
1874     else if (m_id->isTagged())
1875     {
1876     oss << "T";
1877     }
1878     else if (m_id->isConstant())
1879     {
1880     oss << "C";
1881     }
1882     else
1883     {
1884     oss << "?";
1885     }
1886 jfenwick 1886 oss << '@' << m_id.get();
1887     break;
1888     case G_BINARY:
1889     oss << '(';
1890     m_left->intoString(oss);
1891     oss << ' ' << opToString(m_op) << ' ';
1892     m_right->intoString(oss);
1893     oss << ')';
1894     break;
1895     case G_UNARY:
1896 jfenwick 2147 case G_UNARY_P:
1897 jfenwick 2037 case G_NP1OUT:
1898 jfenwick 2084 case G_NP1OUT_P:
1899 jfenwick 2721 case G_REDUCTION:
1900 jfenwick 1886 oss << opToString(m_op) << '(';
1901     m_left->intoString(oss);
1902     oss << ')';
1903     break;
1904 jfenwick 2066 case G_TENSORPROD:
1905     oss << opToString(m_op) << '(';
1906     m_left->intoString(oss);
1907     oss << ", ";
1908     m_right->intoString(oss);
1909     oss << ')';
1910     break;
1911 jfenwick 2496 case G_NP1OUT_2P:
1912     oss << opToString(m_op) << '(';
1913     m_left->intoString(oss);
1914     oss << ", " << m_axis_offset << ", " << m_transpose;
1915     oss << ')';
1916     break;
1917 jfenwick 3035 case G_CONDEVAL:
1918     oss << opToString(m_op)<< '(' ;
1919     m_mask->intoString(oss);
1920     oss << " ? ";
1921     m_left->intoString(oss);
1922     oss << " : ";
1923     m_right->intoString(oss);
1924     oss << ')';
1925     break;
1926 jfenwick 1886 default:
1927     oss << "UNKNOWN";
1928     }
1929     }
1930    
1931 jfenwick 2737
1932     void
1933     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1934     {
1935     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1936     switch (getOpgroup(m_op))
1937     {
1938     case G_IDENTITY:
1939     if (m_id->isExpanded())
1940     {
1941     oss << "E";
1942     }
1943     else if (m_id->isTagged())
1944     {
1945     oss << "T";
1946     }
1947     else if (m_id->isConstant())
1948     {
1949     oss << "C";
1950     }
1951     else
1952     {
1953     oss << "?";
1954     }
1955     oss << '@' << m_id.get() << endl;
1956     break;
1957     case G_BINARY:
1958     oss << opToString(m_op) << endl;
1959     indent+='.';
1960     m_left->intoTreeString(oss, indent);
1961     m_right->intoTreeString(oss, indent);
1962     break;
1963     case G_UNARY:
1964     case G_UNARY_P:
1965     case G_NP1OUT:
1966     case G_NP1OUT_P:
1967     case G_REDUCTION:
1968     oss << opToString(m_op) << endl;
1969     indent+='.';
1970     m_left->intoTreeString(oss, indent);
1971     break;
1972     case G_TENSORPROD:
1973     oss << opToString(m_op) << endl;
1974     indent+='.';
1975     m_left->intoTreeString(oss, indent);
1976     m_right->intoTreeString(oss, indent);
1977     break;
1978     case G_NP1OUT_2P:
1979     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1980     indent+='.';
1981     m_left->intoTreeString(oss, indent);
1982     break;
1983     default:
1984     oss << "UNKNOWN";
1985     }
1986     }
1987    
1988    
1989 jfenwick 1865 DataAbstract*
1990     DataLazy::deepCopy()
1991     {
1992 jfenwick 1935 switch (getOpgroup(m_op))
1993 jfenwick 1865 {
1994 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1995 jfenwick 2721 case G_UNARY:
1996     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1997     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1998 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1999 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2000     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2002     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003 jfenwick 1935 default:
2004     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2005 jfenwick 1865 }
2006     }
2007    
2008    
2009 jfenwick 2721
2010 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2011     // Instances of DataReady can look at the size of their vectors.
2012     // For lazy though, it could be the size the data would be if it were resolved;
2013     // or it could be some function of the lengths of the DataReady instances which
2014     // form part of the expression.
2015     // Rather than have people making assumptions, I have disabled the method.
2016 jfenwick 1865 DataTypes::ValueType::size_type
2017     DataLazy::getLength() const
2018     {
2019 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2020 jfenwick 1865 }
2021    
2022    
2023     DataAbstract*
2024     DataLazy::getSlice(const DataTypes::RegionType& region) const
2025     {
2026 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2027 jfenwick 1865 }
2028    
2029 jfenwick 1935
2030     // To do this we need to rely on our child nodes
2031 jfenwick 1865 DataTypes::ValueType::size_type
2032     DataLazy::getPointOffset(int sampleNo,
2033 jfenwick 1935 int dataPointNo)
2034     {
2035     if (m_op==IDENTITY)
2036     {
2037     return m_id->getPointOffset(sampleNo,dataPointNo);
2038     }
2039     if (m_readytype!='E')
2040     {
2041     collapse();
2042     return m_id->getPointOffset(sampleNo,dataPointNo);
2043     }
2044     // at this point we do not have an identity node and the expression will be Expanded
2045     // so we only need to know which child to ask
2046     if (m_left->m_readytype=='E')
2047     {
2048     return m_left->getPointOffset(sampleNo,dataPointNo);
2049     }
2050     else
2051     {
2052     return m_right->getPointOffset(sampleNo,dataPointNo);
2053     }
2054     }
2055    
2056     // To do this we need to rely on our child nodes
2057     DataTypes::ValueType::size_type
2058     DataLazy::getPointOffset(int sampleNo,
2059 jfenwick 1865 int dataPointNo) const
2060     {
2061 jfenwick 1935 if (m_op==IDENTITY)
2062     {
2063     return m_id->getPointOffset(sampleNo,dataPointNo);
2064     }
2065     if (m_readytype=='E')
2066     {
2067     // at this point we do not have an identity node and the expression will be Expanded
2068     // so we only need to know which child to ask
2069     if (m_left->m_readytype=='E')
2070     {
2071     return m_left->getPointOffset(sampleNo,dataPointNo);
2072     }
2073     else
2074     {
2075     return m_right->getPointOffset(sampleNo,dataPointNo);
2076     }
2077     }
2078     if (m_readytype=='C')
2079     {
2080     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2081     }
2082     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2083 jfenwick 1865 }
2084    
2085 jfenwick 2271
2086     // I have decided to let Data:: handle this issue.
2087 jfenwick 1901 void
2088     DataLazy::setToZero()
2089     {
2090 jfenwick 2271 // DataTypes::ValueType v(getNoValues(),0);
2091     // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2092     // m_op=IDENTITY;
2093     // m_right.reset();
2094     // m_left.reset();
2095     // m_readytype='C';
2096     // m_buffsRequired=1;
2097    
2098 jfenwick 2514 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
2099 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2100 jfenwick 1901 }
2101    
2102 jfenwick 2271 bool
2103     DataLazy::actsExpanded() const
2104     {
2105     return (m_readytype=='E');
2106     }
2107    
2108 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26