/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5448 - (hide annotations)
Fri Feb 6 05:31:37 2015 UTC (4 years, 2 months ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 60857 byte(s)
Updating all the dates
1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5448 * Copyright (c) 2003-2015 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17    
18     #include "DataLazy.h"
19 caltinay 3317 #include "esysUtils/Esys_MPI.h"
20 jfenwick 1885 #ifdef _OPENMP
21     #include <omp.h>
22     #endif
23 jfenwick 1865 #include "FunctionSpace.h"
24     #include "DataTypes.h"
25 jfenwick 1868 #include "Data.h"
26 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
27 jfenwick 1950 #include "Utils.h"
28 jfenwick 1865
29 jfenwick 2472 #include "EscriptParams.h"
30    
31 caltinay 3317 #ifdef USE_NETCDF
32     #include <netcdfcpp.h>
33     #endif
34    
35 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
36    
37 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
38 jfenwick 2092 #define LAZYDEBUG(X)
39 jfenwick 2157 namespace
40     {
41     bool privdebug=false;
42 jfenwick 2092
43 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
44     #define DISABLEDEBUG privdebug=false;
45     }
46    
47 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
48 jfenwick 2177
49 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
50 jfenwick 2472
51 jfenwick 2501
52 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
53    
54 jfenwick 1899 /*
55     How does DataLazy work?
56     ~~~~~~~~~~~~~~~~~~~~~~~
57    
58     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59     denoted left and right.
60    
61     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62     This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65 caltinay 4286 Note that IDENTITY is not considered a unary operation.
66 jfenwick 1899
67     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68     It must however form a DAG (directed acyclic graph).
69     I will refer to individual DataLazy objects with the structure as nodes.
70    
71     Each node also stores:
72     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73     evaluated.
74 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75 jfenwick 1899 evaluate the expression.
76     - m_samplesize ~ the number of doubles stored in a sample.
77    
78     When a new node is created, the above values are computed based on the values in the child nodes.
79     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81     The resolve method, which produces a DataReady from a DataLazy, does the following:
82     1) Create a DataReady to hold the new result.
83     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88     resolveSample returns a Vector* and an offset within that vector where the result is stored.
89     Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92    
93     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95 jfenwick 2037
96 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97 jfenwick 2037 1) Add to the ES_optype.
98     2) determine what opgroup your operation belongs to (X)
99     3) add a string for the op to the end of ES_opstrings
100     4) increase ES_opcount
101     5) add an entry (X) to opgroups
102     6) add an entry to the switch in collapseToReady
103     7) add an entry to resolveX
104 jfenwick 1899 */
105    
106    
107 jfenwick 1865 using namespace std;
108 jfenwick 1868 using namespace boost;
109 jfenwick 1865
110     namespace escript
111     {
112    
113     namespace
114     {
115    
116 jfenwick 2777
117     // enabling this will print out when ever the maximum stacksize used by resolve increases
118     // it assumes _OPENMP is also in use
119     //#define LAZY_STACK_PROF
120    
121    
122    
123     #ifndef _OPENMP
124     #ifdef LAZY_STACK_PROF
125     #undef LAZY_STACK_PROF
126     #endif
127     #endif
128    
129    
130     #ifdef LAZY_STACK_PROF
131     std::vector<void*> stackstart(getNumberOfThreads());
132     std::vector<void*> stackend(getNumberOfThreads());
133     size_t maxstackuse=0;
134     #endif
135    
136 jfenwick 1886 enum ES_opgroup
137     {
138     G_UNKNOWN,
139     G_IDENTITY,
140 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
141 jfenwick 2037 G_UNARY, // pointwise operations with one argument
142 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
143 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
144 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
145 jfenwick 2496 G_TENSORPROD, // general tensor product
146 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
147 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
148     G_CONDEVAL
149 jfenwick 1886 };
150    
151    
152    
153    
154 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
155     "sin","cos","tan",
156 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
157     "asinh","acosh","atanh",
158 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
159 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
160 jfenwick 2066 "symmetric","nonsymmetric",
161 jfenwick 2084 "prod",
162 jfenwick 2496 "transpose", "trace",
163 jfenwick 2721 "swapaxes",
164 jfenwick 3035 "minval", "maxval",
165     "condEval"};
166     int ES_opcount=44;
167 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168     G_UNARY,G_UNARY,G_UNARY, //10
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
170     G_UNARY,G_UNARY,G_UNARY, // 20
171 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
172     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
173 jfenwick 2066 G_NP1OUT,G_NP1OUT,
174 jfenwick 2084 G_TENSORPROD,
175 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
176 jfenwick 2721 G_NP1OUT_2P,
177 jfenwick 3035 G_REDUCTION, G_REDUCTION,
178     G_CONDEVAL};
179 jfenwick 1886 inline
180     ES_opgroup
181     getOpgroup(ES_optype op)
182     {
183     return opgroups[op];
184     }
185    
186 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
187     FunctionSpace
188     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189     {
190     // perhaps this should call interpolate and throw or something?
191     // maybe we need an interpolate node -
192     // that way, if interpolate is required in any other op we can just throw a
193     // programming error exception.
194 jfenwick 1879
195 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
196     FunctionSpace r=right->getFunctionSpace();
197     if (l!=r)
198     {
199 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
200     if (res==1)
201 jfenwick 1943 {
202     return l;
203     }
204 jfenwick 4264 if (res==-1)
205 jfenwick 1943 {
206     return r;
207     }
208     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
209     }
210     return l;
211 jfenwick 1865 }
212    
213     // return the shape of the result of "left op right"
214 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
215 jfenwick 1865 DataTypes::ShapeType
216     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
217     {
218 jfenwick 1879 if (left->getShape()!=right->getShape())
219     {
220 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
221 jfenwick 1908 {
222     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
223     }
224 jfenwick 2721
225 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
226     {
227     return right->getShape();
228     }
229     if (right->getRank()==0)
230     {
231     return left->getShape();
232     }
233     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
234 jfenwick 1879 }
235     return left->getShape();
236 jfenwick 1865 }
237    
238 jfenwick 2084 // return the shape for "op left"
239    
240     DataTypes::ShapeType
241 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
242 jfenwick 2084 {
243     switch(op)
244     {
245     case TRANS:
246 jfenwick 2199 { // for the scoping of variables
247     const DataTypes::ShapeType& s=left->getShape();
248     DataTypes::ShapeType sh;
249     int rank=left->getRank();
250     if (axis_offset<0 || axis_offset>rank)
251     {
252 caltinay 2779 stringstream e;
253     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
254     throw DataException(e.str());
255     }
256     for (int i=0; i<rank; i++)
257 jfenwick 2199 {
258     int index = (axis_offset+i)%rank;
259 caltinay 2779 sh.push_back(s[index]); // Append to new shape
260     }
261 jfenwick 2199 return sh;
262     }
263 jfenwick 2084 break;
264     case TRACE:
265 jfenwick 2199 {
266     int rank=left->getRank();
267     if (rank<2)
268     {
269     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
270     }
271     if ((axis_offset>rank-2) || (axis_offset<0))
272     {
273     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
274     }
275     if (rank==2)
276     {
277     return DataTypes::scalarShape;
278     }
279     else if (rank==3)
280     {
281     DataTypes::ShapeType sh;
282     if (axis_offset==0)
283     {
284     sh.push_back(left->getShape()[2]);
285     }
286     else // offset==1
287     {
288     sh.push_back(left->getShape()[0]);
289     }
290     return sh;
291     }
292     else if (rank==4)
293     {
294     DataTypes::ShapeType sh;
295     const DataTypes::ShapeType& s=left->getShape();
296     if (axis_offset==0)
297     {
298     sh.push_back(s[2]);
299     sh.push_back(s[3]);
300     }
301     else if (axis_offset==1)
302     {
303     sh.push_back(s[0]);
304     sh.push_back(s[3]);
305     }
306     else // offset==2
307     {
308     sh.push_back(s[0]);
309     sh.push_back(s[1]);
310     }
311     return sh;
312     }
313     else // unknown rank
314     {
315     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
316     }
317     }
318 jfenwick 2084 break;
319     default:
320     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
321     }
322     }
323    
324 jfenwick 2496 DataTypes::ShapeType
325     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
326     {
327     // This code taken from the Data.cpp swapaxes() method
328     // Some of the checks are probably redundant here
329     int axis0_tmp,axis1_tmp;
330     const DataTypes::ShapeType& s=left->getShape();
331     DataTypes::ShapeType out_shape;
332     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
333     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
334     int rank=left->getRank();
335     if (rank<2) {
336     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
337     }
338     if (axis0<0 || axis0>rank-1) {
339 caltinay 2779 stringstream e;
340     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
341     throw DataException(e.str());
342 jfenwick 2496 }
343     if (axis1<0 || axis1>rank-1) {
344 caltinay 2782 stringstream e;
345 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
346     throw DataException(e.str());
347 jfenwick 2496 }
348     if (axis0 == axis1) {
349     throw DataException("Error - Data::swapaxes: axis indices must be different.");
350     }
351     if (axis0 > axis1) {
352     axis0_tmp=axis1;
353     axis1_tmp=axis0;
354     } else {
355     axis0_tmp=axis0;
356     axis1_tmp=axis1;
357     }
358     for (int i=0; i<rank; i++) {
359     if (i == axis0_tmp) {
360     out_shape.push_back(s[axis1_tmp]);
361     } else if (i == axis1_tmp) {
362     out_shape.push_back(s[axis0_tmp]);
363     } else {
364     out_shape.push_back(s[i]);
365     }
366     }
367     return out_shape;
368     }
369    
370    
371 jfenwick 2066 // determine the output shape for the general tensor product operation
372     // the additional parameters return information required later for the product
373     // the majority of this code is copy pasted from C_General_Tensor_Product
374     DataTypes::ShapeType
375     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
376 jfenwick 1865 {
377 jfenwick 2066
378     // Get rank and shape of inputs
379     int rank0 = left->getRank();
380     int rank1 = right->getRank();
381     const DataTypes::ShapeType& shape0 = left->getShape();
382     const DataTypes::ShapeType& shape1 = right->getShape();
383    
384     // Prepare for the loops of the product and verify compatibility of shapes
385     int start0=0, start1=0;
386     if (transpose == 0) {}
387     else if (transpose == 1) { start0 = axis_offset; }
388     else if (transpose == 2) { start1 = rank1-axis_offset; }
389     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
390    
391 jfenwick 2085 if (rank0<axis_offset)
392     {
393     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
394     }
395 jfenwick 2066
396     // Adjust the shapes for transpose
397     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
398     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
399     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
400     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
401    
402     // Prepare for the loops of the product
403     SL=1, SM=1, SR=1;
404     for (int i=0; i<rank0-axis_offset; i++) {
405     SL *= tmpShape0[i];
406     }
407     for (int i=rank0-axis_offset; i<rank0; i++) {
408     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
409     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
410     }
411     SM *= tmpShape0[i];
412     }
413     for (int i=axis_offset; i<rank1; i++) {
414     SR *= tmpShape1[i];
415     }
416    
417     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
418     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
419     { // block to limit the scope of out_index
420     int out_index=0;
421     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
422     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
423     }
424 jfenwick 2086
425     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
426     {
427     ostringstream os;
428     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
429     throw DataException(os.str());
430     }
431    
432 jfenwick 2066 return shape2;
433 jfenwick 1865 }
434    
435     } // end anonymous namespace
436    
437    
438 jfenwick 1899
439     // Return a string representing the operation
440 jfenwick 1865 const std::string&
441     opToString(ES_optype op)
442     {
443     if (op<0 || op>=ES_opcount)
444     {
445     op=UNKNOWNOP;
446     }
447     return ES_opstrings[op];
448     }
449    
450 jfenwick 2500 void DataLazy::LazyNodeSetup()
451     {
452     #ifdef _OPENMP
453     int numthreads=omp_get_max_threads();
454     m_samples.resize(numthreads*m_samplesize);
455     m_sampleids=new int[numthreads];
456     for (int i=0;i<numthreads;++i)
457     {
458     m_sampleids[i]=-1;
459     }
460     #else
461     m_samples.resize(m_samplesize);
462     m_sampleids=new int[1];
463     m_sampleids[0]=-1;
464     #endif // _OPENMP
465     }
466 jfenwick 1865
467 jfenwick 2500
468     // Creates an identity node
469 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
470 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
471 jfenwick 2500 ,m_sampleids(0),
472     m_samples(1)
473 jfenwick 1865 {
474 jfenwick 1879 if (p->isLazy())
475     {
476     // I don't want identity of Lazy.
477     // Question: Why would that be so bad?
478     // Answer: We assume that the child of ID is something we can call getVector on
479     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
480     }
481     else
482     {
483 jfenwick 2271 p->makeLazyShared();
484 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
485     makeIdentity(dr);
486 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
487 jfenwick 1879 }
488 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
489 jfenwick 1865 }
490    
491 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
492 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
493 jfenwick 2066 m_op(op),
494     m_axis_offset(0),
495     m_transpose(0),
496     m_SL(0), m_SM(0), m_SR(0)
497 jfenwick 1886 {
498 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
499 jfenwick 1886 {
500     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
501     }
502 jfenwick 2066
503 jfenwick 1886 DataLazy_ptr lleft;
504     if (!left->isLazy())
505     {
506     lleft=DataLazy_ptr(new DataLazy(left));
507     }
508     else
509     {
510     lleft=dynamic_pointer_cast<DataLazy>(left);
511     }
512 jfenwick 1889 m_readytype=lleft->m_readytype;
513 jfenwick 1886 m_left=lleft;
514     m_samplesize=getNumDPPSample()*getNoValues();
515 jfenwick 2177 m_children=m_left->m_children+1;
516     m_height=m_left->m_height+1;
517 jfenwick 2500 LazyNodeSetup();
518 jfenwick 2177 SIZELIMIT
519 jfenwick 1886 }
520    
521    
522 jfenwick 1943 // In this constructor we need to consider interpolation
523 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524     : parent(resultFS(left,right,op), resultShape(left,right,op)),
525 jfenwick 2066 m_op(op),
526     m_SL(0), m_SM(0), m_SR(0)
527 jfenwick 1879 {
528 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
530 jfenwick 1886 {
531 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532 jfenwick 1886 }
533 jfenwick 1943
534     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
535     {
536     FunctionSpace fs=getFunctionSpace();
537     Data ltemp(left);
538     Data tmp(ltemp,fs);
539     left=tmp.borrowDataPtr();
540     }
541 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
542 jfenwick 1943 {
543     Data tmp(Data(right),getFunctionSpace());
544     right=tmp.borrowDataPtr();
545 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546 jfenwick 1943 }
547     left->operandCheck(*right);
548    
549 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
550 jfenwick 1879 {
551     m_left=dynamic_pointer_cast<DataLazy>(left);
552 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553 jfenwick 1879 }
554     else
555     {
556     m_left=DataLazy_ptr(new DataLazy(left));
557 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558 jfenwick 1879 }
559     if (right->isLazy())
560     {
561     m_right=dynamic_pointer_cast<DataLazy>(right);
562 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563 jfenwick 1879 }
564     else
565     {
566     m_right=DataLazy_ptr(new DataLazy(right));
567 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568 jfenwick 1879 }
569 jfenwick 1889 char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;
571     if (lt=='E' || rt=='E')
572     {
573     m_readytype='E';
574     }
575     else if (lt=='T' || rt=='T')
576     {
577     m_readytype='T';
578     }
579     else
580     {
581     m_readytype='C';
582     }
583 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
584 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
585     m_height=max(m_left->m_height,m_right->m_height)+1;
586 jfenwick 2500 LazyNodeSetup();
587 jfenwick 2177 SIZELIMIT
588 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589 jfenwick 1879 }
590    
591 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
592     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
593     m_op(op),
594     m_axis_offset(axis_offset),
595     m_transpose(transpose)
596     {
597     if ((getOpgroup(op)!=G_TENSORPROD))
598     {
599     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
600     }
601     if ((transpose>2) || (transpose<0))
602     {
603     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
604     }
605     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
606     {
607     FunctionSpace fs=getFunctionSpace();
608     Data ltemp(left);
609     Data tmp(ltemp,fs);
610     left=tmp.borrowDataPtr();
611     }
612     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
613     {
614     Data tmp(Data(right),getFunctionSpace());
615     right=tmp.borrowDataPtr();
616     }
617 jfenwick 2195 // left->operandCheck(*right);
618 jfenwick 1879
619 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
620     {
621     m_left=dynamic_pointer_cast<DataLazy>(left);
622     }
623     else
624     {
625     m_left=DataLazy_ptr(new DataLazy(left));
626     }
627     if (right->isLazy())
628     {
629     m_right=dynamic_pointer_cast<DataLazy>(right);
630     }
631     else
632     {
633     m_right=DataLazy_ptr(new DataLazy(right));
634     }
635     char lt=m_left->m_readytype;
636     char rt=m_right->m_readytype;
637     if (lt=='E' || rt=='E')
638     {
639     m_readytype='E';
640     }
641     else if (lt=='T' || rt=='T')
642     {
643     m_readytype='T';
644     }
645     else
646     {
647     m_readytype='C';
648     }
649     m_samplesize=getNumDPPSample()*getNoValues();
650 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
651     m_height=max(m_left->m_height,m_right->m_height)+1;
652 jfenwick 2500 LazyNodeSetup();
653 jfenwick 2177 SIZELIMIT
654 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
655 jfenwick 2066 }
656    
657    
658 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
659 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
660 jfenwick 2084 m_op(op),
661     m_axis_offset(axis_offset),
662 jfenwick 2147 m_transpose(0),
663     m_tol(0)
664 jfenwick 2084 {
665     if ((getOpgroup(op)!=G_NP1OUT_P))
666     {
667     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
668     }
669     DataLazy_ptr lleft;
670     if (!left->isLazy())
671     {
672     lleft=DataLazy_ptr(new DataLazy(left));
673     }
674     else
675     {
676     lleft=dynamic_pointer_cast<DataLazy>(left);
677     }
678     m_readytype=lleft->m_readytype;
679     m_left=lleft;
680     m_samplesize=getNumDPPSample()*getNoValues();
681 jfenwick 2177 m_children=m_left->m_children+1;
682     m_height=m_left->m_height+1;
683 jfenwick 2500 LazyNodeSetup();
684 jfenwick 2177 SIZELIMIT
685 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
686 jfenwick 2084 }
687    
688 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
689     : parent(left->getFunctionSpace(), left->getShape()),
690     m_op(op),
691     m_axis_offset(0),
692     m_transpose(0),
693     m_tol(tol)
694     {
695     if ((getOpgroup(op)!=G_UNARY_P))
696     {
697     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
698     }
699     DataLazy_ptr lleft;
700     if (!left->isLazy())
701     {
702     lleft=DataLazy_ptr(new DataLazy(left));
703     }
704     else
705     {
706     lleft=dynamic_pointer_cast<DataLazy>(left);
707     }
708     m_readytype=lleft->m_readytype;
709     m_left=lleft;
710     m_samplesize=getNumDPPSample()*getNoValues();
711 jfenwick 2177 m_children=m_left->m_children+1;
712     m_height=m_left->m_height+1;
713 jfenwick 2500 LazyNodeSetup();
714 jfenwick 2177 SIZELIMIT
715 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
716     }
717 jfenwick 2084
718 jfenwick 2496
719     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
720     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
721     m_op(op),
722     m_axis_offset(axis0),
723     m_transpose(axis1),
724     m_tol(0)
725     {
726     if ((getOpgroup(op)!=G_NP1OUT_2P))
727     {
728     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
729     }
730     DataLazy_ptr lleft;
731     if (!left->isLazy())
732     {
733     lleft=DataLazy_ptr(new DataLazy(left));
734     }
735     else
736     {
737     lleft=dynamic_pointer_cast<DataLazy>(left);
738     }
739     m_readytype=lleft->m_readytype;
740     m_left=lleft;
741     m_samplesize=getNumDPPSample()*getNoValues();
742     m_children=m_left->m_children+1;
743     m_height=m_left->m_height+1;
744 jfenwick 2500 LazyNodeSetup();
745 jfenwick 2496 SIZELIMIT
746     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
747     }
748    
749 jfenwick 3035
750     namespace
751     {
752    
753     inline int max3(int a, int b, int c)
754     {
755     int t=(a>b?a:b);
756     return (t>c?t:c);
757    
758     }
759     }
760    
761     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
762     : parent(left->getFunctionSpace(), left->getShape()),
763     m_op(CONDEVAL),
764     m_axis_offset(0),
765     m_transpose(0),
766     m_tol(0)
767     {
768    
769     DataLazy_ptr lmask;
770     DataLazy_ptr lleft;
771     DataLazy_ptr lright;
772     if (!mask->isLazy())
773     {
774     lmask=DataLazy_ptr(new DataLazy(mask));
775     }
776     else
777     {
778     lmask=dynamic_pointer_cast<DataLazy>(mask);
779     }
780     if (!left->isLazy())
781     {
782     lleft=DataLazy_ptr(new DataLazy(left));
783     }
784     else
785     {
786     lleft=dynamic_pointer_cast<DataLazy>(left);
787     }
788     if (!right->isLazy())
789     {
790     lright=DataLazy_ptr(new DataLazy(right));
791     }
792     else
793     {
794     lright=dynamic_pointer_cast<DataLazy>(right);
795     }
796     m_readytype=lmask->m_readytype;
797     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
798     {
799     throw DataException("Programmer Error - condEval arguments must have the same readytype");
800     }
801     m_left=lleft;
802     m_right=lright;
803     m_mask=lmask;
804     m_samplesize=getNumDPPSample()*getNoValues();
805     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
806     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
807     LazyNodeSetup();
808     SIZELIMIT
809     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
810     }
811    
812    
813    
814 jfenwick 1865 DataLazy::~DataLazy()
815     {
816 jfenwick 2500 delete[] m_sampleids;
817 jfenwick 1865 }
818    
819 jfenwick 1879
820 jfenwick 1899 /*
821     \brief Evaluates the expression using methods on Data.
822     This does the work for the collapse method.
823     For reasons of efficiency do not call this method on DataExpanded nodes.
824     */
825 jfenwick 1889 DataReady_ptr
826 jfenwick 4621 DataLazy::collapseToReady() const
827 jfenwick 1889 {
828     if (m_readytype=='E')
829     { // this is more an efficiency concern than anything else
830     throw DataException("Programmer Error - do not use collapse on Expanded data.");
831     }
832     if (m_op==IDENTITY)
833     {
834     return m_id;
835     }
836     DataReady_ptr pleft=m_left->collapseToReady();
837     Data left(pleft);
838     Data right;
839 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
840 jfenwick 1889 {
841     right=Data(m_right->collapseToReady());
842     }
843     Data result;
844     switch(m_op)
845     {
846     case ADD:
847     result=left+right;
848     break;
849     case SUB:
850     result=left-right;
851     break;
852     case MUL:
853     result=left*right;
854     break;
855     case DIV:
856     result=left/right;
857     break;
858     case SIN:
859     result=left.sin();
860     break;
861     case COS:
862     result=left.cos();
863     break;
864     case TAN:
865     result=left.tan();
866     break;
867     case ASIN:
868     result=left.asin();
869     break;
870     case ACOS:
871     result=left.acos();
872     break;
873     case ATAN:
874     result=left.atan();
875     break;
876     case SINH:
877     result=left.sinh();
878     break;
879     case COSH:
880     result=left.cosh();
881     break;
882     case TANH:
883     result=left.tanh();
884     break;
885     case ERF:
886     result=left.erf();
887     break;
888     case ASINH:
889     result=left.asinh();
890     break;
891     case ACOSH:
892     result=left.acosh();
893     break;
894     case ATANH:
895     result=left.atanh();
896     break;
897     case LOG10:
898     result=left.log10();
899     break;
900     case LOG:
901     result=left.log();
902     break;
903     case SIGN:
904     result=left.sign();
905     break;
906     case ABS:
907     result=left.abs();
908     break;
909     case NEG:
910     result=left.neg();
911     break;
912     case POS:
913     // it doesn't mean anything for delayed.
914     // it will just trigger a deep copy of the lazy object
915     throw DataException("Programmer error - POS not supported for lazy data.");
916     break;
917     case EXP:
918     result=left.exp();
919     break;
920     case SQRT:
921     result=left.sqrt();
922     break;
923     case RECIP:
924     result=left.oneOver();
925     break;
926     case GZ:
927     result=left.wherePositive();
928     break;
929     case LZ:
930     result=left.whereNegative();
931     break;
932     case GEZ:
933     result=left.whereNonNegative();
934     break;
935     case LEZ:
936     result=left.whereNonPositive();
937     break;
938 jfenwick 2147 case NEZ:
939     result=left.whereNonZero(m_tol);
940     break;
941     case EZ:
942     result=left.whereZero(m_tol);
943     break;
944 jfenwick 2037 case SYM:
945     result=left.symmetric();
946     break;
947     case NSYM:
948     result=left.nonsymmetric();
949     break;
950 jfenwick 2066 case PROD:
951     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
952     break;
953 jfenwick 2084 case TRANS:
954     result=left.transpose(m_axis_offset);
955     break;
956     case TRACE:
957     result=left.trace(m_axis_offset);
958     break;
959 jfenwick 2496 case SWAP:
960     result=left.swapaxes(m_axis_offset, m_transpose);
961     break;
962 jfenwick 2721 case MINVAL:
963     result=left.minval();
964     break;
965     case MAXVAL:
966     result=left.minval();
967     break;
968 jfenwick 1889 default:
969 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970 jfenwick 1889 }
971     return result.borrowReadyPtr();
972     }
973    
974 jfenwick 1899 /*
975     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
976     This method uses the original methods on the Data class to evaluate the expressions.
977     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
978     the purpose of using DataLazy in the first place).
979     */
980 jfenwick 1889 void
981 jfenwick 4621 DataLazy::collapse() const
982 jfenwick 1889 {
983     if (m_op==IDENTITY)
984     {
985     return;
986     }
987     if (m_readytype=='E')
988     { // this is more an efficiency concern than anything else
989     throw DataException("Programmer Error - do not use collapse on Expanded data.");
990     }
991     m_id=collapseToReady();
992     m_op=IDENTITY;
993     }
994    
995 jfenwick 1899
996 jfenwick 1889
997    
998    
999 jfenwick 2147
1000 phornby 1987 #define PROC_OP(TYPE,X) \
1001 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1002     {\
1003     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1004     { \
1005     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1006 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1007 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1008 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1009 jfenwick 2152 lroffset+=leftstep; \
1010     rroffset+=rightstep; \
1011     }\
1012     lroffset+=oleftstep;\
1013     rroffset+=orightstep;\
1014 jfenwick 1889 }
1015    
1016 jfenwick 1899
1017 jfenwick 2500 // The result will be stored in m_samples
1018     // The return value is a pointer to the DataVector, offset is the offset within the return value
1019     const DataTypes::ValueType*
1020 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1021 jfenwick 2500 {
1022     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1023     // collapse so we have a 'E' node or an IDENTITY for some other type
1024     if (m_readytype!='E' && m_op!=IDENTITY)
1025     {
1026     collapse();
1027     }
1028     if (m_op==IDENTITY)
1029     {
1030     const ValueType& vec=m_id->getVectorRO();
1031     roffset=m_id->getPointOffset(sampleNo, 0);
1032 jfenwick 2777 #ifdef LAZY_STACK_PROF
1033     int x;
1034     if (&x<stackend[omp_get_thread_num()])
1035     {
1036     stackend[omp_get_thread_num()]=&x;
1037     }
1038     #endif
1039 jfenwick 2500 return &(vec);
1040     }
1041     if (m_readytype!='E')
1042     {
1043     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1044     }
1045 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1046     {
1047     roffset=tid*m_samplesize;
1048     return &(m_samples); // sample is already resolved
1049     }
1050     m_sampleids[tid]=sampleNo;
1051 jfenwick 3035
1052 jfenwick 2500 switch (getOpgroup(m_op))
1053     {
1054     case G_UNARY:
1055     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1056     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1057     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1058     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1059     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1060     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1061 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1062 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1063 jfenwick 2500 default:
1064     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1065     }
1066     }
1067    
1068     const DataTypes::ValueType*
1069 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1070 jfenwick 2500 {
1071     // we assume that any collapsing has been done before we get here
1072     // since we only have one argument we don't need to think about only
1073     // processing single points.
1074     // we will also know we won't get identity nodes
1075     if (m_readytype!='E')
1076     {
1077     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1078     }
1079     if (m_op==IDENTITY)
1080     {
1081     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1082     }
1083     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1084     const double* left=&((*leftres)[roffset]);
1085     roffset=m_samplesize*tid;
1086     double* result=&(m_samples[roffset]);
1087     switch (m_op)
1088     {
1089     case SIN:
1090     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1091     break;
1092     case COS:
1093     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1094     break;
1095     case TAN:
1096     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1097     break;
1098     case ASIN:
1099     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1100     break;
1101     case ACOS:
1102     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1103     break;
1104     case ATAN:
1105     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1106     break;
1107     case SINH:
1108     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1109     break;
1110     case COSH:
1111     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1112     break;
1113     case TANH:
1114     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1115     break;
1116     case ERF:
1117     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1118     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1119     #else
1120     tensor_unary_operation(m_samplesize, left, result, ::erf);
1121     break;
1122     #endif
1123     case ASINH:
1124     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1125     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1126     #else
1127     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1128     #endif
1129     break;
1130     case ACOSH:
1131     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1132     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1133     #else
1134     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1135     #endif
1136     break;
1137     case ATANH:
1138     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1139     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1140     #else
1141     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1142     #endif
1143     break;
1144     case LOG10:
1145     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1146     break;
1147     case LOG:
1148     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1149     break;
1150     case SIGN:
1151     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1152     break;
1153     case ABS:
1154     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1155     break;
1156     case NEG:
1157     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1158     break;
1159     case POS:
1160     // it doesn't mean anything for delayed.
1161     // it will just trigger a deep copy of the lazy object
1162     throw DataException("Programmer error - POS not supported for lazy data.");
1163     break;
1164     case EXP:
1165     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1166     break;
1167     case SQRT:
1168     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1169     break;
1170     case RECIP:
1171     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1172     break;
1173     case GZ:
1174     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1175     break;
1176     case LZ:
1177     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1178     break;
1179     case GEZ:
1180     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1181     break;
1182     case LEZ:
1183     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1184     break;
1185     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1186     case NEZ:
1187     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1188     break;
1189     case EZ:
1190     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1191     break;
1192    
1193     default:
1194     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1195     }
1196     return &(m_samples);
1197     }
1198    
1199    
1200     const DataTypes::ValueType*
1201 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1202 jfenwick 2721 {
1203     // we assume that any collapsing has been done before we get here
1204     // since we only have one argument we don't need to think about only
1205     // processing single points.
1206     // we will also know we won't get identity nodes
1207     if (m_readytype!='E')
1208     {
1209     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1210     }
1211     if (m_op==IDENTITY)
1212     {
1213     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1214     }
1215     size_t loffset=0;
1216     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1217    
1218     roffset=m_samplesize*tid;
1219 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1220 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1221 jfenwick 2721 double* result=&(m_samples[roffset]);
1222     switch (m_op)
1223     {
1224     case MINVAL:
1225     {
1226 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1227     {
1228     FMin op;
1229     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1230     loffset+=psize;
1231     result++;
1232     }
1233 jfenwick 2721 }
1234     break;
1235     case MAXVAL:
1236     {
1237 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1238     {
1239 jfenwick 2721 FMax op;
1240     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1241 jfenwick 2734 loffset+=psize;
1242     result++;
1243     }
1244 jfenwick 2721 }
1245     break;
1246     default:
1247     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1248     }
1249     return &(m_samples);
1250     }
1251    
1252     const DataTypes::ValueType*
1253 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1254 jfenwick 2500 {
1255     // we assume that any collapsing has been done before we get here
1256     // since we only have one argument we don't need to think about only
1257     // processing single points.
1258     if (m_readytype!='E')
1259     {
1260     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1261     }
1262     if (m_op==IDENTITY)
1263     {
1264     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1265     }
1266     size_t subroffset;
1267     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1268     roffset=m_samplesize*tid;
1269     size_t loop=0;
1270     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1271     size_t step=getNoValues();
1272     size_t offset=roffset;
1273     switch (m_op)
1274     {
1275     case SYM:
1276     for (loop=0;loop<numsteps;++loop)
1277     {
1278     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1279     subroffset+=step;
1280     offset+=step;
1281     }
1282     break;
1283     case NSYM:
1284     for (loop=0;loop<numsteps;++loop)
1285     {
1286     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1287     subroffset+=step;
1288     offset+=step;
1289     }
1290     break;
1291     default:
1292     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1293     }
1294     return &m_samples;
1295     }
1296    
1297     const DataTypes::ValueType*
1298 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1299 jfenwick 2500 {
1300     // we assume that any collapsing has been done before we get here
1301     // since we only have one argument we don't need to think about only
1302     // processing single points.
1303     if (m_readytype!='E')
1304     {
1305     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1306     }
1307     if (m_op==IDENTITY)
1308     {
1309     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1310     }
1311     size_t subroffset;
1312     size_t offset;
1313     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1314     roffset=m_samplesize*tid;
1315     offset=roffset;
1316     size_t loop=0;
1317     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1318     size_t outstep=getNoValues();
1319     size_t instep=m_left->getNoValues();
1320     switch (m_op)
1321     {
1322     case TRACE:
1323     for (loop=0;loop<numsteps;++loop)
1324     {
1325     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1326     subroffset+=instep;
1327     offset+=outstep;
1328     }
1329     break;
1330     case TRANS:
1331     for (loop=0;loop<numsteps;++loop)
1332     {
1333     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1334     subroffset+=instep;
1335     offset+=outstep;
1336     }
1337     break;
1338     default:
1339     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1340     }
1341     return &m_samples;
1342     }
1343    
1344    
1345     const DataTypes::ValueType*
1346 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1347 jfenwick 2500 {
1348     if (m_readytype!='E')
1349     {
1350     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1351     }
1352     if (m_op==IDENTITY)
1353     {
1354     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1355     }
1356     size_t subroffset;
1357     size_t offset;
1358     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1359     roffset=m_samplesize*tid;
1360     offset=roffset;
1361     size_t loop=0;
1362     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1363     size_t outstep=getNoValues();
1364     size_t instep=m_left->getNoValues();
1365     switch (m_op)
1366     {
1367     case SWAP:
1368     for (loop=0;loop<numsteps;++loop)
1369     {
1370     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1371     subroffset+=instep;
1372     offset+=outstep;
1373     }
1374     break;
1375     default:
1376     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1377     }
1378     return &m_samples;
1379     }
1380    
1381 jfenwick 3035 const DataTypes::ValueType*
1382 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1383 jfenwick 3035 {
1384     if (m_readytype!='E')
1385     {
1386     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1387     }
1388     if (m_op!=CONDEVAL)
1389     {
1390     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1391     }
1392     size_t subroffset;
1393 jfenwick 2500
1394 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1395     const ValueType* srcres=0;
1396     if ((*maskres)[subroffset]>0)
1397     {
1398     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1399     }
1400     else
1401     {
1402     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1403     }
1404    
1405     // Now we need to copy the result
1406    
1407     roffset=m_samplesize*tid;
1408     for (int i=0;i<m_samplesize;++i)
1409     {
1410     m_samples[roffset+i]=(*srcres)[subroffset+i];
1411     }
1412    
1413     return &m_samples;
1414     }
1415    
1416 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1417     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1418     // If both children are expanded, then we can process them in a single operation (we treat
1419     // the whole sample as one big datapoint.
1420     // If one of the children is not expanded, then we need to treat each point in the sample
1421     // individually.
1422     // There is an additional complication when scalar operations are considered.
1423     // For example, 2+Vector.
1424     // In this case each double within the point is treated individually
1425     const DataTypes::ValueType*
1426 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1427 jfenwick 2500 {
1428     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1429    
1430     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1431     // first work out which of the children are expanded
1432     bool leftExp=(m_left->m_readytype=='E');
1433     bool rightExp=(m_right->m_readytype=='E');
1434     if (!leftExp && !rightExp)
1435     {
1436     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1437     }
1438     bool leftScalar=(m_left->getRank()==0);
1439     bool rightScalar=(m_right->getRank()==0);
1440     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1441     {
1442     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1443     }
1444     size_t leftsize=m_left->getNoValues();
1445     size_t rightsize=m_right->getNoValues();
1446     size_t chunksize=1; // how many doubles will be processed in one go
1447     int leftstep=0; // how far should the left offset advance after each step
1448     int rightstep=0;
1449     int numsteps=0; // total number of steps for the inner loop
1450     int oleftstep=0; // the o variables refer to the outer loop
1451     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1452     int onumsteps=1;
1453    
1454     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1455     bool RES=(rightExp && rightScalar);
1456     bool LS=(!leftExp && leftScalar); // left is a single scalar
1457     bool RS=(!rightExp && rightScalar);
1458     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1459     bool RN=(!rightExp && !rightScalar);
1460     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1461     bool REN=(rightExp && !rightScalar);
1462    
1463     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1464     {
1465     chunksize=m_left->getNumDPPSample()*leftsize;
1466     leftstep=0;
1467     rightstep=0;
1468     numsteps=1;
1469     }
1470     else if (LES || RES)
1471     {
1472     chunksize=1;
1473     if (LES) // left is an expanded scalar
1474     {
1475     if (RS)
1476     {
1477     leftstep=1;
1478     rightstep=0;
1479     numsteps=m_left->getNumDPPSample();
1480     }
1481     else // RN or REN
1482     {
1483     leftstep=0;
1484     oleftstep=1;
1485     rightstep=1;
1486     orightstep=(RN ? -(int)rightsize : 0);
1487     numsteps=rightsize;
1488     onumsteps=m_left->getNumDPPSample();
1489     }
1490     }
1491     else // right is an expanded scalar
1492     {
1493     if (LS)
1494     {
1495     rightstep=1;
1496     leftstep=0;
1497     numsteps=m_right->getNumDPPSample();
1498     }
1499     else
1500     {
1501     rightstep=0;
1502     orightstep=1;
1503     leftstep=1;
1504     oleftstep=(LN ? -(int)leftsize : 0);
1505     numsteps=leftsize;
1506     onumsteps=m_right->getNumDPPSample();
1507     }
1508     }
1509     }
1510     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1511     {
1512     if (LEN) // and Right will be a single value
1513     {
1514     chunksize=rightsize;
1515     leftstep=rightsize;
1516     rightstep=0;
1517     numsteps=m_left->getNumDPPSample();
1518     if (RS)
1519     {
1520     numsteps*=leftsize;
1521     }
1522     }
1523     else // REN
1524     {
1525     chunksize=leftsize;
1526     rightstep=leftsize;
1527     leftstep=0;
1528     numsteps=m_right->getNumDPPSample();
1529     if (LS)
1530     {
1531     numsteps*=rightsize;
1532     }
1533     }
1534     }
1535    
1536     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1537     // Get the values of sub-expressions
1538     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1539     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1540     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1541     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1542     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1543     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1544     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1545     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1546     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1547    
1548     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1549     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1550    
1551    
1552     roffset=m_samplesize*tid;
1553 caltinay 4286 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1554 jfenwick 2500 switch(m_op)
1555     {
1556     case ADD:
1557     PROC_OP(NO_ARG,plus<double>());
1558     break;
1559     case SUB:
1560     PROC_OP(NO_ARG,minus<double>());
1561     break;
1562     case MUL:
1563     PROC_OP(NO_ARG,multiplies<double>());
1564     break;
1565     case DIV:
1566     PROC_OP(NO_ARG,divides<double>());
1567     break;
1568     case POW:
1569     PROC_OP(double (double,double),::pow);
1570     break;
1571     default:
1572     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1573     }
1574     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1575     return &m_samples;
1576     }
1577    
1578    
1579     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1580     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1581     // unlike the other resolve helpers, we must treat these datapoints separately.
1582     const DataTypes::ValueType*
1583 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1584 jfenwick 2500 {
1585     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1586    
1587     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1588     // first work out which of the children are expanded
1589     bool leftExp=(m_left->m_readytype=='E');
1590     bool rightExp=(m_right->m_readytype=='E');
1591     int steps=getNumDPPSample();
1592     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1593     int rightStep=(rightExp?m_right->getNoValues() : 0);
1594    
1595     int resultStep=getNoValues();
1596     roffset=m_samplesize*tid;
1597     size_t offset=roffset;
1598    
1599     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1600    
1601     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1602    
1603     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1604     cout << getNoValues() << endl;)
1605    
1606    
1607     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1608     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1609     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1610     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1611     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1612     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1613     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1614    
1615 caltinay 4286 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1616 jfenwick 2500 switch(m_op)
1617     {
1618     case PROD:
1619     for (int i=0;i<steps;++i,resultp+=resultStep)
1620     {
1621     const double *ptr_0 = &((*left)[lroffset]);
1622     const double *ptr_1 = &((*right)[rroffset]);
1623    
1624     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1625     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1626    
1627     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628    
1629     lroffset+=leftStep;
1630     rroffset+=rightStep;
1631     }
1632     break;
1633     default:
1634     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1635     }
1636     roffset=offset;
1637     return &m_samples;
1638     }
1639    
1640 jfenwick 1898
1641     const DataTypes::ValueType*
1642 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1643 jfenwick 1879 {
1644 jfenwick 2271 #ifdef _OPENMP
1645     int tid=omp_get_thread_num();
1646     #else
1647     int tid=0;
1648     #endif
1649 jfenwick 2777
1650     #ifdef LAZY_STACK_PROF
1651     stackstart[tid]=&tid;
1652     stackend[tid]=&tid;
1653     const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1654     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1655     #pragma omp critical
1656     if (d>maxstackuse)
1657     {
1658     cout << "Max resolve Stack use " << d << endl;
1659     maxstackuse=d;
1660     }
1661     return r;
1662     #else
1663 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1664 jfenwick 2777 #endif
1665 jfenwick 2271 }
1666    
1667    
1668 jfenwick 2497 // This needs to do the work of the identity constructor
1669 jfenwick 2177 void
1670     DataLazy::resolveToIdentity()
1671     {
1672     if (m_op==IDENTITY)
1673     return;
1674 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1675 jfenwick 2177 makeIdentity(p);
1676     }
1677 jfenwick 1889
1678 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1679     {
1680     m_op=IDENTITY;
1681     m_axis_offset=0;
1682     m_transpose=0;
1683     m_SL=m_SM=m_SR=0;
1684     m_children=m_height=0;
1685     m_id=p;
1686     if(p->isConstant()) {m_readytype='C';}
1687     else if(p->isExpanded()) {m_readytype='E';}
1688     else if (p->isTagged()) {m_readytype='T';}
1689     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1690     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1691     m_left.reset();
1692     m_right.reset();
1693     }
1694    
1695 jfenwick 2497
1696     DataReady_ptr
1697     DataLazy::resolve()
1698     {
1699     resolveToIdentity();
1700     return m_id;
1701     }
1702    
1703 jfenwick 2799
1704     /* This is really a static method but I think that caused problems in windows */
1705     void
1706     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1707     {
1708     if (dats.empty())
1709     {
1710     return;
1711     }
1712     vector<DataLazy*> work;
1713     FunctionSpace fs=dats[0]->getFunctionSpace();
1714     bool match=true;
1715 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1716 jfenwick 2799 {
1717     if (dats[i]->m_readytype!='E')
1718     {
1719     dats[i]->collapse();
1720     }
1721     if (dats[i]->m_op!=IDENTITY)
1722     {
1723     work.push_back(dats[i]);
1724     if (fs!=dats[i]->getFunctionSpace())
1725     {
1726     match=false;
1727     }
1728     }
1729     }
1730     if (work.empty())
1731     {
1732     return; // no work to do
1733     }
1734     if (match) // all functionspaces match. Yes I realise this is overly strict
1735     { // it is possible that dats[0] is one of the objects which we discarded and
1736     // all the other functionspaces match.
1737     vector<DataExpanded*> dep;
1738     vector<ValueType*> vecs;
1739     for (int i=0;i<work.size();++i)
1740     {
1741     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1742     vecs.push_back(&(dep[i]->getVectorRW()));
1743     }
1744     int totalsamples=work[0]->getNumSamples();
1745     const ValueType* res=0; // Storage for answer
1746     int sample;
1747     #pragma omp parallel private(sample, res)
1748     {
1749     size_t roffset=0;
1750     #pragma omp for schedule(static)
1751     for (sample=0;sample<totalsamples;++sample)
1752     {
1753     roffset=0;
1754     int j;
1755 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1756 jfenwick 2799 {
1757     #ifdef _OPENMP
1758     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1759     #else
1760     res=work[j]->resolveNodeSample(0,sample,roffset);
1761     #endif
1762     DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1763     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1764     }
1765     }
1766     }
1767     // Now we need to load the new results as identity ops into the lazy nodes
1768 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1769 jfenwick 2799 {
1770     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1771     }
1772     }
1773     else // functionspaces do not match
1774     {
1775     for (int i=0;i<work.size();++i)
1776     {
1777     work[i]->resolveToIdentity();
1778     }
1779     }
1780     }
1781    
1782    
1783    
1784 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1785     DataReady_ptr
1786     DataLazy::resolveNodeWorker()
1787     {
1788     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1789     {
1790     collapse();
1791     }
1792     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1793     {
1794     return m_id;
1795     }
1796     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1797     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1798     ValueType& resvec=result->getVectorRW();
1799     DataReady_ptr resptr=DataReady_ptr(result);
1800    
1801     int sample;
1802     int totalsamples=getNumSamples();
1803     const ValueType* res=0; // Storage for answer
1804     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1805 jfenwick 2777 #pragma omp parallel private(sample,res)
1806 jfenwick 2500 {
1807 jfenwick 2777 size_t roffset=0;
1808     #ifdef LAZY_STACK_PROF
1809     stackstart[omp_get_thread_num()]=&roffset;
1810     stackend[omp_get_thread_num()]=&roffset;
1811     #endif
1812     #pragma omp for schedule(static)
1813     for (sample=0;sample<totalsamples;++sample)
1814     {
1815     roffset=0;
1816 jfenwick 2500 #ifdef _OPENMP
1817 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1818 jfenwick 2500 #else
1819 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1820 jfenwick 2500 #endif
1821     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1822     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1823 jfenwick 2777 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1824     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1825     }
1826 jfenwick 2500 }
1827 jfenwick 2777 #ifdef LAZY_STACK_PROF
1828     for (int i=0;i<getNumberOfThreads();++i)
1829     {
1830     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1831     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1832     if (r>maxstackuse)
1833     {
1834     maxstackuse=r;
1835     }
1836     }
1837     cout << "Max resolve Stack use=" << maxstackuse << endl;
1838     #endif
1839 jfenwick 2500 return resptr;
1840     }
1841    
1842 jfenwick 1865 std::string
1843     DataLazy::toString() const
1844     {
1845 jfenwick 1886 ostringstream oss;
1846 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1847 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1848 jfenwick 2737 {
1849 jfenwick 2795 case 1: // tree format
1850 jfenwick 2737 oss << endl;
1851 jfenwick 2795 intoTreeString(oss,"");
1852     break;
1853     case 2: // just the depth
1854     break;
1855     default:
1856     intoString(oss);
1857     break;
1858 jfenwick 2737 }
1859 jfenwick 1886 return oss.str();
1860 jfenwick 1865 }
1861    
1862 jfenwick 1899
1863 jfenwick 1886 void
1864     DataLazy::intoString(ostringstream& oss) const
1865     {
1866 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1867 jfenwick 1886 switch (getOpgroup(m_op))
1868     {
1869 jfenwick 1889 case G_IDENTITY:
1870     if (m_id->isExpanded())
1871     {
1872     oss << "E";
1873     }
1874     else if (m_id->isTagged())
1875     {
1876     oss << "T";
1877     }
1878     else if (m_id->isConstant())
1879     {
1880     oss << "C";
1881     }
1882     else
1883     {
1884     oss << "?";
1885     }
1886 jfenwick 1886 oss << '@' << m_id.get();
1887     break;
1888     case G_BINARY:
1889     oss << '(';
1890     m_left->intoString(oss);
1891     oss << ' ' << opToString(m_op) << ' ';
1892     m_right->intoString(oss);
1893     oss << ')';
1894     break;
1895     case G_UNARY:
1896 jfenwick 2147 case G_UNARY_P:
1897 jfenwick 2037 case G_NP1OUT:
1898 jfenwick 2084 case G_NP1OUT_P:
1899 jfenwick 2721 case G_REDUCTION:
1900 jfenwick 1886 oss << opToString(m_op) << '(';
1901     m_left->intoString(oss);
1902     oss << ')';
1903     break;
1904 jfenwick 2066 case G_TENSORPROD:
1905     oss << opToString(m_op) << '(';
1906     m_left->intoString(oss);
1907     oss << ", ";
1908     m_right->intoString(oss);
1909     oss << ')';
1910     break;
1911 jfenwick 2496 case G_NP1OUT_2P:
1912     oss << opToString(m_op) << '(';
1913     m_left->intoString(oss);
1914     oss << ", " << m_axis_offset << ", " << m_transpose;
1915     oss << ')';
1916     break;
1917 jfenwick 3035 case G_CONDEVAL:
1918     oss << opToString(m_op)<< '(' ;
1919     m_mask->intoString(oss);
1920     oss << " ? ";
1921     m_left->intoString(oss);
1922     oss << " : ";
1923     m_right->intoString(oss);
1924     oss << ')';
1925     break;
1926 jfenwick 1886 default:
1927     oss << "UNKNOWN";
1928     }
1929     }
1930    
1931 jfenwick 2737
1932     void
1933     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1934     {
1935     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1936     switch (getOpgroup(m_op))
1937     {
1938     case G_IDENTITY:
1939     if (m_id->isExpanded())
1940     {
1941     oss << "E";
1942     }
1943     else if (m_id->isTagged())
1944     {
1945     oss << "T";
1946     }
1947     else if (m_id->isConstant())
1948     {
1949     oss << "C";
1950     }
1951     else
1952     {
1953     oss << "?";
1954     }
1955     oss << '@' << m_id.get() << endl;
1956     break;
1957     case G_BINARY:
1958     oss << opToString(m_op) << endl;
1959     indent+='.';
1960     m_left->intoTreeString(oss, indent);
1961     m_right->intoTreeString(oss, indent);
1962     break;
1963     case G_UNARY:
1964     case G_UNARY_P:
1965     case G_NP1OUT:
1966     case G_NP1OUT_P:
1967     case G_REDUCTION:
1968     oss << opToString(m_op) << endl;
1969     indent+='.';
1970     m_left->intoTreeString(oss, indent);
1971     break;
1972     case G_TENSORPROD:
1973     oss << opToString(m_op) << endl;
1974     indent+='.';
1975     m_left->intoTreeString(oss, indent);
1976     m_right->intoTreeString(oss, indent);
1977     break;
1978     case G_NP1OUT_2P:
1979     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1980     indent+='.';
1981     m_left->intoTreeString(oss, indent);
1982     break;
1983     default:
1984     oss << "UNKNOWN";
1985     }
1986     }
1987    
1988    
1989 jfenwick 1865 DataAbstract*
1990     DataLazy::deepCopy()
1991     {
1992 jfenwick 1935 switch (getOpgroup(m_op))
1993 jfenwick 1865 {
1994 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1995 jfenwick 2721 case G_UNARY:
1996     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1997     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1998 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1999 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2000     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2002     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003 jfenwick 1935 default:
2004     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2005 jfenwick 1865 }
2006     }
2007    
2008    
2009 jfenwick 2721
2010 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2011     // Instances of DataReady can look at the size of their vectors.
2012     // For lazy though, it could be the size the data would be if it were resolved;
2013     // or it could be some function of the lengths of the DataReady instances which
2014     // form part of the expression.
2015     // Rather than have people making assumptions, I have disabled the method.
2016 jfenwick 1865 DataTypes::ValueType::size_type
2017     DataLazy::getLength() const
2018     {
2019 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2020 jfenwick 1865 }
2021    
2022    
2023     DataAbstract*
2024     DataLazy::getSlice(const DataTypes::RegionType& region) const
2025     {
2026 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2027 jfenwick 1865 }
2028    
2029 jfenwick 1935
2030     // To do this we need to rely on our child nodes
2031 jfenwick 1865 DataTypes::ValueType::size_type
2032     DataLazy::getPointOffset(int sampleNo,
2033 jfenwick 1935 int dataPointNo)
2034     {
2035     if (m_op==IDENTITY)
2036     {
2037     return m_id->getPointOffset(sampleNo,dataPointNo);
2038     }
2039     if (m_readytype!='E')
2040     {
2041     collapse();
2042     return m_id->getPointOffset(sampleNo,dataPointNo);
2043     }
2044     // at this point we do not have an identity node and the expression will be Expanded
2045     // so we only need to know which child to ask
2046     if (m_left->m_readytype=='E')
2047     {
2048     return m_left->getPointOffset(sampleNo,dataPointNo);
2049     }
2050     else
2051     {
2052     return m_right->getPointOffset(sampleNo,dataPointNo);
2053     }
2054     }
2055    
2056     // To do this we need to rely on our child nodes
2057     DataTypes::ValueType::size_type
2058     DataLazy::getPointOffset(int sampleNo,
2059 jfenwick 1865 int dataPointNo) const
2060     {
2061 jfenwick 1935 if (m_op==IDENTITY)
2062     {
2063     return m_id->getPointOffset(sampleNo,dataPointNo);
2064     }
2065     if (m_readytype=='E')
2066     {
2067     // at this point we do not have an identity node and the expression will be Expanded
2068     // so we only need to know which child to ask
2069     if (m_left->m_readytype=='E')
2070     {
2071     return m_left->getPointOffset(sampleNo,dataPointNo);
2072     }
2073     else
2074     {
2075     return m_right->getPointOffset(sampleNo,dataPointNo);
2076     }
2077     }
2078     if (m_readytype=='C')
2079     {
2080     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2081     }
2082     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2083 jfenwick 1865 }
2084    
2085 jfenwick 2271
2086     // I have decided to let Data:: handle this issue.
2087 jfenwick 1901 void
2088     DataLazy::setToZero()
2089     {
2090 jfenwick 2271 // DataTypes::ValueType v(getNoValues(),0);
2091     // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2092     // m_op=IDENTITY;
2093     // m_right.reset();
2094     // m_left.reset();
2095     // m_readytype='C';
2096     // m_buffsRequired=1;
2097    
2098 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2099 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2100 jfenwick 1901 }
2101    
2102 jfenwick 2271 bool
2103     DataLazy::actsExpanded() const
2104     {
2105     return (m_readytype=='E');
2106     }
2107    
2108 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26