
Annotation of /trunk/escriptcore/src/DataLazy.cpp



Revision 5863 - Wed Jan 13 02:25:48 2016 UTC by jfenwick
File size: 60911 byte(s)
Copyright dates updated.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17 jfenwick 5464 #define ESNEEDPYTHON
18     #include "esysUtils/first.h"
19 jfenwick 1865
20     #include "DataLazy.h"
21 caltinay 3317 #include "esysUtils/Esys_MPI.h"
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 caltinay 3317 #ifdef USE_NETCDF
34     #include <netcdfcpp.h>
35     #endif
36    
37 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
38    
39 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
40 jfenwick 2092 #define LAZYDEBUG(X)
41 jfenwick 2157 namespace
42     {
43     bool privdebug=false;
44 jfenwick 2092
45 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
46     #define DISABLEDEBUG privdebug=false;
47     }
48    
49 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
50 jfenwick 2177
51 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
52 jfenwick 2472
53 jfenwick 2501
54 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
55    
56 jfenwick 1899 /*
57     How does DataLazy work?
58     ~~~~~~~~~~~~~~~~~~~~~~~
59    
60     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
61     denoted left and right.
62    
63     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
64     This means that all "internal" nodes in the structure are instances of DataLazy.
65    
66     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
67 caltinay 4286 Note that IDENTITY is not considered a unary operation.
68 jfenwick 1899
69     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
70     It must however form a DAG (directed acyclic graph).
71     I will refer to individual DataLazy objects within the structure as nodes.
72    
73     Each node also stores:
74     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
75     evaluated.
76 caltinay 4286 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
77 jfenwick 1899 evaluate the expression.
78     - m_samplesize ~ the number of doubles stored in a sample.
79    
80     When a new node is created, the above values are computed based on the values in the child nodes.
81     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
82    
83     The resolve method, which produces a DataReady from a DataLazy, does the following:
84     1) Create a DataReady to hold the new result.
85     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
86     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
87    
88     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
89    
90     resolveSample returns a Vector* and an offset within that vector where the result is stored.
91     Normally, this would be v, but for identity nodes their internal vector is returned instead.
92    
93     The convention I use is that the resolve methods should store their results starting at the offset they are passed.
94    
95     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
96     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
97 jfenwick 2037
98 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example); a sketch of these steps follows this comment block:
99 jfenwick 2037 1) Add to the ES_optype.
100     2) determine what opgroup your operation belongs to (X)
101     3) add a string for the op to the end of ES_opstrings
102     4) increase ES_opcount
103     5) add an entry (X) to opgroups
104     6) add an entry to the switch in collapseToReady
105     7) add an entry to resolveX
106 jfenwick 1899 */
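/*
   Illustrative sketch only (not compiled): the steps above, applied to a hypothetical
   unary operator CBRT (cube root).  CBRT and Data::cbrt are assumptions made for this
   example; the arrays and switch statements referred to are the ones defined later in
   this file, and the ES_optype enum lives in DataLazy.h.

   1,2) Add CBRT to the ES_optype enum; it is a pointwise one-argument op, so its
        opgroup is G_UNARY.
   3,4) ES_opstrings[]={..., "condEval", "cbrt"};   int ES_opcount=45;
   5)   opgroups[]={..., G_CONDEVAL, G_UNARY};
   6)   collapseToReady():   case CBRT: result=left.cbrt(); break;   // assumes a non-lazy Data::cbrt
   7)   resolveNodeUnary():  case CBRT:
                                 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cbrt);
                                 break;
*/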
107    
108    
109 jfenwick 1865 using namespace std;
110 jfenwick 1868 using namespace boost;
111 jfenwick 1865
112     namespace escript
113     {
114    
115     namespace
116     {
117    
118 jfenwick 2777
119     // enabling this will print out whenever the maximum stacksize used by resolve increases
120     // it assumes _OPENMP is also in use
121     //#define LAZY_STACK_PROF
122    
123    
124    
125     #ifndef _OPENMP
126     #ifdef LAZY_STACK_PROF
127     #undef LAZY_STACK_PROF
128     #endif
129     #endif
130    
131    
132     #ifdef LAZY_STACK_PROF
133     std::vector<void*> stackstart(getNumberOfThreads());
134     std::vector<void*> stackend(getNumberOfThreads());
135     size_t maxstackuse=0;
136     #endif
137    
138 jfenwick 1886 enum ES_opgroup
139     {
140     G_UNKNOWN,
141     G_IDENTITY,
142 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
143 jfenwick 2037 G_UNARY, // pointwise operations with one argument
144 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
145 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
146 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
147 jfenwick 2496 G_TENSORPROD, // general tensor product
148 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
149 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
150     G_CONDEVAL
151 jfenwick 1886 };
152    
153    
154    
155    
156 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
157     "sin","cos","tan",
158 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
159     "asinh","acosh","atanh",
160 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
161 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
162 jfenwick 2066 "symmetric","nonsymmetric",
163 jfenwick 2084 "prod",
164 jfenwick 2496 "transpose", "trace",
165 jfenwick 2721 "swapaxes",
166 jfenwick 3035 "minval", "maxval",
167     "condEval"};
168     int ES_opcount=44;
169 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
170     G_UNARY,G_UNARY,G_UNARY, //10
171     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
172     G_UNARY,G_UNARY,G_UNARY, // 20
173 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
174     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
175 jfenwick 2066 G_NP1OUT,G_NP1OUT,
176 jfenwick 2084 G_TENSORPROD,
177 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
178 jfenwick 2721 G_NP1OUT_2P,
179 jfenwick 3035 G_REDUCTION, G_REDUCTION,
180     G_CONDEVAL};
181 jfenwick 1886 inline
182     ES_opgroup
183     getOpgroup(ES_optype op)
184     {
185     return opgroups[op];
186     }
187    
188 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
189     FunctionSpace
190     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
191     {
192     // perhaps this should call interpolate and throw or something?
193     // maybe we need an interpolate node -
194     // that way, if interpolate is required in any other op we can just throw a
195     // programming error exception.
196 jfenwick 1879
197 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
198     FunctionSpace r=right->getFunctionSpace();
199     if (l!=r)
200     {
201 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202     if (res==1)
203 jfenwick 1943 {
204     return l;
205     }
206 jfenwick 4264 if (res==-1)
207 jfenwick 1943 {
208     return r;
209     }
210     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
211     }
212     return l;
213 jfenwick 1865 }
214    
215     // return the shape of the result of "left op right"
216 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
217 jfenwick 1865 DataTypes::ShapeType
218     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
219     {
220 jfenwick 1879 if (left->getShape()!=right->getShape())
221     {
222 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
223 jfenwick 1908 {
224     throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
225     }
226 jfenwick 2721
227 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
228     {
229     return right->getShape();
230     }
231     if (right->getRank()==0)
232     {
233     return left->getShape();
234     }
235     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
236 jfenwick 1879 }
237     return left->getShape();
238 jfenwick 1865 }
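// Example for resultShape above (illustrative): with a G_BINARY op, a rank-0 operand is
// broadcast, so a scalar combined with shape (3,3) gives (3,3); two non-scalars whose
// shapes differ throw a DataException instead.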
239    
240 jfenwick 2084 // return the shape for "op left"
241    
242     DataTypes::ShapeType
243 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244 jfenwick 2084 {
245     switch(op)
246     {
247     case TRANS:
248 jfenwick 2199 { // for the scoping of variables
249     const DataTypes::ShapeType& s=left->getShape();
250     DataTypes::ShapeType sh;
251     int rank=left->getRank();
252     if (axis_offset<0 || axis_offset>rank)
253     {
254 caltinay 2779 stringstream e;
255     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256     throw DataException(e.str());
257     }
258     for (int i=0; i<rank; i++)
259 jfenwick 2199 {
260     int index = (axis_offset+i)%rank;
261 caltinay 2779 sh.push_back(s[index]); // Append to new shape
262     }
263 jfenwick 2199 return sh;
264     }
265 jfenwick 2084 break;
266     case TRACE:
267 jfenwick 2199 {
268     int rank=left->getRank();
269     if (rank<2)
270     {
271     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272     }
273     if ((axis_offset>rank-2) || (axis_offset<0))
274     {
275     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276     }
277     if (rank==2)
278     {
279     return DataTypes::scalarShape;
280     }
281     else if (rank==3)
282     {
283     DataTypes::ShapeType sh;
284     if (axis_offset==0)
285     {
286     sh.push_back(left->getShape()[2]);
287     }
288     else // offset==1
289     {
290     sh.push_back(left->getShape()[0]);
291     }
292     return sh;
293     }
294     else if (rank==4)
295     {
296     DataTypes::ShapeType sh;
297     const DataTypes::ShapeType& s=left->getShape();
298     if (axis_offset==0)
299     {
300     sh.push_back(s[2]);
301     sh.push_back(s[3]);
302     }
303     else if (axis_offset==1)
304     {
305     sh.push_back(s[0]);
306     sh.push_back(s[3]);
307     }
308     else // offset==2
309     {
310     sh.push_back(s[0]);
311     sh.push_back(s[1]);
312     }
313     return sh;
314     }
315     else // unknown rank
316     {
317     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318     }
319     }
320 jfenwick 2084 break;
321     default:
322     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323     }
324     }
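// Worked examples for the function above (illustrative): TRANS on shape (2,3,4) with
// axis_offset=1 gives (3,4,2), since each output dimension is s[(axis_offset+i)%rank];
// TRACE on a rank-3 object of shape (3,3,5) with axis_offset=0 contracts axes 0 and 1
// and gives shape (5).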
325    
326 jfenwick 2496 DataTypes::ShapeType
327     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
328     {
329     // This code was taken from the Data.cpp swapaxes() method
330     // Some of the checks are probably redundant here
331     int axis0_tmp,axis1_tmp;
332     const DataTypes::ShapeType& s=left->getShape();
333     DataTypes::ShapeType out_shape;
334     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
335     // which goes through all shape vector elements starting with axis_offset (wrapping around to 0 when the index reaches rank)
336     int rank=left->getRank();
337     if (rank<2) {
338     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339     }
340     if (axis0<0 || axis0>rank-1) {
341 caltinay 2779 stringstream e;
342     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343     throw DataException(e.str());
344 jfenwick 2496 }
345     if (axis1<0 || axis1>rank-1) {
346 caltinay 2782 stringstream e;
347 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348     throw DataException(e.str());
349 jfenwick 2496 }
350     if (axis0 == axis1) {
351     throw DataException("Error - Data::swapaxes: axis indices must be different.");
352     }
353     if (axis0 > axis1) {
354     axis0_tmp=axis1;
355     axis1_tmp=axis0;
356     } else {
357     axis0_tmp=axis0;
358     axis1_tmp=axis1;
359     }
360     for (int i=0; i<rank; i++) {
361     if (i == axis0_tmp) {
362     out_shape.push_back(s[axis1_tmp]);
363     } else if (i == axis1_tmp) {
364     out_shape.push_back(s[axis0_tmp]);
365     } else {
366     out_shape.push_back(s[i]);
367     }
368     }
369     return out_shape;
370     }
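// Worked example for SwapShape above (illustrative): on shape (2,3,4) with axis0=0 and
// axis1=2 the first and last dimensions are swapped, giving (4,3,2).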
371    
372    
373 jfenwick 2066 // determine the output shape for the general tensor product operation
374     // the additional parameters return information required later for the product
375     // the majority of this code is copy pasted from C_General_Tensor_Product
376     DataTypes::ShapeType
377     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
378 jfenwick 1865 {
379 jfenwick 2066
380     // Get rank and shape of inputs
381     int rank0 = left->getRank();
382     int rank1 = right->getRank();
383     const DataTypes::ShapeType& shape0 = left->getShape();
384     const DataTypes::ShapeType& shape1 = right->getShape();
385    
386     // Prepare for the loops of the product and verify compatibility of shapes
387     int start0=0, start1=0;
388     if (transpose == 0) {}
389     else if (transpose == 1) { start0 = axis_offset; }
390     else if (transpose == 2) { start1 = rank1-axis_offset; }
391     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392    
393 jfenwick 2085 if (rank0<axis_offset)
394     {
395     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396     }
397 jfenwick 2066
398     // Adjust the shapes for transpose
399     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
400     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
401     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
402     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
403    
404     // Prepare for the loops of the product
405     SL=1, SM=1, SR=1;
406     for (int i=0; i<rank0-axis_offset; i++) {
407     SL *= tmpShape0[i];
408     }
409     for (int i=rank0-axis_offset; i<rank0; i++) {
410     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
411     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
412     }
413     SM *= tmpShape0[i];
414     }
415     for (int i=axis_offset; i<rank1; i++) {
416     SR *= tmpShape1[i];
417     }
418    
419     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
420     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
421     { // block to limit the scope of out_index
422     int out_index=0;
423     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
425     }
426 jfenwick 2086
427     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
428     {
429     ostringstream os;
430     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
431     throw DataException(os.str());
432     }
433    
434 jfenwick 2066 return shape2;
435 jfenwick 1865 }
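// Worked example for GTPShape above (illustrative): for left of shape (3,4), right of
// shape (4,5), axis_offset=1 and transpose=0 the contracted dimension has extent 4, so
// SL=3, SM=4, SR=5 and the returned shape is (3,5), the usual matrix-matrix product shape.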
436    
437     } // end anonymous namespace
438    
439    
440 jfenwick 1899
441     // Return a string representing the operation
442 jfenwick 1865 const std::string&
443     opToString(ES_optype op)
444     {
445     if (op<0 || op>=ES_opcount)
446     {
447     op=UNKNOWNOP;
448     }
449     return ES_opstrings[op];
450     }
451    
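// Allocates the per-thread scratch storage used during resolution: one sample-sized
// slice of m_samples per OpenMP thread, plus m_sampleids recording which sample each
// thread last resolved (-1 means "none yet").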
452 jfenwick 2500 void DataLazy::LazyNodeSetup()
453     {
454     #ifdef _OPENMP
455     int numthreads=omp_get_max_threads();
456     m_samples.resize(numthreads*m_samplesize);
457     m_sampleids=new int[numthreads];
458     for (int i=0;i<numthreads;++i)
459     {
460     m_sampleids[i]=-1;
461     }
462     #else
463     m_samples.resize(m_samplesize);
464     m_sampleids=new int[1];
465     m_sampleids[0]=-1;
466     #endif // _OPENMP
467     }
468 jfenwick 1865
469 jfenwick 2500
470     // Creates an identity node
471 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
472 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
473 jfenwick 2500 ,m_sampleids(0),
474     m_samples(1)
475 jfenwick 1865 {
476 jfenwick 1879 if (p->isLazy())
477     {
478     // I don't want identity of Lazy.
479     // Question: Why would that be so bad?
480     // Answer: We assume that the child of ID is something we can call getVector on
481     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
482     }
483     else
484     {
485 jfenwick 2271 p->makeLazyShared();
486 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
487     makeIdentity(dr);
488 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
489 jfenwick 1879 }
490 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
491 jfenwick 1865 }
492    
493 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
494 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
495 jfenwick 2066 m_op(op),
496     m_axis_offset(0),
497     m_transpose(0),
498     m_SL(0), m_SM(0), m_SR(0)
499 jfenwick 1886 {
500 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
501 jfenwick 1886 {
502     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
503     }
504 jfenwick 2066
505 jfenwick 1886 DataLazy_ptr lleft;
506     if (!left->isLazy())
507     {
508     lleft=DataLazy_ptr(new DataLazy(left));
509     }
510     else
511     {
512     lleft=dynamic_pointer_cast<DataLazy>(left);
513     }
514 jfenwick 1889 m_readytype=lleft->m_readytype;
515 jfenwick 1886 m_left=lleft;
516     m_samplesize=getNumDPPSample()*getNoValues();
517 jfenwick 2177 m_children=m_left->m_children+1;
518     m_height=m_left->m_height+1;
519 jfenwick 2500 LazyNodeSetup();
520 jfenwick 2177 SIZELIMIT
521 jfenwick 1886 }
522    
523    
524 jfenwick 1943 // In this constructor we need to consider interpolation
525 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
526     : parent(resultFS(left,right,op), resultShape(left,right,op)),
527 jfenwick 2066 m_op(op),
528     m_SL(0), m_SM(0), m_SR(0)
529 jfenwick 1879 {
530 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
531 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
532 jfenwick 1886 {
533 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
534 jfenwick 1886 }
535 jfenwick 1943
536     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
537     {
538     FunctionSpace fs=getFunctionSpace();
539     Data ltemp(left);
540     Data tmp(ltemp,fs);
541     left=tmp.borrowDataPtr();
542     }
543 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
544 jfenwick 1943 {
545     Data tmp(Data(right),getFunctionSpace());
546     right=tmp.borrowDataPtr();
547 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
548 jfenwick 1943 }
549     left->operandCheck(*right);
550    
551 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
552 jfenwick 1879 {
553     m_left=dynamic_pointer_cast<DataLazy>(left);
554 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
555 jfenwick 1879 }
556     else
557     {
558     m_left=DataLazy_ptr(new DataLazy(left));
559 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
560 jfenwick 1879 }
561     if (right->isLazy())
562     {
563     m_right=dynamic_pointer_cast<DataLazy>(right);
564 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
565 jfenwick 1879 }
566     else
567     {
568     m_right=DataLazy_ptr(new DataLazy(right));
569 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
570 jfenwick 1879 }
571 jfenwick 1889 char lt=m_left->m_readytype;
572     char rt=m_right->m_readytype;
573     if (lt=='E' || rt=='E')
574     {
575     m_readytype='E';
576     }
577     else if (lt=='T' || rt=='T')
578     {
579     m_readytype='T';
580     }
581     else
582     {
583     m_readytype='C';
584     }
585 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
586 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;
588 jfenwick 2500 LazyNodeSetup();
589 jfenwick 2177 SIZELIMIT
590 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
591 jfenwick 1879 }
592    
593 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
594     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
595     m_op(op),
596     m_axis_offset(axis_offset),
597     m_transpose(transpose)
598     {
599     if ((getOpgroup(op)!=G_TENSORPROD))
600     {
601     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
602     }
603     if ((transpose>2) || (transpose<0))
604     {
605     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
606     }
607     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
608     {
609     FunctionSpace fs=getFunctionSpace();
610     Data ltemp(left);
611     Data tmp(ltemp,fs);
612     left=tmp.borrowDataPtr();
613     }
614     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
615     {
616     Data tmp(Data(right),getFunctionSpace());
617     right=tmp.borrowDataPtr();
618     }
619 jfenwick 2195 // left->operandCheck(*right);
620 jfenwick 1879
621 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
622     {
623     m_left=dynamic_pointer_cast<DataLazy>(left);
624     }
625     else
626     {
627     m_left=DataLazy_ptr(new DataLazy(left));
628     }
629     if (right->isLazy())
630     {
631     m_right=dynamic_pointer_cast<DataLazy>(right);
632     }
633     else
634     {
635     m_right=DataLazy_ptr(new DataLazy(right));
636     }
637     char lt=m_left->m_readytype;
638     char rt=m_right->m_readytype;
639     if (lt=='E' || rt=='E')
640     {
641     m_readytype='E';
642     }
643     else if (lt=='T' || rt=='T')
644     {
645     m_readytype='T';
646     }
647     else
648     {
649     m_readytype='C';
650     }
651     m_samplesize=getNumDPPSample()*getNoValues();
652 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
653     m_height=max(m_left->m_height,m_right->m_height)+1;
654 jfenwick 2500 LazyNodeSetup();
655 jfenwick 2177 SIZELIMIT
656 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
657 jfenwick 2066 }
658    
659    
660 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
661 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
662 jfenwick 2084 m_op(op),
663     m_axis_offset(axis_offset),
664 jfenwick 2147 m_transpose(0),
665     m_tol(0)
666 jfenwick 2084 {
667     if ((getOpgroup(op)!=G_NP1OUT_P))
668     {
669     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
670     }
671     DataLazy_ptr lleft;
672     if (!left->isLazy())
673     {
674     lleft=DataLazy_ptr(new DataLazy(left));
675     }
676     else
677     {
678     lleft=dynamic_pointer_cast<DataLazy>(left);
679     }
680     m_readytype=lleft->m_readytype;
681     m_left=lleft;
682     m_samplesize=getNumDPPSample()*getNoValues();
683 jfenwick 2177 m_children=m_left->m_children+1;
684     m_height=m_left->m_height+1;
685 jfenwick 2500 LazyNodeSetup();
686 jfenwick 2177 SIZELIMIT
687 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
688 jfenwick 2084 }
689    
690 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
691     : parent(left->getFunctionSpace(), left->getShape()),
692     m_op(op),
693     m_axis_offset(0),
694     m_transpose(0),
695     m_tol(tol)
696     {
697     if ((getOpgroup(op)!=G_UNARY_P))
698     {
699     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
700     }
701     DataLazy_ptr lleft;
702     if (!left->isLazy())
703     {
704     lleft=DataLazy_ptr(new DataLazy(left));
705     }
706     else
707     {
708     lleft=dynamic_pointer_cast<DataLazy>(left);
709     }
710     m_readytype=lleft->m_readytype;
711     m_left=lleft;
712     m_samplesize=getNumDPPSample()*getNoValues();
713 jfenwick 2177 m_children=m_left->m_children+1;
714     m_height=m_left->m_height+1;
715 jfenwick 2500 LazyNodeSetup();
716 jfenwick 2177 SIZELIMIT
717 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
718     }
719 jfenwick 2084
720 jfenwick 2496
721     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
722     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
723     m_op(op),
724     m_axis_offset(axis0),
725     m_transpose(axis1),
726     m_tol(0)
727     {
728     if ((getOpgroup(op)!=G_NP1OUT_2P))
729     {
730     throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
731     }
732     DataLazy_ptr lleft;
733     if (!left->isLazy())
734     {
735     lleft=DataLazy_ptr(new DataLazy(left));
736     }
737     else
738     {
739     lleft=dynamic_pointer_cast<DataLazy>(left);
740     }
741     m_readytype=lleft->m_readytype;
742     m_left=lleft;
743     m_samplesize=getNumDPPSample()*getNoValues();
744     m_children=m_left->m_children+1;
745     m_height=m_left->m_height+1;
746 jfenwick 2500 LazyNodeSetup();
747 jfenwick 2496 SIZELIMIT
748     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
749     }
750    
751 jfenwick 3035
752     namespace
753     {
754    
755     inline int max3(int a, int b, int c)
756     {
757     int t=(a>b?a:b);
758     return (t>c?t:c);
759    
760     }
761     }
762    
763     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
764     : parent(left->getFunctionSpace(), left->getShape()),
765     m_op(CONDEVAL),
766     m_axis_offset(0),
767     m_transpose(0),
768     m_tol(0)
769     {
770    
771     DataLazy_ptr lmask;
772     DataLazy_ptr lleft;
773     DataLazy_ptr lright;
774     if (!mask->isLazy())
775     {
776     lmask=DataLazy_ptr(new DataLazy(mask));
777     }
778     else
779     {
780     lmask=dynamic_pointer_cast<DataLazy>(mask);
781     }
782     if (!left->isLazy())
783     {
784     lleft=DataLazy_ptr(new DataLazy(left));
785     }
786     else
787     {
788     lleft=dynamic_pointer_cast<DataLazy>(left);
789     }
790     if (!right->isLazy())
791     {
792     lright=DataLazy_ptr(new DataLazy(right));
793     }
794     else
795     {
796     lright=dynamic_pointer_cast<DataLazy>(right);
797     }
798     m_readytype=lmask->m_readytype;
799     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
800     {
801     throw DataException("Programmer Error - condEval arguments must have the same readytype");
802     }
803     m_left=lleft;
804     m_right=lright;
805     m_mask=lmask;
806     m_samplesize=getNumDPPSample()*getNoValues();
807     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
808     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
809     LazyNodeSetup();
810     SIZELIMIT
811     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
812     }
813    
814    
815    
816 jfenwick 1865 DataLazy::~DataLazy()
817     {
818 jfenwick 2500 delete[] m_sampleids;
819 jfenwick 1865 }
820    
821 jfenwick 1879
822 jfenwick 1899 /*
823     \brief Evaluates the expression using methods on Data.
824     This does the work for the collapse method.
825     For reasons of efficiency do not call this method on DataExpanded nodes.
826     */
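// Illustrative example: for a lazy expression over constant operands (m_readytype 'C'),
// say c=a+b, collapseToReady() first collapses both children to DataReady, wraps them
// in Data objects and then evaluates left+right eagerly via the switch below.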
827 jfenwick 1889 DataReady_ptr
828 jfenwick 4621 DataLazy::collapseToReady() const
829 jfenwick 1889 {
830     if (m_readytype=='E')
831     { // this is more an efficiency concern than anything else
832     throw DataException("Programmer Error - do not use collapse on Expanded data.");
833     }
834     if (m_op==IDENTITY)
835     {
836     return m_id;
837     }
838     DataReady_ptr pleft=m_left->collapseToReady();
839     Data left(pleft);
840     Data right;
841 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
842 jfenwick 1889 {
843     right=Data(m_right->collapseToReady());
844     }
845     Data result;
846     switch(m_op)
847     {
848     case ADD:
849     result=left+right;
850     break;
851     case SUB:
852     result=left-right;
853     break;
854     case MUL:
855     result=left*right;
856     break;
857     case DIV:
858     result=left/right;
859     break;
860     case SIN:
861     result=left.sin();
862     break;
863     case COS:
864     result=left.cos();
865     break;
866     case TAN:
867     result=left.tan();
868     break;
869     case ASIN:
870     result=left.asin();
871     break;
872     case ACOS:
873     result=left.acos();
874     break;
875     case ATAN:
876     result=left.atan();
877     break;
878     case SINH:
879     result=left.sinh();
880     break;
881     case COSH:
882     result=left.cosh();
883     break;
884     case TANH:
885     result=left.tanh();
886     break;
887     case ERF:
888     result=left.erf();
889     break;
890     case ASINH:
891     result=left.asinh();
892     break;
893     case ACOSH:
894     result=left.acosh();
895     break;
896     case ATANH:
897     result=left.atanh();
898     break;
899     case LOG10:
900     result=left.log10();
901     break;
902     case LOG:
903     result=left.log();
904     break;
905     case SIGN:
906     result=left.sign();
907     break;
908     case ABS:
909     result=left.abs();
910     break;
911     case NEG:
912     result=left.neg();
913     break;
914     case POS:
915     // it doesn't mean anything for delayed.
916     // it will just trigger a deep copy of the lazy object
917     throw DataException("Programmer error - POS not supported for lazy data.");
918     break;
919     case EXP:
920     result=left.exp();
921     break;
922     case SQRT:
923     result=left.sqrt();
924     break;
925     case RECIP:
926     result=left.oneOver();
927     break;
928     case GZ:
929     result=left.wherePositive();
930     break;
931     case LZ:
932     result=left.whereNegative();
933     break;
934     case GEZ:
935     result=left.whereNonNegative();
936     break;
937     case LEZ:
938     result=left.whereNonPositive();
939     break;
940 jfenwick 2147 case NEZ:
941     result=left.whereNonZero(m_tol);
942     break;
943     case EZ:
944     result=left.whereZero(m_tol);
945     break;
946 jfenwick 2037 case SYM:
947     result=left.symmetric();
948     break;
949     case NSYM:
950     result=left.nonsymmetric();
951     break;
952 jfenwick 2066 case PROD:
953     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
954     break;
955 jfenwick 2084 case TRANS:
956     result=left.transpose(m_axis_offset);
957     break;
958     case TRACE:
959     result=left.trace(m_axis_offset);
960     break;
961 jfenwick 2496 case SWAP:
962     result=left.swapaxes(m_axis_offset, m_transpose);
963     break;
964 jfenwick 2721 case MINVAL:
965     result=left.minval();
966     break;
967     case MAXVAL:
968     result=left.maxval();
969     break;
970 jfenwick 1889 default:
971 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
972 jfenwick 1889 }
973     return result.borrowReadyPtr();
974     }
975    
976 jfenwick 1899 /*
977     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
978     This method uses the original methods on the Data class to evaluate the expressions.
979     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
980     the purpose of using DataLazy in the first place).
981     */
982 jfenwick 1889 void
983 jfenwick 4621 DataLazy::collapse() const
984 jfenwick 1889 {
985     if (m_op==IDENTITY)
986     {
987     return;
988     }
989     if (m_readytype=='E')
990     { // this is more an efficiency concern than anything else
991     throw DataException("Programmer Error - do not use collapse on Expanded data.");
992     }
993     m_id=collapseToReady();
994     m_op=IDENTITY;
995     }
996    
997 jfenwick 1899
998 jfenwick 1889
999    
1000    
1001 jfenwick 2147
1002 phornby 1987 #define PROC_OP(TYPE,X) \
1003 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1004     {\
1005     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1006     { \
1007     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1008 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1009 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1010 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1011 jfenwick 2152 lroffset+=leftstep; \
1012     rroffset+=rightstep; \
1013     }\
1014     lroffset+=oleftstep;\
1015     rroffset+=orightstep;\
1016 jfenwick 1889 }
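// How PROC_OP is used (illustrative): resolveNodeBinary below invokes, for example,
// PROC_OP(NO_ARG,plus<double>()).  The inner loop applies tensor_binary_operation to
// numsteps chunks of chunksize doubles, advancing the left/right offsets by
// leftstep/rightstep after each chunk; the outer loop (onumsteps iterations, advancing
// by oleftstep/orightstep) only does real work when an expanded scalar has to be
// replayed against a non-scalar operand.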
1017    
1018 jfenwick 1899
1019 jfenwick 2500 // The result will be stored in m_samples
1020     // The return value is a pointer to the DataVector, offset is the offset within the return value
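// Per-thread memoisation: if this node has already resolved sampleNo for thread tid
// (tracked in m_sampleids), the cached slice of m_samples is returned without
// recomputation; IDENTITY nodes short-circuit and return their stored vector directly.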
1021     const DataTypes::ValueType*
1022 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1023 jfenwick 2500 {
1024     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1025     // collapse so we have a 'E' node or an IDENTITY for some other type
1026     if (m_readytype!='E' && m_op!=IDENTITY)
1027     {
1028     collapse();
1029     }
1030     if (m_op==IDENTITY)
1031     {
1032     const ValueType& vec=m_id->getVectorRO();
1033     roffset=m_id->getPointOffset(sampleNo, 0);
1034 jfenwick 2777 #ifdef LAZY_STACK_PROF
1035     int x;
1036     if (&x<stackend[omp_get_thread_num()])
1037     {
1038     stackend[omp_get_thread_num()]=&x;
1039     }
1040     #endif
1041 jfenwick 2500 return &(vec);
1042     }
1043     if (m_readytype!='E')
1044     {
1045     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1046     }
1047 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1048     {
1049     roffset=tid*m_samplesize;
1050     return &(m_samples); // sample is already resolved
1051     }
1052     m_sampleids[tid]=sampleNo;
1053 jfenwick 3035
1054 jfenwick 2500 switch (getOpgroup(m_op))
1055     {
1056     case G_UNARY:
1057     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1058     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1059     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1060     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1061     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1062     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1063 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1064 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1065 jfenwick 2500 default:
1066     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1067     }
1068     }
1069    
1070     const DataTypes::ValueType*
1071 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1072 jfenwick 2500 {
1073     // we assume that any collapsing has been done before we get here
1074     // since we only have one argument we don't need to think about only
1075     // processing single points.
1076     // we will also know we won't get identity nodes
1077     if (m_readytype!='E')
1078     {
1079     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1080     }
1081     if (m_op==IDENTITY)
1082     {
1083     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1084     }
1085     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1086     const double* left=&((*leftres)[roffset]);
1087     roffset=m_samplesize*tid;
1088     double* result=&(m_samples[roffset]);
1089     switch (m_op)
1090     {
1091     case SIN:
1092     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1093     break;
1094     case COS:
1095     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1096     break;
1097     case TAN:
1098     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1099     break;
1100     case ASIN:
1101     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1102     break;
1103     case ACOS:
1104     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1105     break;
1106     case ATAN:
1107     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1108     break;
1109     case SINH:
1110     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1111     break;
1112     case COSH:
1113     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1114     break;
1115     case TANH:
1116     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1117     break;
1118     case ERF:
1119     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1120     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1121     #else
1122     tensor_unary_operation(m_samplesize, left, result, ::erf);
1123     break;
1124     #endif
1125     case ASINH:
1126     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1127     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1128     #else
1129     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1130     #endif
1131     break;
1132     case ACOSH:
1133     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1134     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1135     #else
1136     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1137     #endif
1138     break;
1139     case ATANH:
1140     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1141     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1142     #else
1143     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1144     #endif
1145     break;
1146     case LOG10:
1147     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1148     break;
1149     case LOG:
1150     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1151     break;
1152     case SIGN:
1153     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1154     break;
1155     case ABS:
1156     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1157     break;
1158     case NEG:
1159     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1160     break;
1161     case POS:
1162     // it doesn't mean anything for delayed.
1163     // it will just trigger a deep copy of the lazy object
1164     throw DataException("Programmer error - POS not supported for lazy data.");
1165     break;
1166     case EXP:
1167     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1168     break;
1169     case SQRT:
1170     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1171     break;
1172     case RECIP:
1173     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1174     break;
1175     case GZ:
1176     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1177     break;
1178     case LZ:
1179     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1180     break;
1181     case GEZ:
1182     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1183     break;
1184     case LEZ:
1185     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1186     break;
1187     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1188     case NEZ:
1189     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1190     break;
1191     case EZ:
1192     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1193     break;
1194    
1195     default:
1196     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1197     }
1198     return &(m_samples);
1199     }
1200    
1201    
1202     const DataTypes::ValueType*
1203 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1204 jfenwick 2721 {
1205     // we assume that any collapsing has been done before we get here
1206     // since we only have one argument we don't need to think about only
1207     // processing single points.
1208     // we will also know we won't get identity nodes
1209     if (m_readytype!='E')
1210     {
1211     throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1212     }
1213     if (m_op==IDENTITY)
1214     {
1215     throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1216     }
1217     size_t loffset=0;
1218     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1219    
1220     roffset=m_samplesize*tid;
1221 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1222 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1223 jfenwick 2721 double* result=&(m_samples[roffset]);
1224     switch (m_op)
1225     {
1226     case MINVAL:
1227     {
1228 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1229     {
1230     FMin op;
1231     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1232     loffset+=psize;
1233     result++;
1234     }
1235 jfenwick 2721 }
1236     break;
1237     case MAXVAL:
1238     {
1239 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1240     {
1241 jfenwick 2721 FMax op;
1242     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1243 jfenwick 2734 loffset+=psize;
1244     result++;
1245     }
1246 jfenwick 2721 }
1247     break;
1248     default:
1249     throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1250     }
1251     return &(m_samples);
1252     }
1253    
1254     const DataTypes::ValueType*
1255 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1256 jfenwick 2500 {
1257     // we assume that any collapsing has been done before we get here
1258     // since we only have one argument we don't need to think about only
1259     // processing single points.
1260     if (m_readytype!='E')
1261     {
1262     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1263     }
1264     if (m_op==IDENTITY)
1265     {
1266     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1267     }
1268     size_t subroffset;
1269     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1270     roffset=m_samplesize*tid;
1271     size_t loop=0;
1272     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1273     size_t step=getNoValues();
1274     size_t offset=roffset;
1275     switch (m_op)
1276     {
1277     case SYM:
1278     for (loop=0;loop<numsteps;++loop)
1279     {
1280     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1281     subroffset+=step;
1282     offset+=step;
1283     }
1284     break;
1285     case NSYM:
1286     for (loop=0;loop<numsteps;++loop)
1287     {
1288     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1289     subroffset+=step;
1290     offset+=step;
1291     }
1292     break;
1293     default:
1294     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1295     }
1296     return &m_samples;
1297     }
1298    
1299     const DataTypes::ValueType*
1300 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1301 jfenwick 2500 {
1302     // we assume that any collapsing has been done before we get here
1303     // since we only have one argument we don't need to think about only
1304     // processing single points.
1305     if (m_readytype!='E')
1306     {
1307     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1308     }
1309     if (m_op==IDENTITY)
1310     {
1311     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1312     }
1313     size_t subroffset;
1314     size_t offset;
1315     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1316     roffset=m_samplesize*tid;
1317     offset=roffset;
1318     size_t loop=0;
1319     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1320     size_t outstep=getNoValues();
1321     size_t instep=m_left->getNoValues();
1322     switch (m_op)
1323     {
1324     case TRACE:
1325     for (loop=0;loop<numsteps;++loop)
1326     {
1327     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1328     subroffset+=instep;
1329     offset+=outstep;
1330     }
1331     break;
1332     case TRANS:
1333     for (loop=0;loop<numsteps;++loop)
1334     {
1335     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1336     subroffset+=instep;
1337     offset+=outstep;
1338     }
1339     break;
1340     default:
1341     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1342     }
1343     return &m_samples;
1344     }
1345    
1346    
1347     const DataTypes::ValueType*
1348 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1349 jfenwick 2500 {
1350     if (m_readytype!='E')
1351     {
1352     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1353     }
1354     if (m_op==IDENTITY)
1355     {
1356     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1357     }
1358     size_t subroffset;
1359     size_t offset;
1360     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1361     roffset=m_samplesize*tid;
1362     offset=roffset;
1363     size_t loop=0;
1364     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1365     size_t outstep=getNoValues();
1366     size_t instep=m_left->getNoValues();
1367     switch (m_op)
1368     {
1369     case SWAP:
1370     for (loop=0;loop<numsteps;++loop)
1371     {
1372     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1373     subroffset+=instep;
1374     offset+=outstep;
1375     }
1376     break;
1377     default:
1378     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1379     }
1380     return &m_samples;
1381     }
1382    
1383 jfenwick 3035 const DataTypes::ValueType*
1384 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1385 jfenwick 3035 {
1386     if (m_readytype!='E')
1387     {
1388     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1389     }
1390     if (m_op!=CONDEVAL)
1391     {
1392     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1393     }
1394     size_t subroffset;
1395 jfenwick 2500
1396 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1397     const ValueType* srcres=0;
1398     if ((*maskres)[subroffset]>0)
1399     {
1400     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1401     }
1402     else
1403     {
1404     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1405     }
1406    
1407     // Now we need to copy the result
1408    
1409     roffset=m_samplesize*tid;
1410     for (int i=0;i<m_samplesize;++i)
1411     {
1412     m_samples[roffset+i]=(*srcres)[subroffset+i];
1413     }
1414    
1415     return &m_samples;
1416     }
1417    
1418 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1419     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1420     // If both children are expanded, then we can process them in a single operation (we treat
1421     // the whole sample as one big datapoint).
1422     // If one of the children is not expanded, then we need to treat each point in the sample
1423     // individually.
1424     // There is an additional complication when scalar operations are considered.
1425     // For example, 2+Vector.
1426     // In this case each double within the point is treated individually
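// Worked example (illustrative): for 2+Vector, i.e. a non-expanded scalar on the left
// (LS) and an expanded non-scalar on the right (REN), the code below picks
// chunksize=leftsize=1, leftstep=0, rightstep=1 and
// numsteps=m_right->getNumDPPSample()*rightsize, so the single left value is reused
// against every double of the right sample while resultp advances by
// resultStep=max(leftstep,rightstep)=1.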
1427     const DataTypes::ValueType*
1428 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1429 jfenwick 2500 {
1430     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1431    
1432     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1433     // first work out which of the children are expanded
1434     bool leftExp=(m_left->m_readytype=='E');
1435     bool rightExp=(m_right->m_readytype=='E');
1436     if (!leftExp && !rightExp)
1437     {
1438     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1439     }
1440     bool leftScalar=(m_left->getRank()==0);
1441     bool rightScalar=(m_right->getRank()==0);
1442     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1443     {
1444     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1445     }
1446     size_t leftsize=m_left->getNoValues();
1447     size_t rightsize=m_right->getNoValues();
1448     size_t chunksize=1; // how many doubles will be processed in one go
1449     int leftstep=0; // how far should the left offset advance after each step
1450     int rightstep=0;
1451     int numsteps=0; // total number of steps for the inner loop
1452     int oleftstep=0; // the o variables refer to the outer loop
1453     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1454     int onumsteps=1;
1455    
1456     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1457     bool RES=(rightExp && rightScalar);
1458     bool LS=(!leftExp && leftScalar); // left is a single scalar
1459     bool RS=(!rightExp && rightScalar);
1460     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1461     bool RN=(!rightExp && !rightScalar);
1462     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1463     bool REN=(rightExp && !rightScalar);
1464    
1465     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1466     {
1467     chunksize=m_left->getNumDPPSample()*leftsize;
1468     leftstep=0;
1469     rightstep=0;
1470     numsteps=1;
1471     }
1472     else if (LES || RES)
1473     {
1474     chunksize=1;
1475     if (LES) // left is an expanded scalar
1476     {
1477     if (RS)
1478     {
1479     leftstep=1;
1480     rightstep=0;
1481     numsteps=m_left->getNumDPPSample();
1482     }
1483     else // RN or REN
1484     {
1485     leftstep=0;
1486     oleftstep=1;
1487     rightstep=1;
1488     orightstep=(RN ? -(int)rightsize : 0);
1489     numsteps=rightsize;
1490     onumsteps=m_left->getNumDPPSample();
1491     }
1492     }
1493     else // right is an expanded scalar
1494     {
1495     if (LS)
1496     {
1497     rightstep=1;
1498     leftstep=0;
1499     numsteps=m_right->getNumDPPSample();
1500     }
1501     else
1502     {
1503     rightstep=0;
1504     orightstep=1;
1505     leftstep=1;
1506     oleftstep=(LN ? -(int)leftsize : 0);
1507     numsteps=leftsize;
1508     onumsteps=m_right->getNumDPPSample();
1509     }
1510     }
1511     }
1512     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1513     {
1514     if (LEN) // and Right will be a single value
1515     {
1516     chunksize=rightsize;
1517     leftstep=rightsize;
1518     rightstep=0;
1519     numsteps=m_left->getNumDPPSample();
1520     if (RS)
1521     {
1522     numsteps*=leftsize;
1523     }
1524     }
1525     else // REN
1526     {
1527     chunksize=leftsize;
1528     rightstep=leftsize;
1529     leftstep=0;
1530     numsteps=m_right->getNumDPPSample();
1531     if (LS)
1532     {
1533     numsteps*=rightsize;
1534     }
1535     }
1536     }
1537    
1538     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1539     // Get the values of sub-expressions
1540     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1541     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1542     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1543     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1544     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1545     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1546     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1547     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1548     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1549    
1550     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1551     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1552    
1553    
1554     roffset=m_samplesize*tid;
1555 caltinay 4286 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1556 jfenwick 2500 switch(m_op)
1557     {
1558     case ADD:
1559     PROC_OP(NO_ARG,plus<double>());
1560     break;
1561     case SUB:
1562     PROC_OP(NO_ARG,minus<double>());
1563     break;
1564     case MUL:
1565     PROC_OP(NO_ARG,multiplies<double>());
1566     break;
1567     case DIV:
1568     PROC_OP(NO_ARG,divides<double>());
1569     break;
1570     case POW:
1571     PROC_OP(double (double,double),::pow);
1572     break;
1573     default:
1574     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1575     }
1576     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1577     return &m_samples;
1578     }
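// Illustrative sketch: a minimal standalone, plain-C++ picture of how the stepping
// scheme above plays out for the LEN/RS case (expanded non-scalar left, single
// scalar right), i.e. the 2+Vector style broadcast described before the function.
// All sizes and values are hypothetical, and the loop mirrors the stepping
// variables rather than the exact PROC_OP macro.  Guarded out of the build.
#if 0
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const int dppsample = 2;                 // data points per sample (assumed)
    const std::size_t leftsize = 3;          // doubles per left data point (assumed)
    double leftdata[]  = {1, 2, 3, 4, 5, 6}; // one expanded sample: 2 points x 3 values
    double rightdata[] = {2.0};              // single scalar, reused for every value
    std::vector<double> result(dppsample * leftsize);

    // LEN/RS settings from the code above: chunksize=rightsize=1, leftstep=1,
    // rightstep=0, numsteps=dppsample*leftsize, onumsteps=1 (no outer loop needed).
    const std::size_t chunksize = 1;
    const int leftstep = 1, rightstep = 0;
    const int numsteps = dppsample * leftsize;

    std::size_t lroffset = 0, rroffset = 0;
    double* resultp = &result[0];
    for (int i = 0; i < numsteps; ++i)
    {
        for (std::size_t j = 0; j < chunksize; ++j)
        {
            resultp[j] = leftdata[lroffset + j] + rightdata[rroffset + j];  // the ADD case
        }
        lroffset += leftstep;
        rroffset += rightstep;
        resultp  += chunksize;   // equals resultStep (=max(leftstep,rightstep)) here
    }
    // result now holds {3,4,5,6,7,8}: the scalar 2 was added to every double.
    for (std::size_t i = 0; i < result.size(); ++i)
    {
        std::cout << result[i] << ' ';
    }
    std::cout << std::endl;
    return 0;
}
#endif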
1579    
1580    
1581     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1582     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1583     // Unlike the other resolve helpers, this one must process each data point in the sample separately.
1584     const DataTypes::ValueType*
1585 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1586 jfenwick 2500 {
1587     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1588    
1589     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1590     // first work out which of the children are expanded
1591     bool leftExp=(m_left->m_readytype=='E');
1592     bool rightExp=(m_right->m_readytype=='E');
1593     int steps=getNumDPPSample();
1594     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1595     int rightStep=(rightExp?m_right->getNoValues() : 0);
1596    
1597     int resultStep=getNoValues();
1598     roffset=m_samplesize*tid;
1599     size_t offset=roffset;
1600    
1601     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1602    
1603     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1604    
1605     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1606     cout << getNoValues() << endl;)
1607    
1608    
1609     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1610     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1611     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1612     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1613     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1614     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1615     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1616    
1617 caltinay 4286 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1618 jfenwick 2500 switch(m_op)
1619     {
1620     case PROD:
1621     for (int i=0;i<steps;++i,resultp+=resultStep)
1622     {
1623     const double *ptr_0 = &((*left)[lroffset]);
1624     const double *ptr_1 = &((*right)[rroffset]);
1625    
1626     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1627     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1628    
1629     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1630    
1631     lroffset+=leftStep;
1632     rroffset+=rightStep;
1633     }
1634     break;
1635     default:
1636     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1637     }
1638     roffset=offset;
1639     return &m_samples;
1640     }
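// Illustrative sketch: a plain reference version of the contraction that
// matrix_matrix_product is assumed to perform for one pair of data points in the
// PROD case above (no-transpose variant): the left block treated as m_SL x m_SM,
// the right block as m_SM x m_SR, summing over the shared m_SM index.  The
// function name, the row-major indexing and this reading of SL/SM/SR are
// assumptions for illustration only; the real escript routine also handles the
// transpose flags and its storage order may differ.  Guarded out of the build.
#if 0
#include <cstddef>

void naive_tensor_product(std::size_t SL, std::size_t SM, std::size_t SR,
                          const double* left, const double* right,
                          double* result)
{
    // result[i*SR + j] = sum over k of left[i*SM + k] * right[k*SR + j]
    for (std::size_t i = 0; i < SL; ++i)
    {
        for (std::size_t j = 0; j < SR; ++j)
        {
            double sum = 0.0;
            for (std::size_t k = 0; k < SM; ++k)
            {
                sum += left[i*SM + k] * right[k*SR + j];
            }
            result[i*SR + j] = sum;
        }
    }
}
#endif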
1641    
1642 jfenwick 1898
1643     const DataTypes::ValueType*
1644 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1645 jfenwick 1879 {
1646 jfenwick 2271 #ifdef _OPENMP
1647     int tid=omp_get_thread_num();
1648     #else
1649     int tid=0;
1650     #endif
1651 jfenwick 2777
1652     #ifdef LAZY_STACK_PROF
1653     stackstart[tid]=&tid;
1654     stackend[tid]=&tid;
1655     const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1656     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1657     #pragma omp critical
1658     if (d>maxstackuse)
1659     {
1660     cout << "Max resolve Stack use " << d << endl;
1661     maxstackuse=d;
1662     }
1663     return r;
1664     #else
1665 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1666 jfenwick 2777 #endif
1667 jfenwick 2271 }
1668    
1669    
1670 jfenwick 2497 // This needs to do the work of the identity constructor
1671 jfenwick 2177 void
1672     DataLazy::resolveToIdentity()
1673     {
1674     if (m_op==IDENTITY)
1675     return;
1676 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1677 jfenwick 2177 makeIdentity(p);
1678     }
1679 jfenwick 1889
1680 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1681     {
1682     m_op=IDENTITY;
1683     m_axis_offset=0;
1684     m_transpose=0;
1685     m_SL=m_SM=m_SR=0;
1686     m_children=m_height=0;
1687     m_id=p;
1688     if(p->isConstant()) {m_readytype='C';}
1689     else if(p->isExpanded()) {m_readytype='E';}
1690     else if (p->isTagged()) {m_readytype='T';}
1691     else {throw DataException("Unknown DataReady instance in DataLazy::makeIdentity().");}
1692     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1693     m_left.reset();
1694     m_right.reset();
1695     }
1696    
1697 jfenwick 2497
1698     DataReady_ptr
1699     DataLazy::resolve()
1700     {
1701     resolveToIdentity();
1702     return m_id;
1703     }
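// Illustrative sketch: the resolve path above in rough outline.  The first call
// to resolve() evaluates (or collapses) the expression via resolveToIdentity(),
// after which the node is an IDENTITY wrapper around the result, so later calls
// simply return the stored m_id.  Assumes the DataLazy_ptr/DataReady_ptr typedefs
// from the headers; the helper function itself is hypothetical.  Guarded out.
#if 0
DataReady_ptr resolveTwice(DataLazy_ptr lz)     // lz: some lazy expression
{
    DataReady_ptr first  = lz->resolve();       // does the actual work
    DataReady_ptr second = lz->resolve();       // now cheap: m_op is IDENTITY
    return second;
}
#endif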
1704    
1705 jfenwick 2799
1706     /* This is logically a static method, but declaring it static seemed to cause problems on Windows. */
1707     void
1708     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1709     {
1710     if (dats.empty())
1711     {
1712     return;
1713     }
1714     vector<DataLazy*> work;
1715     FunctionSpace fs=dats[0]->getFunctionSpace();
1716     bool match=true;
1717 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1718 jfenwick 2799 {
1719     if (dats[i]->m_readytype!='E')
1720     {
1721     dats[i]->collapse();
1722     }
1723     if (dats[i]->m_op!=IDENTITY)
1724     {
1725     work.push_back(dats[i]);
1726     if (fs!=dats[i]->getFunctionSpace())
1727     {
1728     match=false;
1729     }
1730     }
1731     }
1732     if (work.empty())
1733     {
1734     return; // no work to do
1735     }
1736     if (match) // all functionspaces match. Yes I realise this is overly strict
1737     { // it is possible that dats[0] is one of the objects which we discarded and
1738     // all the other functionspaces match.
1739     vector<DataExpanded*> dep;
1740     vector<ValueType*> vecs;
1741     for (int i=0;i<work.size();++i)
1742     {
1743     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1744     vecs.push_back(&(dep[i]->getVectorRW()));
1745     }
1746     int totalsamples=work[0]->getNumSamples();
1747     const ValueType* res=0; // Storage for answer
1748     int sample;
1749     #pragma omp parallel private(sample, res)
1750     {
1751     size_t roffset=0;
1752     #pragma omp for schedule(static)
1753     for (sample=0;sample<totalsamples;++sample)
1754     {
1755     roffset=0;
1756     int j;
1757 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1758 jfenwick 2799 {
1759     #ifdef _OPENMP
1760     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1761     #else
1762     res=work[j]->resolveNodeSample(0,sample,roffset);
1763     #endif
1764     DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1765     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1766     }
1767     }
1768     }
1769     // Now we need to load the new results as identity ops into the lazy nodes
1770 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1771 jfenwick 2799 {
1772     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1773     }
1774     }
1775     else // functionspaces do not match
1776     {
1777     for (int i=0;i<work.size();++i)
1778     {
1779     work[i]->resolveToIdentity();
1780     }
1781     }
1782     }
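// Illustrative sketch: rough intended use of resolveGroupWorker().  Several lazy
// nodes that share a FunctionSpace are resolved in one parallel sweep over the
// samples rather than one full pass per object.  The wrapper function and the
// two pointers are hypothetical, and the call is made through an instance in
// line with the note above about the method not being declared static.
#if 0
void resolveTogether(DataLazy* lazyA, DataLazy* lazyB)
{
    std::vector<DataLazy*> group;
    group.push_back(lazyA);
    group.push_back(lazyB);
    lazyA->resolveGroupWorker(group);   // afterwards every worked node is an IDENTITY
}
#endif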
1783    
1784    
1785    
1786 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1787     DataReady_ptr
1788     DataLazy::resolveNodeWorker()
1789     {
1790     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1791     {
1792     collapse();
1793     }
1794     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1795     {
1796     return m_id;
1797     }
1798     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1799     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1800     ValueType& resvec=result->getVectorRW();
1801     DataReady_ptr resptr=DataReady_ptr(result);
1802    
1803     int sample;
1804     int totalsamples=getNumSamples();
1805     const ValueType* res=0; // Storage for answer
1806     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1807 jfenwick 2777 #pragma omp parallel private(sample,res)
1808 jfenwick 2500 {
1809 jfenwick 2777 size_t roffset=0;
1810     #ifdef LAZY_STACK_PROF
1811     stackstart[omp_get_thread_num()]=&roffset;
1812     stackend[omp_get_thread_num()]=&roffset;
1813     #endif
1814     #pragma omp for schedule(static)
1815     for (sample=0;sample<totalsamples;++sample)
1816     {
1817     roffset=0;
1818 jfenwick 2500 #ifdef _OPENMP
1819 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1820 jfenwick 2500 #else
1821 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1822 jfenwick 2500 #endif
1823     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1824     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1825 jfenwick 2777 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1826     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1827     }
1828 jfenwick 2500 }
1829 jfenwick 2777 #ifdef LAZY_STACK_PROF
1830     for (int i=0;i<getNumberOfThreads();++i)
1831     {
1832     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1833     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1834     if (r>maxstackuse)
1835     {
1836     maxstackuse=r;
1837     }
1838     }
1839     cout << "Max resolve Stack use=" << maxstackuse << endl;
1840     #endif
1841 jfenwick 2500 return resptr;
1842     }
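// Illustrative sketch: the per-node storage scheme the comment above refers to.
// Every node owns one buffer (m_samples) partitioned by thread, so concurrent
// threads can resolve different samples through the same node without locking;
// a thread's slice for the current sample starts at m_samplesize*tid, which is
// the roffset set in resolveNodeBinary/resolveNodeTProd above.  Plain C++ with
// hypothetical sizes, no escript types.  Guarded out of the build.
#if 0
#include <cstddef>
#include <vector>

int main()
{
    const std::size_t samplesize = 6;   // doubles per sample (assumed)
    const int numthreads = 4;           // assumed thread count
    std::vector<double> samples(samplesize * numthreads);   // per-node scratch

    const int tid = 2;                              // e.g. omp_get_thread_num()
    const std::size_t roffset = samplesize * tid;   // this thread's slice
    double* resultp = &samples[roffset];
    for (std::size_t i = 0; i < samplesize; ++i)
    {
        resultp[i] = 0.0;               // stand-in for the real per-sample work
    }
    return 0;
}
#endif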
1843    
1844 jfenwick 1865 std::string
1845     DataLazy::toString() const
1846     {
1847 jfenwick 1886 ostringstream oss;
1848 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1849 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1850 jfenwick 2737 {
1851 jfenwick 2795 case 1: // tree format
1852 jfenwick 2737 oss << endl;
1853 jfenwick 2795 intoTreeString(oss,"");
1854     break;
1855     case 2: // just the depth
1856     break;
1857     default:
1858     intoString(oss);
1859     break;
1860 jfenwick 2737 }
1861 jfenwick 1886 return oss.str();
1862 jfenwick 1865 }
1863    
1864 jfenwick 1899
1865 jfenwick 1886 void
1866     DataLazy::intoString(ostringstream& oss) const
1867     {
1868 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1869 jfenwick 1886 switch (getOpgroup(m_op))
1870     {
1871 jfenwick 1889 case G_IDENTITY:
1872     if (m_id->isExpanded())
1873     {
1874     oss << "E";
1875     }
1876     else if (m_id->isTagged())
1877     {
1878     oss << "T";
1879     }
1880     else if (m_id->isConstant())
1881     {
1882     oss << "C";
1883     }
1884     else
1885     {
1886     oss << "?";
1887     }
1888 jfenwick 1886 oss << '@' << m_id.get();
1889     break;
1890     case G_BINARY:
1891     oss << '(';
1892     m_left->intoString(oss);
1893     oss << ' ' << opToString(m_op) << ' ';
1894     m_right->intoString(oss);
1895     oss << ')';
1896     break;
1897     case G_UNARY:
1898 jfenwick 2147 case G_UNARY_P:
1899 jfenwick 2037 case G_NP1OUT:
1900 jfenwick 2084 case G_NP1OUT_P:
1901 jfenwick 2721 case G_REDUCTION:
1902 jfenwick 1886 oss << opToString(m_op) << '(';
1903     m_left->intoString(oss);
1904     oss << ')';
1905     break;
1906 jfenwick 2066 case G_TENSORPROD:
1907     oss << opToString(m_op) << '(';
1908     m_left->intoString(oss);
1909     oss << ", ";
1910     m_right->intoString(oss);
1911     oss << ')';
1912     break;
1913 jfenwick 2496 case G_NP1OUT_2P:
1914     oss << opToString(m_op) << '(';
1915     m_left->intoString(oss);
1916     oss << ", " << m_axis_offset << ", " << m_transpose;
1917     oss << ')';
1918     break;
1919 jfenwick 3035 case G_CONDEVAL:
1920     oss << opToString(m_op)<< '(' ;
1921     m_mask->intoString(oss);
1922     oss << " ? ";
1923     m_left->intoString(oss);
1924     oss << " : ";
1925     m_right->intoString(oss);
1926     oss << ')';
1927     break;
1928 jfenwick 1886 default:
1929     oss << "UNKNOWN";
1930     }
1931     }
1932    
1933 jfenwick 2737
1934     void
1935     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1936     {
1937     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1938     switch (getOpgroup(m_op))
1939     {
1940     case G_IDENTITY:
1941     if (m_id->isExpanded())
1942     {
1943     oss << "E";
1944     }
1945     else if (m_id->isTagged())
1946     {
1947     oss << "T";
1948     }
1949     else if (m_id->isConstant())
1950     {
1951     oss << "C";
1952     }
1953     else
1954     {
1955     oss << "?";
1956     }
1957     oss << '@' << m_id.get() << endl;
1958     break;
1959     case G_BINARY:
1960     oss << opToString(m_op) << endl;
1961     indent+='.';
1962     m_left->intoTreeString(oss, indent);
1963     m_right->intoTreeString(oss, indent);
1964     break;
1965     case G_UNARY:
1966     case G_UNARY_P:
1967     case G_NP1OUT:
1968     case G_NP1OUT_P:
1969     case G_REDUCTION:
1970     oss << opToString(m_op) << endl;
1971     indent+='.';
1972     m_left->intoTreeString(oss, indent);
1973     break;
1974     case G_TENSORPROD:
1975     oss << opToString(m_op) << endl;
1976     indent+='.';
1977     m_left->intoTreeString(oss, indent);
1978     m_right->intoTreeString(oss, indent);
1979     break;
1980     case G_NP1OUT_2P:
1981     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1982     indent+='.';
1983     m_left->intoTreeString(oss, indent);
1984     break;
1985     default:
1986     oss << "UNKNOWN";
1987     }
1988     }
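// Illustrative sketch: roughly what the three LAZY_STR_FMT modes handled in
// toString() above produce for a node adding expanded data to a constant.  The
// depth, the rank/samplesize columns and the addresses are made-up placeholders,
// and the "+" assumes that is what opToString(ADD) returns.
//
//   default (inline, via intoString):
//     Lazy Data: [depth=1] (E@0x1f00 + C@0x1f40)
//   format 1 (tree, via intoTreeString):
//     Lazy Data: [depth=1]
//     [1: 24] +
//     [1: 24] .E@0x1f00
//     [0:  8] .C@0x1f40
//   format 2 (depth only):
//     Lazy Data: [depth=1]
//
// A hypothetical helper that just prints whichever form is currently selected:
#if 0
void printLazy(const DataLazy& d)
{
    std::cout << d.toString() << std::endl;
}
#endif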
1989    
1990    
1991 jfenwick 1865 DataAbstract*
1992     DataLazy::deepCopy()
1993     {
1994 jfenwick 1935 switch (getOpgroup(m_op))
1995 jfenwick 1865 {
1996 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1997 jfenwick 2721 case G_UNARY:
1998     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1999     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2000 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2001 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2002     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2004     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005 jfenwick 1935 default:
2006     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2007 jfenwick 1865 }
2008     }
2009    
2010    
2011 jfenwick 2721
2012 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2013     // Instances of DataReady can look at the size of their vectors.
2014     // For lazy though, it could be the size the data would be if it were resolved;
2015     // or it could be some function of the lengths of the DataReady instances which
2016     // form part of the expression.
2017     // Rather than have people making assumptions, I have disabled the method.
2018 jfenwick 1865 DataTypes::ValueType::size_type
2019     DataLazy::getLength() const
2020     {
2021 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2022 jfenwick 1865 }
2023    
2024    
2025     DataAbstract*
2026     DataLazy::getSlice(const DataTypes::RegionType& region) const
2027     {
2028 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2029 jfenwick 1865 }
2030    
2031 jfenwick 1935
2032     // To do this we need to rely on our child nodes
2033 jfenwick 1865 DataTypes::ValueType::size_type
2034     DataLazy::getPointOffset(int sampleNo,
2035 jfenwick 1935 int dataPointNo)
2036     {
2037     if (m_op==IDENTITY)
2038     {
2039     return m_id->getPointOffset(sampleNo,dataPointNo);
2040     }
2041     if (m_readytype!='E')
2042     {
2043     collapse();
2044     return m_id->getPointOffset(sampleNo,dataPointNo);
2045     }
2046     // at this point we do not have an identity node and the expression will be Expanded
2047     // so we only need to know which child to ask
2048     if (m_left->m_readytype=='E')
2049     {
2050     return m_left->getPointOffset(sampleNo,dataPointNo);
2051     }
2052     else
2053     {
2054     return m_right->getPointOffset(sampleNo,dataPointNo);
2055     }
2056     }
2057    
2058     // To do this we need to rely on our child nodes
2059     DataTypes::ValueType::size_type
2060     DataLazy::getPointOffset(int sampleNo,
2061 jfenwick 1865 int dataPointNo) const
2062     {
2063 jfenwick 1935 if (m_op==IDENTITY)
2064     {
2065     return m_id->getPointOffset(sampleNo,dataPointNo);
2066     }
2067     if (m_readytype=='E')
2068     {
2069     // at this point we do not have an identity node and the expression will be Expanded
2070     // so we only need to know which child to ask
2071     if (m_left->m_readytype=='E')
2072     {
2073     return m_left->getPointOffset(sampleNo,dataPointNo);
2074     }
2075     else
2076     {
2077     return m_right->getPointOffset(sampleNo,dataPointNo);
2078     }
2079     }
2080     if (m_readytype=='C')
2081     {
2082     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2083     }
2084     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2085 jfenwick 1865 }
2086    
2087 jfenwick 2271
2088     // I have decided to let Data:: handle this issue.
2089 jfenwick 1901 void
2090     DataLazy::setToZero()
2091     {
2092 jfenwick 2271 // DataTypes::ValueType v(getNoValues(),0);
2093     // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2094     // m_op=IDENTITY;
2095     // m_right.reset();
2096     // m_left.reset();
2097     // m_readytype='C';
2098     // m_buffsRequired=1;
2099    
2100 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2101 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2102 jfenwick 1901 }
2103    
2104 jfenwick 2271 bool
2105     DataLazy::actsExpanded() const
2106     {
2107     return (m_readytype=='E');
2108     }
2109    
2110 jfenwick 1865 } // end namespace
