/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5938 - (hide annotations)
Thu Feb 18 06:30:35 2016 UTC (3 years, 1 month ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 61055 byte(s)
Merging from 5937 on the complex branch

Some parts of complex work but all of it is
not unit tested and it is certainly not feature
complete (I haven't put any time into dealing with
subworld for complex).

The other important aspect of this merge is that
c++11 is now required to build escript.


1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17 jfenwick 5464 #define ESNEEDPYTHON
18     #include "esysUtils/first.h"
19 jfenwick 1865
20     #include "DataLazy.h"
21 caltinay 3317 #include "esysUtils/Esys_MPI.h"
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 caltinay 3317 #ifdef USE_NETCDF
34     #include <netcdfcpp.h>
35     #endif
36    
37 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
38    
39 jfenwick 5938 using namespace escript::DataTypes;
40    
41 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
42 jfenwick 2092 #define LAZYDEBUG(X)
43 jfenwick 2157 namespace
44     {
45     bool privdebug=false;
46 jfenwick 2092
47 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
48     #define DISABLEDEBUG privdebug=false;
49     }
50    
51 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
52 jfenwick 2177
53 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
54 jfenwick 2472
55 jfenwick 2501
56 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
57    
58 jfenwick 1899 /*
59     How does DataLazy work?
60     ~~~~~~~~~~~~~~~~~~~~~~~
61    
62     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
63     denoted left and right.
64    
65     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
66     This means that all "internal" nodes in the structure are instances of DataLazy.
67    
68     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
69 caltinay 4286 Note that IDENTITY is not considered a unary operation.
70 jfenwick 1899
71     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
72     It must however form a DAG (directed acyclic graph).
73     I will refer to individual DataLazy objects with the structure as nodes.
74    
75     Each node also stores:
76     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
77     evaluated.
78 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
79 jfenwick 1899 evaluate the expression.
80     - m_samplesize ~ the number of doubles stored in a sample.
81    
82     When a new node is created, the above values are computed based on the values in the child nodes.
83     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
84    
85     The resolve method, which produces a DataReady from a DataLazy, does the following:
86     1) Create a DataReady to hold the new result.
87     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
88     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
89    
90     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
91    
92     resolveSample returns a Vector* and an offset within that vector where the result is stored.
93     Normally, this would be v, but for identity nodes their internal vector is returned instead.
94    
95     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
96    
97     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
98     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
99 jfenwick 2037
100 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
101 jfenwick 2037 1) Add to the ES_optype.
102     2) determine what opgroup your operation belongs to (X)
103     3) add a string for the op to the end of ES_opstrings
104     4) increase ES_opcount
105     5) add an entry (X) to opgroups
106     6) add an entry to the switch in collapseToReady
107     7) add an entry to resolveX
108 jfenwick 1899 */
109    
110    
111 jfenwick 1865 using namespace std;
112 jfenwick 1868 using namespace boost;
113 jfenwick 1865
114     namespace escript
115     {
116    
117     namespace
118     {
119    
120 jfenwick 2777
121     // enabling this will print out when ever the maximum stacksize used by resolve increases
122     // it assumes _OPENMP is also in use
123     //#define LAZY_STACK_PROF
124    
125    
126    
127     #ifndef _OPENMP
128     #ifdef LAZY_STACK_PROF
129     #undef LAZY_STACK_PROF
130     #endif
131     #endif
132    
133    
134     #ifdef LAZY_STACK_PROF
135     std::vector<void*> stackstart(getNumberOfThreads());
136     std::vector<void*> stackend(getNumberOfThreads());
137     size_t maxstackuse=0;
138     #endif
139    
140 jfenwick 1886 enum ES_opgroup
141     {
142     G_UNKNOWN,
143     G_IDENTITY,
144 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
145 jfenwick 2037 G_UNARY, // pointwise operations with one argument
146 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
147 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
148 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
149 jfenwick 2496 G_TENSORPROD, // general tensor product
150 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
151 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
152     G_CONDEVAL
153 jfenwick 1886 };
154    
155    
156    
157    
158 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
159     "sin","cos","tan",
160 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
161     "asinh","acosh","atanh",
162 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
163 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
164 jfenwick 2066 "symmetric","nonsymmetric",
165 jfenwick 2084 "prod",
166 jfenwick 2496 "transpose", "trace",
167 jfenwick 2721 "swapaxes",
168 jfenwick 3035 "minval", "maxval",
169     "condEval"};
170     int ES_opcount=44;
171 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
172     G_UNARY,G_UNARY,G_UNARY, //10
173     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
174     G_UNARY,G_UNARY,G_UNARY, // 20
175 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
176     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
177 jfenwick 2066 G_NP1OUT,G_NP1OUT,
178 jfenwick 2084 G_TENSORPROD,
179 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
180 jfenwick 2721 G_NP1OUT_2P,
181 jfenwick 3035 G_REDUCTION, G_REDUCTION,
182     G_CONDEVAL};
183 jfenwick 1886 inline
184     ES_opgroup
185     getOpgroup(ES_optype op)
186     {
187     return opgroups[op];
188     }
189    
190 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
191     FunctionSpace
192     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
193     {
194     // perhaps this should call interpolate and throw or something?
195     // maybe we need an interpolate node -
196     // that way, if interpolate is required in any other op we can just throw a
197     // programming error exception.
198 jfenwick 1879
199 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
200     FunctionSpace r=right->getFunctionSpace();
201     if (l!=r)
202     {
203 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
204     if (res==1)
205 jfenwick 1943 {
206     return l;
207     }
208 jfenwick 4264 if (res==-1)
209 jfenwick 1943 {
210     return r;
211     }
212     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
213     }
214     return l;
215 jfenwick 1865 }
216    
217     // return the shape of the result of "left op right"
218 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
219 jfenwick 1865 DataTypes::ShapeType
220     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
221     {
222 jfenwick 1879 if (left->getShape()!=right->getShape())
223     {
224 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
225 jfenwick 1908 {
226     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
227     }
228 jfenwick 2721
229 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
230     {
231     return right->getShape();
232     }
233     if (right->getRank()==0)
234     {
235     return left->getShape();
236     }
237     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
238 jfenwick 1879 }
239     return left->getShape();
240 jfenwick 1865 }
241    
242 jfenwick 2084 // return the shape for "op left"
243    
244     DataTypes::ShapeType
245 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
246 jfenwick 2084 {
247     switch(op)
248     {
249     case TRANS:
250 jfenwick 2199 { // for the scoping of variables
251     const DataTypes::ShapeType& s=left->getShape();
252     DataTypes::ShapeType sh;
253     int rank=left->getRank();
254     if (axis_offset<0 || axis_offset>rank)
255     {
256 caltinay 2779 stringstream e;
257     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
258     throw DataException(e.str());
259     }
260     for (int i=0; i<rank; i++)
261 jfenwick 2199 {
262     int index = (axis_offset+i)%rank;
263 caltinay 2779 sh.push_back(s[index]); // Append to new shape
264     }
265 jfenwick 2199 return sh;
266     }
267 jfenwick 2084 break;
268     case TRACE:
269 jfenwick 2199 {
270     int rank=left->getRank();
271     if (rank<2)
272     {
273     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
274     }
275     if ((axis_offset>rank-2) || (axis_offset<0))
276     {
277     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
278     }
279     if (rank==2)
280     {
281     return DataTypes::scalarShape;
282     }
283     else if (rank==3)
284     {
285     DataTypes::ShapeType sh;
286     if (axis_offset==0)
287     {
288     sh.push_back(left->getShape()[2]);
289     }
290     else // offset==1
291     {
292     sh.push_back(left->getShape()[0]);
293     }
294     return sh;
295     }
296     else if (rank==4)
297     {
298     DataTypes::ShapeType sh;
299     const DataTypes::ShapeType& s=left->getShape();
300     if (axis_offset==0)
301     {
302     sh.push_back(s[2]);
303     sh.push_back(s[3]);
304     }
305     else if (axis_offset==1)
306     {
307     sh.push_back(s[0]);
308     sh.push_back(s[3]);
309     }
310     else // offset==2
311     {
312     sh.push_back(s[0]);
313     sh.push_back(s[1]);
314     }
315     return sh;
316     }
317     else // unknown rank
318     {
319     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
320     }
321     }
322 jfenwick 2084 break;
323     default:
324     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
325     }
326     }
327    
328 jfenwick 2496 DataTypes::ShapeType
329     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
330     {
331     // This code taken from the Data.cpp swapaxes() method
332     // Some of the checks are probably redundant here
333     int axis0_tmp,axis1_tmp;
334     const DataTypes::ShapeType& s=left->getShape();
335     DataTypes::ShapeType out_shape;
336     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
337     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
338     int rank=left->getRank();
339     if (rank<2) {
340     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
341     }
342     if (axis0<0 || axis0>rank-1) {
343 caltinay 2779 stringstream e;
344     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
345     throw DataException(e.str());
346 jfenwick 2496 }
347     if (axis1<0 || axis1>rank-1) {
348 caltinay 2782 stringstream e;
349 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
350     throw DataException(e.str());
351 jfenwick 2496 }
352     if (axis0 == axis1) {
353     throw DataException("Error - Data::swapaxes: axis indices must be different.");
354     }
355     if (axis0 > axis1) {
356     axis0_tmp=axis1;
357     axis1_tmp=axis0;
358     } else {
359     axis0_tmp=axis0;
360     axis1_tmp=axis1;
361     }
362     for (int i=0; i<rank; i++) {
363     if (i == axis0_tmp) {
364     out_shape.push_back(s[axis1_tmp]);
365     } else if (i == axis1_tmp) {
366     out_shape.push_back(s[axis0_tmp]);
367     } else {
368     out_shape.push_back(s[i]);
369     }
370     }
371     return out_shape;
372     }
373    
374    
375 jfenwick 2066 // determine the output shape for the general tensor product operation
376     // the additional parameters return information required later for the product
377     // the majority of this code is copy pasted from C_General_Tensor_Product
378     DataTypes::ShapeType
379     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
380 jfenwick 1865 {
381 jfenwick 2066
382     // Get rank and shape of inputs
383     int rank0 = left->getRank();
384     int rank1 = right->getRank();
385     const DataTypes::ShapeType& shape0 = left->getShape();
386     const DataTypes::ShapeType& shape1 = right->getShape();
387    
388     // Prepare for the loops of the product and verify compatibility of shapes
389     int start0=0, start1=0;
390     if (transpose == 0) {}
391     else if (transpose == 1) { start0 = axis_offset; }
392     else if (transpose == 2) { start1 = rank1-axis_offset; }
393     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
394    
395 jfenwick 2085 if (rank0<axis_offset)
396     {
397     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
398     }
399 jfenwick 2066
400     // Adjust the shapes for transpose
401     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
402     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
403     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
404     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
405    
406     // Prepare for the loops of the product
407     SL=1, SM=1, SR=1;
408     for (int i=0; i<rank0-axis_offset; i++) {
409     SL *= tmpShape0[i];
410     }
411     for (int i=rank0-axis_offset; i<rank0; i++) {
412     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
413     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
414     }
415     SM *= tmpShape0[i];
416     }
417     for (int i=axis_offset; i<rank1; i++) {
418     SR *= tmpShape1[i];
419     }
420    
421     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
422     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
423     { // block to limit the scope of out_index
424     int out_index=0;
425     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
426     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
427     }
428 jfenwick 2086
429     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
430     {
431     ostringstream os;
432     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
433     throw DataException(os.str());
434     }
435    
436 jfenwick 2066 return shape2;
437 jfenwick 1865 }
438    
439     } // end anonymous namespace
440    
441    
442 jfenwick 1899
443     // Return a string representing the operation
444 jfenwick 1865 const std::string&
445     opToString(ES_optype op)
446     {
447     if (op<0 || op>=ES_opcount)
448     {
449     op=UNKNOWNOP;
450     }
451     return ES_opstrings[op];
452     }
453    
454 jfenwick 2500 void DataLazy::LazyNodeSetup()
455     {
456     #ifdef _OPENMP
457     int numthreads=omp_get_max_threads();
458     m_samples.resize(numthreads*m_samplesize);
459     m_sampleids=new int[numthreads];
460     for (int i=0;i<numthreads;++i)
461     {
462     m_sampleids[i]=-1;
463     }
464     #else
465     m_samples.resize(m_samplesize);
466     m_sampleids=new int[1];
467     m_sampleids[0]=-1;
468     #endif // _OPENMP
469     }
470 jfenwick 1865
471 jfenwick 2500
472     // Creates an identity node
473 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
474 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
475 jfenwick 2500 ,m_sampleids(0),
476     m_samples(1)
477 jfenwick 1865 {
478 jfenwick 1879 if (p->isLazy())
479     {
480     // I don't want identity of Lazy.
481     // Question: Why would that be so bad?
482     // Answer: We assume that the child of ID is something we can call getVector on
483     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
484     }
485     else
486     {
487 jfenwick 2271 p->makeLazyShared();
488 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
489     makeIdentity(dr);
490 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
491 jfenwick 1879 }
492 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
493 jfenwick 1865 }
494    
495 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
496 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
497 jfenwick 2066 m_op(op),
498     m_axis_offset(0),
499     m_transpose(0),
500     m_SL(0), m_SM(0), m_SR(0)
501 jfenwick 1886 {
502 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
503 jfenwick 1886 {
504     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
505     }
506 jfenwick 2066
507 jfenwick 1886 DataLazy_ptr lleft;
508     if (!left->isLazy())
509     {
510     lleft=DataLazy_ptr(new DataLazy(left));
511     }
512     else
513     {
514     lleft=dynamic_pointer_cast<DataLazy>(left);
515     }
516 jfenwick 1889 m_readytype=lleft->m_readytype;
517 jfenwick 1886 m_left=lleft;
518     m_samplesize=getNumDPPSample()*getNoValues();
519 jfenwick 2177 m_children=m_left->m_children+1;
520     m_height=m_left->m_height+1;
521 jfenwick 2500 LazyNodeSetup();
522 jfenwick 2177 SIZELIMIT
523 jfenwick 1886 }
524    
525    
526 jfenwick 1943 // In this constructor we need to consider interpolation
527 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
528     : parent(resultFS(left,right,op), resultShape(left,right,op)),
529 jfenwick 2066 m_op(op),
530     m_SL(0), m_SM(0), m_SR(0)
531 jfenwick 1879 {
532 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
533 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
534 jfenwick 1886 {
535 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
536 jfenwick 1886 }
537 jfenwick 1943
538     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
539     {
540     FunctionSpace fs=getFunctionSpace();
541     Data ltemp(left);
542     Data tmp(ltemp,fs);
543     left=tmp.borrowDataPtr();
544     }
545 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
546 jfenwick 1943 {
547     Data tmp(Data(right),getFunctionSpace());
548     right=tmp.borrowDataPtr();
549 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
550 jfenwick 1943 }
551     left->operandCheck(*right);
552    
553 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
554 jfenwick 1879 {
555     m_left=dynamic_pointer_cast<DataLazy>(left);
556 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
557 jfenwick 1879 }
558     else
559     {
560     m_left=DataLazy_ptr(new DataLazy(left));
561 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
562 jfenwick 1879 }
563     if (right->isLazy())
564     {
565     m_right=dynamic_pointer_cast<DataLazy>(right);
566 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
567 jfenwick 1879 }
568     else
569     {
570     m_right=DataLazy_ptr(new DataLazy(right));
571 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
572 jfenwick 1879 }
573 jfenwick 1889 char lt=m_left->m_readytype;
574     char rt=m_right->m_readytype;
575     if (lt=='E' || rt=='E')
576     {
577     m_readytype='E';
578     }
579     else if (lt=='T' || rt=='T')
580     {
581     m_readytype='T';
582     }
583     else
584     {
585     m_readytype='C';
586     }
587 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
588 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
589     m_height=max(m_left->m_height,m_right->m_height)+1;
590 jfenwick 2500 LazyNodeSetup();
591 jfenwick 2177 SIZELIMIT
592 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593 jfenwick 1879 }
594    
595 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597     m_op(op),
598     m_axis_offset(axis_offset),
599     m_transpose(transpose)
600     {
601     if ((getOpgroup(op)!=G_TENSORPROD))
602     {
603     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604     }
605     if ((transpose>2) || (transpose<0))
606     {
607     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608     }
609     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
610     {
611     FunctionSpace fs=getFunctionSpace();
612     Data ltemp(left);
613     Data tmp(ltemp,fs);
614     left=tmp.borrowDataPtr();
615     }
616     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
617     {
618     Data tmp(Data(right),getFunctionSpace());
619     right=tmp.borrowDataPtr();
620     }
621 jfenwick 2195 // left->operandCheck(*right);
622 jfenwick 1879
623 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {
625     m_left=dynamic_pointer_cast<DataLazy>(left);
626     }
627     else
628     {
629     m_left=DataLazy_ptr(new DataLazy(left));
630     }
631     if (right->isLazy())
632     {
633     m_right=dynamic_pointer_cast<DataLazy>(right);
634     }
635     else
636     {
637     m_right=DataLazy_ptr(new DataLazy(right));
638     }
639     char lt=m_left->m_readytype;
640     char rt=m_right->m_readytype;
641     if (lt=='E' || rt=='E')
642     {
643     m_readytype='E';
644     }
645     else if (lt=='T' || rt=='T')
646     {
647     m_readytype='T';
648     }
649     else
650     {
651     m_readytype='C';
652     }
653     m_samplesize=getNumDPPSample()*getNoValues();
654 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
655     m_height=max(m_left->m_height,m_right->m_height)+1;
656 jfenwick 2500 LazyNodeSetup();
657 jfenwick 2177 SIZELIMIT
658 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659 jfenwick 2066 }
660    
661    
662 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
663 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
664 jfenwick 2084 m_op(op),
665     m_axis_offset(axis_offset),
666 jfenwick 2147 m_transpose(0),
667     m_tol(0)
668 jfenwick 2084 {
669     if ((getOpgroup(op)!=G_NP1OUT_P))
670     {
671     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
672     }
673     DataLazy_ptr lleft;
674     if (!left->isLazy())
675     {
676     lleft=DataLazy_ptr(new DataLazy(left));
677     }
678     else
679     {
680     lleft=dynamic_pointer_cast<DataLazy>(left);
681     }
682     m_readytype=lleft->m_readytype;
683     m_left=lleft;
684     m_samplesize=getNumDPPSample()*getNoValues();
685 jfenwick 2177 m_children=m_left->m_children+1;
686     m_height=m_left->m_height+1;
687 jfenwick 2500 LazyNodeSetup();
688 jfenwick 2177 SIZELIMIT
689 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
690 jfenwick 2084 }
691    
692 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
693     : parent(left->getFunctionSpace(), left->getShape()),
694     m_op(op),
695     m_axis_offset(0),
696     m_transpose(0),
697     m_tol(tol)
698     {
699     if ((getOpgroup(op)!=G_UNARY_P))
700     {
701     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
702     }
703     DataLazy_ptr lleft;
704     if (!left->isLazy())
705     {
706     lleft=DataLazy_ptr(new DataLazy(left));
707     }
708     else
709     {
710     lleft=dynamic_pointer_cast<DataLazy>(left);
711     }
712     m_readytype=lleft->m_readytype;
713     m_left=lleft;
714     m_samplesize=getNumDPPSample()*getNoValues();
715 jfenwick 2177 m_children=m_left->m_children+1;
716     m_height=m_left->m_height+1;
717 jfenwick 2500 LazyNodeSetup();
718 jfenwick 2177 SIZELIMIT
719 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
720     }
721 jfenwick 2084
722 jfenwick 2496
723     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
724     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
725     m_op(op),
726     m_axis_offset(axis0),
727     m_transpose(axis1),
728     m_tol(0)
729     {
730     if ((getOpgroup(op)!=G_NP1OUT_2P))
731     {
732     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
733     }
734     DataLazy_ptr lleft;
735     if (!left->isLazy())
736     {
737     lleft=DataLazy_ptr(new DataLazy(left));
738     }
739     else
740     {
741     lleft=dynamic_pointer_cast<DataLazy>(left);
742     }
743     m_readytype=lleft->m_readytype;
744     m_left=lleft;
745     m_samplesize=getNumDPPSample()*getNoValues();
746     m_children=m_left->m_children+1;
747     m_height=m_left->m_height+1;
748 jfenwick 2500 LazyNodeSetup();
749 jfenwick 2496 SIZELIMIT
750     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
751     }
752    
753 jfenwick 3035
754     namespace
755     {
756    
757     inline int max3(int a, int b, int c)
758     {
759     int t=(a>b?a:b);
760     return (t>c?t:c);
761    
762     }
763     }
764    
765     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
766     : parent(left->getFunctionSpace(), left->getShape()),
767     m_op(CONDEVAL),
768     m_axis_offset(0),
769     m_transpose(0),
770     m_tol(0)
771     {
772    
773     DataLazy_ptr lmask;
774     DataLazy_ptr lleft;
775     DataLazy_ptr lright;
776     if (!mask->isLazy())
777     {
778     lmask=DataLazy_ptr(new DataLazy(mask));
779     }
780     else
781     {
782     lmask=dynamic_pointer_cast<DataLazy>(mask);
783     }
784     if (!left->isLazy())
785     {
786     lleft=DataLazy_ptr(new DataLazy(left));
787     }
788     else
789     {
790     lleft=dynamic_pointer_cast<DataLazy>(left);
791     }
792     if (!right->isLazy())
793     {
794     lright=DataLazy_ptr(new DataLazy(right));
795     }
796     else
797     {
798     lright=dynamic_pointer_cast<DataLazy>(right);
799     }
800     m_readytype=lmask->m_readytype;
801     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
802     {
803     throw DataException("Programmer Error - condEval arguments must have the same readytype");
804     }
805     m_left=lleft;
806     m_right=lright;
807     m_mask=lmask;
808     m_samplesize=getNumDPPSample()*getNoValues();
809     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
810     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
811     LazyNodeSetup();
812     SIZELIMIT
813     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
814     }
815    
816    
817    
818 jfenwick 1865 DataLazy::~DataLazy()
819     {
820 jfenwick 2500 delete[] m_sampleids;
821 jfenwick 1865 }
822    
823 jfenwick 1879
824 jfenwick 1899 /*
825     \brief Evaluates the expression using methods on Data.
826     This does the work for the collapse method.
827     For reasons of efficiency do not call this method on DataExpanded nodes.
828     */
829 jfenwick 1889 DataReady_ptr
830 jfenwick 4621 DataLazy::collapseToReady() const
831 jfenwick 1889 {
832     if (m_readytype=='E')
833     { // this is more an efficiency concern than anything else
834     throw DataException("Programmer Error - do not use collapse on Expanded data.");
835     }
836     if (m_op==IDENTITY)
837     {
838     return m_id;
839     }
840     DataReady_ptr pleft=m_left->collapseToReady();
841     Data left(pleft);
842     Data right;
843 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
844 jfenwick 1889 {
845     right=Data(m_right->collapseToReady());
846     }
847     Data result;
848     switch(m_op)
849     {
850     case ADD:
851     result=left+right;
852     break;
853     case SUB:
854     result=left-right;
855     break;
856     case MUL:
857     result=left*right;
858     break;
859     case DIV:
860     result=left/right;
861     break;
862     case SIN:
863     result=left.sin();
864     break;
865     case COS:
866     result=left.cos();
867     break;
868     case TAN:
869     result=left.tan();
870     break;
871     case ASIN:
872     result=left.asin();
873     break;
874     case ACOS:
875     result=left.acos();
876     break;
877     case ATAN:
878     result=left.atan();
879     break;
880     case SINH:
881     result=left.sinh();
882     break;
883     case COSH:
884     result=left.cosh();
885     break;
886     case TANH:
887     result=left.tanh();
888     break;
889     case ERF:
890     result=left.erf();
891     break;
892     case ASINH:
893     result=left.asinh();
894     break;
895     case ACOSH:
896     result=left.acosh();
897     break;
898     case ATANH:
899     result=left.atanh();
900     break;
901     case LOG10:
902     result=left.log10();
903     break;
904     case LOG:
905     result=left.log();
906     break;
907     case SIGN:
908     result=left.sign();
909     break;
910     case ABS:
911     result=left.abs();
912     break;
913     case NEG:
914     result=left.neg();
915     break;
916     case POS:
917     // it doesn't mean anything for delayed.
918     // it will just trigger a deep copy of the lazy object
919     throw DataException("Programmer error - POS not supported for lazy data.");
920     break;
921     case EXP:
922     result=left.exp();
923     break;
924     case SQRT:
925     result=left.sqrt();
926     break;
927     case RECIP:
928     result=left.oneOver();
929     break;
930     case GZ:
931     result=left.wherePositive();
932     break;
933     case LZ:
934     result=left.whereNegative();
935     break;
936     case GEZ:
937     result=left.whereNonNegative();
938     break;
939     case LEZ:
940     result=left.whereNonPositive();
941     break;
942 jfenwick 2147 case NEZ:
943     result=left.whereNonZero(m_tol);
944     break;
945     case EZ:
946     result=left.whereZero(m_tol);
947     break;
948 jfenwick 2037 case SYM:
949     result=left.symmetric();
950     break;
951     case NSYM:
952     result=left.nonsymmetric();
953     break;
954 jfenwick 2066 case PROD:
955     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
956     break;
957 jfenwick 2084 case TRANS:
958     result=left.transpose(m_axis_offset);
959     break;
960     case TRACE:
961     result=left.trace(m_axis_offset);
962     break;
963 jfenwick 2496 case SWAP:
964     result=left.swapaxes(m_axis_offset, m_transpose);
965     break;
966 jfenwick 2721 case MINVAL:
967     result=left.minval();
968     break;
969     case MAXVAL:
970     result=left.minval();
971     break;
972 jfenwick 1889 default:
973 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974 jfenwick 1889 }
975     return result.borrowReadyPtr();
976     }
977    
978 jfenwick 1899 /*
979     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
980     This method uses the original methods on the Data class to evaluate the expressions.
981     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
982     the purpose of using DataLazy in the first place).
983     */
984 jfenwick 1889 void
985 jfenwick 4621 DataLazy::collapse() const
986 jfenwick 1889 {
987     if (m_op==IDENTITY)
988     {
989     return;
990     }
991     if (m_readytype=='E')
992     { // this is more an efficiency concern than anything else
993     throw DataException("Programmer Error - do not use collapse on Expanded data.");
994     }
995     m_id=collapseToReady();
996     m_op=IDENTITY;
997     }
998    
999 jfenwick 1899
1000 jfenwick 1889
1001    
1002    
1003 jfenwick 2147
1004 phornby 1987 #define PROC_OP(TYPE,X) \
1005 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1006     {\
1007     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1008     { \
1009     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1010 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1011 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1012 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1013 jfenwick 2152 lroffset+=leftstep; \
1014     rroffset+=rightstep; \
1015     }\
1016     lroffset+=oleftstep;\
1017     rroffset+=orightstep;\
1018 jfenwick 1889 }
1019    
1020 jfenwick 1899
1021 jfenwick 2500 // The result will be stored in m_samples
1022     // The return value is a pointer to the DataVector, offset is the offset within the return value
1023 jfenwick 5938 const DataTypes::RealVectorType*
1024 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1025 jfenwick 2500 {
1026     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1027     // collapse so we have a 'E' node or an IDENTITY for some other type
1028     if (m_readytype!='E' && m_op!=IDENTITY)
1029     {
1030     collapse();
1031     }
1032     if (m_op==IDENTITY)
1033     {
1034     const ValueType& vec=m_id->getVectorRO();
1035     roffset=m_id->getPointOffset(sampleNo, 0);
1036 jfenwick 2777 #ifdef LAZY_STACK_PROF
1037     int x;
1038     if (&x<stackend[omp_get_thread_num()])
1039     {
1040     stackend[omp_get_thread_num()]=&x;
1041     }
1042     #endif
1043 jfenwick 2500 return &(vec);
1044     }
1045     if (m_readytype!='E')
1046     {
1047     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1048     }
1049 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1050     {
1051     roffset=tid*m_samplesize;
1052     return &(m_samples); // sample is already resolved
1053     }
1054     m_sampleids[tid]=sampleNo;
1055 jfenwick 3035
1056 jfenwick 2500 switch (getOpgroup(m_op))
1057     {
1058     case G_UNARY:
1059     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1060     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1061     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1062     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1063     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1064     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1065 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1066 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1067 jfenwick 2500 default:
1068     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1069     }
1070     }
1071    
1072 jfenwick 5938 const DataTypes::RealVectorType*
1073 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1074 jfenwick 2500 {
1075     // we assume that any collapsing has been done before we get here
1076     // since we only have one argument we don't need to think about only
1077     // processing single points.
1078     // we will also know we won't get identity nodes
1079     if (m_readytype!='E')
1080     {
1081     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1082     }
1083     if (m_op==IDENTITY)
1084     {
1085     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1086     }
1087 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1088 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1089     roffset=m_samplesize*tid;
1090     double* result=&(m_samples[roffset]);
1091     switch (m_op)
1092     {
1093     case SIN:
1094     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1095     break;
1096     case COS:
1097     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1098     break;
1099     case TAN:
1100     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1101     break;
1102     case ASIN:
1103     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1104     break;
1105     case ACOS:
1106     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1107     break;
1108     case ATAN:
1109     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1110     break;
1111     case SINH:
1112     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1113     break;
1114     case COSH:
1115     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1116     break;
1117     case TANH:
1118     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1119     break;
1120     case ERF:
1121     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1122     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1123     #else
1124     tensor_unary_operation(m_samplesize, left, result, ::erf);
1125     break;
1126     #endif
1127     case ASINH:
1128     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1129     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1130     #else
1131     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1132     #endif
1133     break;
1134     case ACOSH:
1135     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1136     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1137     #else
1138     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1139     #endif
1140     break;
1141     case ATANH:
1142     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1143     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1144     #else
1145     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1146     #endif
1147     break;
1148     case LOG10:
1149     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1150     break;
1151     case LOG:
1152     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1153     break;
1154     case SIGN:
1155     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1156     break;
1157     case ABS:
1158     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1159     break;
1160     case NEG:
1161     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1162     break;
1163     case POS:
1164     // it doesn't mean anything for delayed.
1165     // it will just trigger a deep copy of the lazy object
1166     throw DataException("Programmer error - POS not supported for lazy data.");
1167     break;
1168     case EXP:
1169     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1170     break;
1171     case SQRT:
1172     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1173     break;
1174     case RECIP:
1175     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1176     break;
1177     case GZ:
1178     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1179     break;
1180     case LZ:
1181     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1182     break;
1183     case GEZ:
1184     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1185     break;
1186     case LEZ:
1187     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1188     break;
1189     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1190     case NEZ:
1191     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1192     break;
1193     case EZ:
1194     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1195     break;
1196    
1197     default:
1198     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1199     }
1200     return &(m_samples);
1201     }
1202    
1203    
1204 jfenwick 5938 const DataTypes::RealVectorType*
1205 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1206 jfenwick 2721 {
1207     // we assume that any collapsing has been done before we get here
1208     // since we only have one argument we don't need to think about only
1209     // processing single points.
1210     // we will also know we won't get identity nodes
1211     if (m_readytype!='E')
1212     {
1213     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1214     }
1215     if (m_op==IDENTITY)
1216     {
1217     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1218     }
1219     size_t loffset=0;
1220 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1221 jfenwick 2721
1222     roffset=m_samplesize*tid;
1223 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1224 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1225 jfenwick 2721 double* result=&(m_samples[roffset]);
1226     switch (m_op)
1227     {
1228     case MINVAL:
1229     {
1230 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1231     {
1232     FMin op;
1233     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1234     loffset+=psize;
1235     result++;
1236     }
1237 jfenwick 2721 }
1238     break;
1239     case MAXVAL:
1240     {
1241 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1242     {
1243 jfenwick 2721 FMax op;
1244     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1245 jfenwick 2734 loffset+=psize;
1246     result++;
1247     }
1248 jfenwick 2721 }
1249     break;
1250     default:
1251     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1252     }
1253     return &(m_samples);
1254     }
1255    
1256 jfenwick 5938 const DataTypes::RealVectorType*
1257 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1258 jfenwick 2500 {
1259     // we assume that any collapsing has been done before we get here
1260     // since we only have one argument we don't need to think about only
1261     // processing single points.
1262     if (m_readytype!='E')
1263     {
1264     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1265     }
1266     if (m_op==IDENTITY)
1267     {
1268     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1269     }
1270     size_t subroffset;
1271     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1272     roffset=m_samplesize*tid;
1273     size_t loop=0;
1274     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1275     size_t step=getNoValues();
1276     size_t offset=roffset;
1277     switch (m_op)
1278     {
1279     case SYM:
1280     for (loop=0;loop<numsteps;++loop)
1281     {
1282     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283     subroffset+=step;
1284     offset+=step;
1285     }
1286     break;
1287     case NSYM:
1288     for (loop=0;loop<numsteps;++loop)
1289     {
1290     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1291     subroffset+=step;
1292     offset+=step;
1293     }
1294     break;
1295     default:
1296     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1297     }
1298     return &m_samples;
1299     }
1300    
1301 jfenwick 5938 const DataTypes::RealVectorType*
1302 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1303 jfenwick 2500 {
1304     // we assume that any collapsing has been done before we get here
1305     // since we only have one argument we don't need to think about only
1306     // processing single points.
1307     if (m_readytype!='E')
1308     {
1309     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1310     }
1311     if (m_op==IDENTITY)
1312     {
1313     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1314     }
1315     size_t subroffset;
1316     size_t offset;
1317     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1318     roffset=m_samplesize*tid;
1319     offset=roffset;
1320     size_t loop=0;
1321     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1322     size_t outstep=getNoValues();
1323     size_t instep=m_left->getNoValues();
1324     switch (m_op)
1325     {
1326     case TRACE:
1327     for (loop=0;loop<numsteps;++loop)
1328     {
1329     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1330     subroffset+=instep;
1331     offset+=outstep;
1332     }
1333     break;
1334     case TRANS:
1335     for (loop=0;loop<numsteps;++loop)
1336     {
1337     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1338     subroffset+=instep;
1339     offset+=outstep;
1340     }
1341     break;
1342     default:
1343     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1344     }
1345     return &m_samples;
1346     }
1347    
1348    
1349 jfenwick 5938 const DataTypes::RealVectorType*
1350 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1351 jfenwick 2500 {
1352     if (m_readytype!='E')
1353     {
1354     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1355     }
1356     if (m_op==IDENTITY)
1357     {
1358     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1359     }
1360     size_t subroffset;
1361     size_t offset;
1362     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1363     roffset=m_samplesize*tid;
1364     offset=roffset;
1365     size_t loop=0;
1366     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1367     size_t outstep=getNoValues();
1368     size_t instep=m_left->getNoValues();
1369     switch (m_op)
1370     {
1371     case SWAP:
1372     for (loop=0;loop<numsteps;++loop)
1373     {
1374     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1375     subroffset+=instep;
1376     offset+=outstep;
1377     }
1378     break;
1379     default:
1380     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1381     }
1382     return &m_samples;
1383     }
1384    
1385 jfenwick 5938 const DataTypes::RealVectorType*
1386 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1387 jfenwick 3035 {
1388     if (m_readytype!='E')
1389     {
1390     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1391     }
1392     if (m_op!=CONDEVAL)
1393     {
1394     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1395     }
1396     size_t subroffset;
1397 jfenwick 2500
1398 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1399     const ValueType* srcres=0;
1400     if ((*maskres)[subroffset]>0)
1401     {
1402     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1403     }
1404     else
1405     {
1406     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1407     }
1408    
1409     // Now we need to copy the result
1410    
1411     roffset=m_samplesize*tid;
1412     for (int i=0;i<m_samplesize;++i)
1413     {
1414     m_samples[roffset+i]=(*srcres)[subroffset+i];
1415     }
1416    
1417     return &m_samples;
1418     }
1419    
1420 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1421     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1422     // If both children are expanded, then we can process them in a single operation (we treat
1423     // the whole sample as one big datapoint.
1424     // If one of the children is not expanded, then we need to treat each point in the sample
1425     // individually.
1426     // There is an additional complication when scalar operations are considered.
1427     // For example, 2+Vector.
1428     // In this case each double within the point is treated individually
1429 jfenwick 5938 const DataTypes::RealVectorType*
1430 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1431 jfenwick 2500 {
1432     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1433    
1434     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1435     // first work out which of the children are expanded
1436     bool leftExp=(m_left->m_readytype=='E');
1437     bool rightExp=(m_right->m_readytype=='E');
1438     if (!leftExp && !rightExp)
1439     {
1440     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1441     }
1442     bool leftScalar=(m_left->getRank()==0);
1443     bool rightScalar=(m_right->getRank()==0);
1444     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1445     {
1446     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1447     }
1448     size_t leftsize=m_left->getNoValues();
1449     size_t rightsize=m_right->getNoValues();
1450     size_t chunksize=1; // how many doubles will be processed in one go
1451     int leftstep=0; // how far should the left offset advance after each step
1452     int rightstep=0;
1453     int numsteps=0; // total number of steps for the inner loop
1454     int oleftstep=0; // the o variables refer to the outer loop
1455     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1456     int onumsteps=1;
1457    
1458     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1459     bool RES=(rightExp && rightScalar);
1460     bool LS=(!leftExp && leftScalar); // left is a single scalar
1461     bool RS=(!rightExp && rightScalar);
1462     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1463     bool RN=(!rightExp && !rightScalar);
1464     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1465     bool REN=(rightExp && !rightScalar);
1466    
1467     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1468     {
1469     chunksize=m_left->getNumDPPSample()*leftsize;
1470     leftstep=0;
1471     rightstep=0;
1472     numsteps=1;
1473     }
1474     else if (LES || RES)
1475     {
1476     chunksize=1;
1477     if (LES) // left is an expanded scalar
1478     {
1479     if (RS)
1480     {
1481     leftstep=1;
1482     rightstep=0;
1483     numsteps=m_left->getNumDPPSample();
1484     }
1485     else // RN or REN
1486     {
1487     leftstep=0;
1488     oleftstep=1;
1489     rightstep=1;
1490     orightstep=(RN ? -(int)rightsize : 0);
1491     numsteps=rightsize;
1492     onumsteps=m_left->getNumDPPSample();
1493     }
1494     }
1495     else // right is an expanded scalar
1496     {
1497     if (LS)
1498     {
1499     rightstep=1;
1500     leftstep=0;
1501     numsteps=m_right->getNumDPPSample();
1502     }
1503     else
1504     {
1505     rightstep=0;
1506     orightstep=1;
1507     leftstep=1;
1508     oleftstep=(LN ? -(int)leftsize : 0);
1509     numsteps=leftsize;
1510     onumsteps=m_right->getNumDPPSample();
1511     }
1512     }
1513     }
1514     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1515     {
1516     if (LEN) // and Right will be a single value
1517     {
1518     chunksize=rightsize;
1519     leftstep=rightsize;
1520     rightstep=0;
1521     numsteps=m_left->getNumDPPSample();
1522     if (RS)
1523     {
1524     numsteps*=leftsize;
1525     }
1526     }
1527     else // REN
1528     {
1529     chunksize=leftsize;
1530     rightstep=leftsize;
1531     leftstep=0;
1532     numsteps=m_right->getNumDPPSample();
1533     if (LS)
1534     {
1535     numsteps*=rightsize;
1536     }
1537     }
1538     }
1539    
1540     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1541     // Get the values of sub-expressions
1542     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1543     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1544     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1545     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1546     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1547     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1548     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1549     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1550     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1551    
1552     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1553     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1554    
1555    
1556     roffset=m_samplesize*tid;
1557 caltinay 4286 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1558 jfenwick 2500 switch(m_op)
1559     {
1560     case ADD:
1561     PROC_OP(NO_ARG,plus<double>());
1562     break;
1563     case SUB:
1564     PROC_OP(NO_ARG,minus<double>());
1565     break;
1566     case MUL:
1567     PROC_OP(NO_ARG,multiplies<double>());
1568     break;
1569     case DIV:
1570     PROC_OP(NO_ARG,divides<double>());
1571     break;
1572     case POW:
1573     PROC_OP(double (double,double),::pow);
1574     break;
1575     default:
1576     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1577     }
1578     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1579     return &m_samples;
1580     }
1581    
1582    
1583     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1584     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1585     // unlike the other resolve helpers, we must treat these datapoints separately.
1586 jfenwick 5938 const DataTypes::RealVectorType*
1587 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1588 jfenwick 2500 {
1589     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1590    
1591     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1592     // first work out which of the children are expanded
1593     bool leftExp=(m_left->m_readytype=='E');
1594     bool rightExp=(m_right->m_readytype=='E');
1595     int steps=getNumDPPSample();
1596     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1597     int rightStep=(rightExp?m_right->getNoValues() : 0);
1598    
1599     int resultStep=getNoValues();
1600     roffset=m_samplesize*tid;
1601     size_t offset=roffset;
1602    
1603     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1604    
1605     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1606    
1607     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1608     cout << getNoValues() << endl;)
1609    
1610    
1611     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1612     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1613     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1614     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1615     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1616     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1617     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1618    
1619 caltinay 4286 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1620 jfenwick 2500 switch(m_op)
1621     {
1622     case PROD:
1623     for (int i=0;i<steps;++i,resultp+=resultStep)
1624     {
1625     const double *ptr_0 = &((*left)[lroffset]);
1626     const double *ptr_1 = &((*right)[rroffset]);
1627    
1628     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1629     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1630    
1631     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1632    
1633     lroffset+=leftStep;
1634     rroffset+=rightStep;
1635     }
1636     break;
1637     default:
1638     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1639     }
1640     roffset=offset;
1641     return &m_samples;
1642     }
1643    
1644 jfenwick 1898
1645 jfenwick 5938 const DataTypes::RealVectorType*
1646 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1647 jfenwick 1879 {
1648 jfenwick 2271 #ifdef _OPENMP
1649     int tid=omp_get_thread_num();
1650     #else
1651     int tid=0;
1652     #endif
1653 jfenwick 2777
1654     #ifdef LAZY_STACK_PROF
1655     stackstart[tid]=&tid;
1656     stackend[tid]=&tid;
1657 jfenwick 5938 const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1658 jfenwick 2777 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1659     #pragma omp critical
1660     if (d>maxstackuse)
1661     {
1662     cout << "Max resolve Stack use " << d << endl;
1663     maxstackuse=d;
1664     }
1665     return r;
1666     #else
1667 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1668 jfenwick 2777 #endif
1669 jfenwick 2271 }
1670    
1671    
1672 jfenwick 2497 // This needs to do the work of the identity constructor
1673 jfenwick 2177 void
1674     DataLazy::resolveToIdentity()
1675     {
1676     if (m_op==IDENTITY)
1677     return;
1678 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1679 jfenwick 2177 makeIdentity(p);
1680     }
1681 jfenwick 1889
1682 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1683     {
1684     m_op=IDENTITY;
1685     m_axis_offset=0;
1686     m_transpose=0;
1687     m_SL=m_SM=m_SR=0;
1688     m_children=m_height=0;
1689     m_id=p;
1690     if(p->isConstant()) {m_readytype='C';}
1691     else if(p->isExpanded()) {m_readytype='E';}
1692     else if (p->isTagged()) {m_readytype='T';}
1693     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1694     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1695     m_left.reset();
1696     m_right.reset();
1697     }
1698    
1699 jfenwick 2497
1700     DataReady_ptr
1701     DataLazy::resolve()
1702     {
1703     resolveToIdentity();
1704     return m_id;
1705     }
1706    
1707 jfenwick 2799
1708     /* This is really a static method but I think that caused problems in windows */
1709     void
1710     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1711     {
1712     if (dats.empty())
1713     {
1714     return;
1715     }
1716     vector<DataLazy*> work;
1717     FunctionSpace fs=dats[0]->getFunctionSpace();
1718     bool match=true;
1719 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1720 jfenwick 2799 {
1721     if (dats[i]->m_readytype!='E')
1722     {
1723     dats[i]->collapse();
1724     }
1725     if (dats[i]->m_op!=IDENTITY)
1726     {
1727     work.push_back(dats[i]);
1728     if (fs!=dats[i]->getFunctionSpace())
1729     {
1730     match=false;
1731     }
1732     }
1733     }
1734     if (work.empty())
1735     {
1736     return; // no work to do
1737     }
1738     if (match) // all functionspaces match. Yes I realise this is overly strict
1739     { // it is possible that dats[0] is one of the objects which we discarded and
1740     // all the other functionspaces match.
1741     vector<DataExpanded*> dep;
1742     vector<ValueType*> vecs;
1743     for (int i=0;i<work.size();++i)
1744     {
1745     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1746     vecs.push_back(&(dep[i]->getVectorRW()));
1747     }
1748     int totalsamples=work[0]->getNumSamples();
1749     const ValueType* res=0; // Storage for answer
1750     int sample;
1751     #pragma omp parallel private(sample, res)
1752     {
1753     size_t roffset=0;
1754     #pragma omp for schedule(static)
1755     for (sample=0;sample<totalsamples;++sample)
1756     {
1757     roffset=0;
1758     int j;
1759 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1760 jfenwick 2799 {
1761     #ifdef _OPENMP
1762     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1763     #else
1764     res=work[j]->resolveNodeSample(0,sample,roffset);
1765     #endif
1766 jfenwick 5938 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1767     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1768 jfenwick 2799 }
1769     }
1770     }
1771     // Now we need to load the new results as identity ops into the lazy nodes
1772 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1773 jfenwick 2799 {
1774     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1775     }
1776     }
1777     else // functionspaces do not match
1778     {
1779     for (int i=0;i<work.size();++i)
1780     {
1781     work[i]->resolveToIdentity();
1782     }
1783     }
1784     }
1785    
1786    
1787    
1788 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1789     DataReady_ptr
1790     DataLazy::resolveNodeWorker()
1791     {
1792     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1793     {
1794     collapse();
1795     }
1796     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1797     {
1798     return m_id;
1799     }
1800     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1801     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1802     ValueType& resvec=result->getVectorRW();
1803     DataReady_ptr resptr=DataReady_ptr(result);
1804    
1805     int sample;
1806     int totalsamples=getNumSamples();
1807     const ValueType* res=0; // Storage for answer
1808     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1809 jfenwick 2777 #pragma omp parallel private(sample,res)
1810 jfenwick 2500 {
1811 jfenwick 2777 size_t roffset=0;
1812     #ifdef LAZY_STACK_PROF
1813     stackstart[omp_get_thread_num()]=&roffset;
1814     stackend[omp_get_thread_num()]=&roffset;
1815     #endif
1816     #pragma omp for schedule(static)
1817     for (sample=0;sample<totalsamples;++sample)
1818     {
1819     roffset=0;
1820 jfenwick 2500 #ifdef _OPENMP
1821 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1822 jfenwick 2500 #else
1823 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1824 jfenwick 2500 #endif
1825     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1826     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1827 jfenwick 5938 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1828     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1829 jfenwick 2777 }
1830 jfenwick 2500 }
1831 jfenwick 2777 #ifdef LAZY_STACK_PROF
1832     for (int i=0;i<getNumberOfThreads();++i)
1833     {
1834     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1835     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1836     if (r>maxstackuse)
1837     {
1838     maxstackuse=r;
1839     }
1840     }
1841     cout << "Max resolve Stack use=" << maxstackuse << endl;
1842     #endif
1843 jfenwick 2500 return resptr;
1844     }
1845    
1846 jfenwick 1865 std::string
1847     DataLazy::toString() const
1848     {
1849 jfenwick 1886 ostringstream oss;
1850 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1851 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1852 jfenwick 2737 {
1853 jfenwick 2795 case 1: // tree format
1854 jfenwick 2737 oss << endl;
1855 jfenwick 2795 intoTreeString(oss,"");
1856     break;
1857     case 2: // just the depth
1858     break;
1859     default:
1860     intoString(oss);
1861     break;
1862 jfenwick 2737 }
1863 jfenwick 1886 return oss.str();
1864 jfenwick 1865 }
1865    
1866 jfenwick 1899
1867 jfenwick 1886 void
1868     DataLazy::intoString(ostringstream& oss) const
1869     {
1870 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1871 jfenwick 1886 switch (getOpgroup(m_op))
1872     {
1873 jfenwick 1889 case G_IDENTITY:
1874     if (m_id->isExpanded())
1875     {
1876     oss << "E";
1877     }
1878     else if (m_id->isTagged())
1879     {
1880     oss << "T";
1881     }
1882     else if (m_id->isConstant())
1883     {
1884     oss << "C";
1885     }
1886     else
1887     {
1888     oss << "?";
1889     }
1890 jfenwick 1886 oss << '@' << m_id.get();
1891     break;
1892     case G_BINARY:
1893     oss << '(';
1894     m_left->intoString(oss);
1895     oss << ' ' << opToString(m_op) << ' ';
1896     m_right->intoString(oss);
1897     oss << ')';
1898     break;
1899     case G_UNARY:
1900 jfenwick 2147 case G_UNARY_P:
1901 jfenwick 2037 case G_NP1OUT:
1902 jfenwick 2084 case G_NP1OUT_P:
1903 jfenwick 2721 case G_REDUCTION:
1904 jfenwick 1886 oss << opToString(m_op) << '(';
1905     m_left->intoString(oss);
1906     oss << ')';
1907     break;
1908 jfenwick 2066 case G_TENSORPROD:
1909     oss << opToString(m_op) << '(';
1910     m_left->intoString(oss);
1911     oss << ", ";
1912     m_right->intoString(oss);
1913     oss << ')';
1914     break;
1915 jfenwick 2496 case G_NP1OUT_2P:
1916     oss << opToString(m_op) << '(';
1917     m_left->intoString(oss);
1918     oss << ", " << m_axis_offset << ", " << m_transpose;
1919     oss << ')';
1920     break;
1921 jfenwick 3035 case G_CONDEVAL:
1922     oss << opToString(m_op)<< '(' ;
1923     m_mask->intoString(oss);
1924     oss << " ? ";
1925     m_left->intoString(oss);
1926     oss << " : ";
1927     m_right->intoString(oss);
1928     oss << ')';
1929     break;
1930 jfenwick 1886 default:
1931     oss << "UNKNOWN";
1932     }
1933     }
1934    
1935 jfenwick 2737
1936     void
1937     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1938     {
1939     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1940     switch (getOpgroup(m_op))
1941     {
1942     case G_IDENTITY:
1943     if (m_id->isExpanded())
1944     {
1945     oss << "E";
1946     }
1947     else if (m_id->isTagged())
1948     {
1949     oss << "T";
1950     }
1951     else if (m_id->isConstant())
1952     {
1953     oss << "C";
1954     }
1955     else
1956     {
1957     oss << "?";
1958     }
1959     oss << '@' << m_id.get() << endl;
1960     break;
1961     case G_BINARY:
1962     oss << opToString(m_op) << endl;
1963     indent+='.';
1964     m_left->intoTreeString(oss, indent);
1965     m_right->intoTreeString(oss, indent);
1966     break;
1967     case G_UNARY:
1968     case G_UNARY_P:
1969     case G_NP1OUT:
1970     case G_NP1OUT_P:
1971     case G_REDUCTION:
1972     oss << opToString(m_op) << endl;
1973     indent+='.';
1974     m_left->intoTreeString(oss, indent);
1975     break;
1976     case G_TENSORPROD:
1977     oss << opToString(m_op) << endl;
1978     indent+='.';
1979     m_left->intoTreeString(oss, indent);
1980     m_right->intoTreeString(oss, indent);
1981     break;
1982     case G_NP1OUT_2P:
1983     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1984     indent+='.';
1985     m_left->intoTreeString(oss, indent);
1986     break;
1987     default:
1988     oss << "UNKNOWN";
1989     }
1990     }
1991    
1992    
1993 jfenwick 1865 DataAbstract*
1994 jfenwick 5938 DataLazy::deepCopy() const
1995 jfenwick 1865 {
1996 jfenwick 1935 switch (getOpgroup(m_op))
1997 jfenwick 1865 {
1998 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1999 jfenwick 2721 case G_UNARY:
2000     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2001     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2002 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2003 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2004     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2006     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2007 jfenwick 1935 default:
2008     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2009 jfenwick 1865 }
2010     }
2011    
2012    
2013 jfenwick 2721
2014 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2015     // Instances of DataReady can look at the size of their vectors.
2016     // For lazy though, it could be the size the data would be if it were resolved;
2017     // or it could be some function of the lengths of the DataReady instances which
2018     // form part of the expression.
2019     // Rather than have people making assumptions, I have disabled the method.
2020 jfenwick 5938 DataTypes::RealVectorType::size_type
2021 jfenwick 1865 DataLazy::getLength() const
2022     {
2023 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2024 jfenwick 1865 }
2025    
2026    
2027     DataAbstract*
2028     DataLazy::getSlice(const DataTypes::RegionType& region) const
2029     {
2030 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2031 jfenwick 1865 }
2032    
2033 jfenwick 1935
2034     // To do this we need to rely on our child nodes
2035 jfenwick 5938 DataTypes::RealVectorType::size_type
2036 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2037 jfenwick 1935 int dataPointNo)
2038     {
2039     if (m_op==IDENTITY)
2040     {
2041     return m_id->getPointOffset(sampleNo,dataPointNo);
2042     }
2043     if (m_readytype!='E')
2044     {
2045     collapse();
2046     return m_id->getPointOffset(sampleNo,dataPointNo);
2047     }
2048     // at this point we do not have an identity node and the expression will be Expanded
2049     // so we only need to know which child to ask
2050     if (m_left->m_readytype=='E')
2051     {
2052     return m_left->getPointOffset(sampleNo,dataPointNo);
2053     }
2054     else
2055     {
2056     return m_right->getPointOffset(sampleNo,dataPointNo);
2057     }
2058     }
2059    
2060     // To do this we need to rely on our child nodes
2061 jfenwick 5938 DataTypes::RealVectorType::size_type
2062 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2063 jfenwick 1865 int dataPointNo) const
2064     {
2065 jfenwick 1935 if (m_op==IDENTITY)
2066     {
2067     return m_id->getPointOffset(sampleNo,dataPointNo);
2068     }
2069     if (m_readytype=='E')
2070     {
2071     // at this point we do not have an identity node and the expression will be Expanded
2072     // so we only need to know which child to ask
2073     if (m_left->m_readytype=='E')
2074     {
2075     return m_left->getPointOffset(sampleNo,dataPointNo);
2076     }
2077     else
2078     {
2079     return m_right->getPointOffset(sampleNo,dataPointNo);
2080     }
2081     }
2082     if (m_readytype=='C')
2083     {
2084     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2085     }
2086     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2087 jfenwick 1865 }
2088    
2089 jfenwick 2271
2090     // I have decided to let Data:: handle this issue.
2091 jfenwick 1901 void
2092     DataLazy::setToZero()
2093     {
2094 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2095 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2096     // m_op=IDENTITY;
2097     // m_right.reset();
2098     // m_left.reset();
2099     // m_readytype='C';
2100     // m_buffsRequired=1;
2101    
2102 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2103 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2104 jfenwick 1901 }
2105    
2106 jfenwick 2271 bool
2107     DataLazy::actsExpanded() const
2108     {
2109     return (m_readytype=='E');
2110     }
2111    
2112 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26