/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Revision 3911
Thu Jun 14 01:01:03 2012 UTC by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 60526 byte(s)
Copyright changes
1 jfenwick 1865
2     /*******************************************************
3     *
4 jfenwick 3911 * Copyright (c) 2003-2012 by University of Queensland
5 jfenwick 1865 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16 caltinay 3317 #include "esysUtils/Esys_MPI.h"
17 jfenwick 1885 #ifdef _OPENMP
18     #include <omp.h>
19     #endif
20 jfenwick 1865 #include "FunctionSpace.h"
21     #include "DataTypes.h"
22 jfenwick 1868 #include "Data.h"
23 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
24 jfenwick 1950 #include "Utils.h"
25 jfenwick 1865
26 jfenwick 2472 #include "EscriptParams.h"
27    
28 caltinay 3317 #ifdef USE_NETCDF
29     #include <netcdfcpp.h>
30     #endif
31    
32 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
33    
34 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
35 jfenwick 2092 #define LAZYDEBUG(X)
36 jfenwick 2157 namespace
37     {
38     bool privdebug=false;
39 jfenwick 2092
40 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
41     #define DISABLEDEBUG privdebug=false;
42     }
43    
44 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
45 jfenwick 2177
46 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
47 jfenwick 2472
48 jfenwick 2501
49 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
50    
51 jfenwick 1899 /*
52     How does DataLazy work?
53     ~~~~~~~~~~~~~~~~~~~~~~~
54    
55     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
56     denoted left and right.
57    
58     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
59     This means that all "internal" nodes in the structure are instances of DataLazy.
60    
61     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
62     Note that IDENTITY is not considered a unary operation.
63    
64     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
65     It must however form a DAG (directed acyclic graph).
66     I will refer to individual DataLazy objects within the structure as nodes.
67    
68     Each node also stores:
69     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70     evaluated.
71     - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
72     evaluate the expression.
73     - m_samplesize ~ the number of doubles stored in a sample.
74    
75     When a new node is created, the above values are computed based on the values in the child nodes.
76     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples (the larger of the two plus one extra sample to hold the other operand's result).
77    
78     The resolve method, which produces a DataReady from a DataLazy, does the following:
79     1) Create a DataReady to hold the new result.
80     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
81     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
82    
83     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
84    
85     resolveSample returns a Vector* and an offset within that vector where the result is stored.
86     Normally, this would be v, but for identity nodes their internal vector is returned instead.
87    
88     The convention I use is that the resolve methods should store their results starting at the offset they are passed.
89    
90     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92 jfenwick 2037
93 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94 jfenwick 2037 1) Add to the ES_optype.
95     2) determine what opgroup your operation belongs to (X)
96     3) add a string for the op to the end of ES_opstrings
97     4) increase ES_opcount
98     5) add an entry (X) to opgroups
99     6) add an entry to the switch in collapseToReady
100     7) add an entry to resolveX
101 jfenwick 1899 */
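// A sketch of the seven steps above for a hypothetical new pointwise unary
// operation CBRT (cube root). The name CBRT, the string "cbrt" and a
// Data::cbrt() method are illustrative assumptions only; they do not exist in
// this source tree.
//
//   1) add CBRT to the ES_optype enumeration (after the existing entries)
//   2) CBRT is pointwise with one argument, so its opgroup is G_UNARY
//   3) append "cbrt" to ES_opstrings at the matching index
//   4) increase ES_opcount from 44 to 45
//   5) append G_UNARY to opgroups at the same index
//   6) in collapseToReady():
//        case CBRT:
//            result=left.cbrt();
//            break;
//   7) in resolveNodeUnary():
//        case CBRT:
//            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cbrt);
//            break;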
102    
103    
104 jfenwick 1865 using namespace std;
105 jfenwick 1868 using namespace boost;
106 jfenwick 1865
107     namespace escript
108     {
109    
110     namespace
111     {
112    
113 jfenwick 2777
114     // enabling this will print out whenever the maximum stacksize used by resolve increases
115     // it assumes _OPENMP is also in use
116     //#define LAZY_STACK_PROF
117    
118    
119    
120     #ifndef _OPENMP
121     #ifdef LAZY_STACK_PROF
122     #undef LAZY_STACK_PROF
123     #endif
124     #endif
125    
126    
127     #ifdef LAZY_STACK_PROF
128     std::vector<void*> stackstart(getNumberOfThreads());
129     std::vector<void*> stackend(getNumberOfThreads());
130     size_t maxstackuse=0;
131     #endif
132    
133 jfenwick 1886 enum ES_opgroup
134     {
135     G_UNKNOWN,
136     G_IDENTITY,
137 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
138 jfenwick 2037 G_UNARY, // pointwise operations with one argument
139 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
140 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
141 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
142 jfenwick 2496 G_TENSORPROD, // general tensor product
143 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
144 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
145     G_CONDEVAL
146 jfenwick 1886 };
147    
148    
149    
150    
151 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
152     "sin","cos","tan",
153 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
154     "asinh","acosh","atanh",
155 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
156 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
157 jfenwick 2066 "symmetric","nonsymmetric",
158 jfenwick 2084 "prod",
159 jfenwick 2496 "transpose", "trace",
160 jfenwick 2721 "swapaxes",
161 jfenwick 3035 "minval", "maxval",
162     "condEval"};
163     int ES_opcount=44;
164 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
165     G_UNARY,G_UNARY,G_UNARY, //10
166     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
167     G_UNARY,G_UNARY,G_UNARY, // 20
168 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
170 jfenwick 2066 G_NP1OUT,G_NP1OUT,
171 jfenwick 2084 G_TENSORPROD,
172 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
173 jfenwick 2721 G_NP1OUT_2P,
174 jfenwick 3035 G_REDUCTION, G_REDUCTION,
175     G_CONDEVAL};
176 jfenwick 1886 inline
177     ES_opgroup
178     getOpgroup(ES_optype op)
179     {
180     return opgroups[op];
181     }
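// For example, assuming the ES_optype enumerators follow the order of
// ES_opstrings above:
//   getOpgroup(SIN)  == G_UNARY       // index 7,  "sin"
//   getOpgroup(PROD) == G_TENSORPROD  // index 37, "prod"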
182    
183 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
184     FunctionSpace
185     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
186     {
187     // perhaps this should call interpolate and throw or something?
188     // maybe we need an interpolate node -
189     // that way, if interpolate is required in any other op we can just throw a
190     // programming error exception.
191 jfenwick 1879
192 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
193     FunctionSpace r=right->getFunctionSpace();
194     if (l!=r)
195     {
196     if (r.probeInterpolation(l))
197     {
198     return l;
199     }
200     if (l.probeInterpolation(r))
201     {
202     return r;
203     }
204     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
205     }
206     return l;
207 jfenwick 1865 }
208    
209     // return the shape of the result of "left op right"
210 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
211 jfenwick 1865 DataTypes::ShapeType
212     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
213     {
214 jfenwick 1879 if (left->getShape()!=right->getShape())
215     {
216 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
217 jfenwick 1908 {
218     throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
219     }
220 jfenwick 2721
221 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
222     {
223     return right->getShape();
224     }
225     if (right->getRank()==0)
226     {
227     return left->getShape();
228     }
229     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
230 jfenwick 1879 }
231     return left->getShape();
232 jfenwick 1865 }
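// For example, under the rules above:
//   scalar ()   op shape (3,4)  -> result shape (3,4)
//   shape (3,4) op scalar ()    -> result shape (3,4)
//   shape (3,4) op shape (4,3)  -> DataException (non-scalar shapes must match)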
233    
234 jfenwick 2084 // return the shape for "op left"
235    
236     DataTypes::ShapeType
237 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
238 jfenwick 2084 {
239     switch(op)
240     {
241     case TRANS:
242 jfenwick 2199 { // for the scoping of variables
243     const DataTypes::ShapeType& s=left->getShape();
244     DataTypes::ShapeType sh;
245     int rank=left->getRank();
246     if (axis_offset<0 || axis_offset>rank)
247     {
248 caltinay 2779 stringstream e;
249     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
250     throw DataException(e.str());
251     }
252     for (int i=0; i<rank; i++)
253 jfenwick 2199 {
254     int index = (axis_offset+i)%rank;
255 caltinay 2779 sh.push_back(s[index]); // Append to new shape
256     }
257 jfenwick 2199 return sh;
258     }
259 jfenwick 2084 break;
260     case TRACE:
261 jfenwick 2199 {
262     int rank=left->getRank();
263     if (rank<2)
264     {
265     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
266     }
267     if ((axis_offset>rank-2) || (axis_offset<0))
268     {
269     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
270     }
271     if (rank==2)
272     {
273     return DataTypes::scalarShape;
274     }
275     else if (rank==3)
276     {
277     DataTypes::ShapeType sh;
278     if (axis_offset==0)
279     {
280     sh.push_back(left->getShape()[2]);
281     }
282     else // offset==1
283     {
284     sh.push_back(left->getShape()[0]);
285     }
286     return sh;
287     }
288     else if (rank==4)
289     {
290     DataTypes::ShapeType sh;
291     const DataTypes::ShapeType& s=left->getShape();
292     if (axis_offset==0)
293     {
294     sh.push_back(s[2]);
295     sh.push_back(s[3]);
296     }
297     else if (axis_offset==1)
298     {
299     sh.push_back(s[0]);
300     sh.push_back(s[3]);
301     }
302     else // offset==2
303     {
304     sh.push_back(s[0]);
305     sh.push_back(s[1]);
306     }
307     return sh;
308     }
309     else // unknown rank
310     {
311     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
312     }
313     }
314 jfenwick 2084 break;
315     default:
316     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
317     }
318     }
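// Worked examples for the two cases above:
//   TRANS with shape (2,3,4) and axis_offset=1 appends s[(1+i)%3] for
//   i=0,1,2, giving (3,4,2).
//   TRACE with shape (2,3,4) and axis_offset=0 is the rank-3 case and keeps
//   only the trailing dimension, giving (4).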
319    
320 jfenwick 2496 DataTypes::ShapeType
321     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
322     {
323     // This code taken from the Data.cpp swapaxes() method
324     // Some of the checks are probably redundant here
325     int axis0_tmp,axis1_tmp;
326     const DataTypes::ShapeType& s=left->getShape();
327     DataTypes::ShapeType out_shape;
328     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
329     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
330     int rank=left->getRank();
331     if (rank<2) {
332     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
333     }
334     if (axis0<0 || axis0>rank-1) {
335 caltinay 2779 stringstream e;
336     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
337     throw DataException(e.str());
338 jfenwick 2496 }
339     if (axis1<0 || axis1>rank-1) {
340 caltinay 2782 stringstream e;
341 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
342     throw DataException(e.str());
343 jfenwick 2496 }
344     if (axis0 == axis1) {
345     throw DataException("Error - Data::swapaxes: axis indices must be different.");
346     }
347     if (axis0 > axis1) {
348     axis0_tmp=axis1;
349     axis1_tmp=axis0;
350     } else {
351     axis0_tmp=axis0;
352     axis1_tmp=axis1;
353     }
354     for (int i=0; i<rank; i++) {
355     if (i == axis0_tmp) {
356     out_shape.push_back(s[axis1_tmp]);
357     } else if (i == axis1_tmp) {
358     out_shape.push_back(s[axis0_tmp]);
359     } else {
360     out_shape.push_back(s[i]);
361     }
362     }
363     return out_shape;
364     }
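// Example: for shape (2,3,4), SwapShape(left, 0, 2) copies the shape but
// exchanges the entries at positions 0 and 2, producing (4,3,2).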
365    
366    
367 jfenwick 2066 // determine the output shape for the general tensor product operation
368     // the additional parameters return information required later for the product
369     // the majority of this code is copy pasted from C_General_Tensor_Product
370     DataTypes::ShapeType
371     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
372 jfenwick 1865 {
373 jfenwick 2066
374     // Get rank and shape of inputs
375     int rank0 = left->getRank();
376     int rank1 = right->getRank();
377     const DataTypes::ShapeType& shape0 = left->getShape();
378     const DataTypes::ShapeType& shape1 = right->getShape();
379    
380     // Prepare for the loops of the product and verify compatibility of shapes
381     int start0=0, start1=0;
382     if (transpose == 0) {}
383     else if (transpose == 1) { start0 = axis_offset; }
384     else if (transpose == 2) { start1 = rank1-axis_offset; }
385     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
386    
387 jfenwick 2085 if (rank0<axis_offset)
388     {
389     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
390     }
391 jfenwick 2066
392     // Adjust the shapes for transpose
393     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
394     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
395     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
396     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
397    
398     // Prepare for the loops of the product
399     SL=1, SM=1, SR=1;
400     for (int i=0; i<rank0-axis_offset; i++) {
401     SL *= tmpShape0[i];
402     }
403     for (int i=rank0-axis_offset; i<rank0; i++) {
404     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
405     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
406     }
407     SM *= tmpShape0[i];
408     }
409     for (int i=axis_offset; i<rank1; i++) {
410     SR *= tmpShape1[i];
411     }
412    
413     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
414     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
415     { // block to limit the scope of out_index
416     int out_index=0;
417     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
418     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
419     }
420 jfenwick 2086
421     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
422     {
423     ostringstream os;
424     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
425     throw DataException(os.str());
426     }
427    
428 jfenwick 2066 return shape2;
429 jfenwick 1865 }
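// A familiar special case of the shape computation above is the matrix-matrix
// product: left shape (3,4), right shape (4,5), axis_offset=1, transpose=0
// gives SL=3, SM=4 (the contracted dimension), SR=5 and an output shape of
// (3,5).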
430    
431     } // end anonymous namespace
432    
433    
434 jfenwick 1899
435     // Return a string representing the operation
436 jfenwick 1865 const std::string&
437     opToString(ES_optype op)
438     {
439     if (op<0 || op>=ES_opcount)
440     {
441     op=UNKNOWNOP;
442     }
443     return ES_opstrings[op];
444     }
445    
446 jfenwick 2500 void DataLazy::LazyNodeSetup()
447     {
448     #ifdef _OPENMP
449     int numthreads=omp_get_max_threads();
450     m_samples.resize(numthreads*m_samplesize);
451     m_sampleids=new int[numthreads];
452     for (int i=0;i<numthreads;++i)
453     {
454     m_sampleids[i]=-1;
455     }
456     #else
457     m_samples.resize(m_samplesize);
458     m_sampleids=new int[1];
459     m_sampleids[0]=-1;
460     #endif // _OPENMP
461     }
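// The layout established above is one scratch sample per thread, packed back
// to back in m_samples. The resolveNode* methods below rely on this, e.g.:
//
//   roffset=m_samplesize*tid;               // this thread's slot
//   double* result=&(m_samples[roffset]);   // write the resolved sample here
//
// m_sampleids[tid] records which sample the slot currently holds, so a second
// request for the same sample (as happens for shared nodes such as c=a+a) can
// be answered without recomputation.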
462 jfenwick 1865
463 jfenwick 2500
464     // Creates an identity node
465 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
466 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
467 jfenwick 2500 ,m_sampleids(0),
468     m_samples(1)
469 jfenwick 1865 {
470 jfenwick 1879 if (p->isLazy())
471     {
472     // I don't want identity of Lazy.
473     // Question: Why would that be so bad?
474     // Answer: We assume that the child of ID is something we can call getVector on
475     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
476     }
477     else
478     {
479 jfenwick 2271 p->makeLazyShared();
480 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481     makeIdentity(dr);
482 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483 jfenwick 1879 }
484 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
485 jfenwick 1865 }
486    
487 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489 jfenwick 2066 m_op(op),
490     m_axis_offset(0),
491     m_transpose(0),
492     m_SL(0), m_SM(0), m_SR(0)
493 jfenwick 1886 {
494 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495 jfenwick 1886 {
496     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }
498 jfenwick 2066
499 jfenwick 1886 DataLazy_ptr lleft;
500     if (!left->isLazy())
501     {
502     lleft=DataLazy_ptr(new DataLazy(left));
503     }
504     else
505     {
506     lleft=dynamic_pointer_cast<DataLazy>(left);
507     }
508 jfenwick 1889 m_readytype=lleft->m_readytype;
509 jfenwick 1886 m_left=lleft;
510     m_samplesize=getNumDPPSample()*getNoValues();
511 jfenwick 2177 m_children=m_left->m_children+1;
512     m_height=m_left->m_height+1;
513 jfenwick 2500 LazyNodeSetup();
514 jfenwick 2177 SIZELIMIT
515 jfenwick 1886 }
516    
517    
518 jfenwick 1943 // In this constructor we need to consider interpolation
519 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
520     : parent(resultFS(left,right,op), resultShape(left,right,op)),
521 jfenwick 2066 m_op(op),
522     m_SL(0), m_SM(0), m_SR(0)
523 jfenwick 1879 {
524 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
525 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
526 jfenwick 1886 {
527 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
528 jfenwick 1886 }
529 jfenwick 1943
530     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
531     {
532     FunctionSpace fs=getFunctionSpace();
533     Data ltemp(left);
534     Data tmp(ltemp,fs);
535     left=tmp.borrowDataPtr();
536     }
537 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
538 jfenwick 1943 {
539     Data tmp(Data(right),getFunctionSpace());
540     right=tmp.borrowDataPtr();
541 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
542 jfenwick 1943 }
543     left->operandCheck(*right);
544    
545 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
546 jfenwick 1879 {
547     m_left=dynamic_pointer_cast<DataLazy>(left);
548 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
549 jfenwick 1879 }
550     else
551     {
552     m_left=DataLazy_ptr(new DataLazy(left));
553 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
554 jfenwick 1879 }
555     if (right->isLazy())
556     {
557     m_right=dynamic_pointer_cast<DataLazy>(right);
558 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
559 jfenwick 1879 }
560     else
561     {
562     m_right=DataLazy_ptr(new DataLazy(right));
563 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
564 jfenwick 1879 }
565 jfenwick 1889 char lt=m_left->m_readytype;
566     char rt=m_right->m_readytype;
567     if (lt=='E' || rt=='E')
568     {
569     m_readytype='E';
570     }
571     else if (lt=='T' || rt=='T')
572     {
573     m_readytype='T';
574     }
575     else
576     {
577     m_readytype='C';
578     }
579 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
580 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
581     m_height=max(m_left->m_height,m_right->m_height)+1;
582 jfenwick 2500 LazyNodeSetup();
583 jfenwick 2177 SIZELIMIT
584 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
585 jfenwick 1879 }
586    
587 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
588     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
589     m_op(op),
590     m_axis_offset(axis_offset),
591     m_transpose(transpose)
592     {
593     if ((getOpgroup(op)!=G_TENSORPROD))
594     {
595     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
596     }
597     if ((transpose>2) || (transpose<0))
598     {
599     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
600     }
601     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
602     {
603     FunctionSpace fs=getFunctionSpace();
604     Data ltemp(left);
605     Data tmp(ltemp,fs);
606     left=tmp.borrowDataPtr();
607     }
608     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
609     {
610     Data tmp(Data(right),getFunctionSpace());
611     right=tmp.borrowDataPtr();
612     }
613 jfenwick 2195 // left->operandCheck(*right);
614 jfenwick 1879
615 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
616     {
617     m_left=dynamic_pointer_cast<DataLazy>(left);
618     }
619     else
620     {
621     m_left=DataLazy_ptr(new DataLazy(left));
622     }
623     if (right->isLazy())
624     {
625     m_right=dynamic_pointer_cast<DataLazy>(right);
626     }
627     else
628     {
629     m_right=DataLazy_ptr(new DataLazy(right));
630     }
631     char lt=m_left->m_readytype;
632     char rt=m_right->m_readytype;
633     if (lt=='E' || rt=='E')
634     {
635     m_readytype='E';
636     }
637     else if (lt=='T' || rt=='T')
638     {
639     m_readytype='T';
640     }
641     else
642     {
643     m_readytype='C';
644     }
645     m_samplesize=getNumDPPSample()*getNoValues();
646 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
647     m_height=max(m_left->m_height,m_right->m_height)+1;
648 jfenwick 2500 LazyNodeSetup();
649 jfenwick 2177 SIZELIMIT
650 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
651 jfenwick 2066 }
652    
653    
654 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
655 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
656 jfenwick 2084 m_op(op),
657     m_axis_offset(axis_offset),
658 jfenwick 2147 m_transpose(0),
659     m_tol(0)
660 jfenwick 2084 {
661     if ((getOpgroup(op)!=G_NP1OUT_P))
662     {
663     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
664     }
665     DataLazy_ptr lleft;
666     if (!left->isLazy())
667     {
668     lleft=DataLazy_ptr(new DataLazy(left));
669     }
670     else
671     {
672     lleft=dynamic_pointer_cast<DataLazy>(left);
673     }
674     m_readytype=lleft->m_readytype;
675     m_left=lleft;
676     m_samplesize=getNumDPPSample()*getNoValues();
677 jfenwick 2177 m_children=m_left->m_children+1;
678     m_height=m_left->m_height+1;
679 jfenwick 2500 LazyNodeSetup();
680 jfenwick 2177 SIZELIMIT
681 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
682 jfenwick 2084 }
683    
684 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
685     : parent(left->getFunctionSpace(), left->getShape()),
686     m_op(op),
687     m_axis_offset(0),
688     m_transpose(0),
689     m_tol(tol)
690     {
691     if ((getOpgroup(op)!=G_UNARY_P))
692     {
693     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
694     }
695     DataLazy_ptr lleft;
696     if (!left->isLazy())
697     {
698     lleft=DataLazy_ptr(new DataLazy(left));
699     }
700     else
701     {
702     lleft=dynamic_pointer_cast<DataLazy>(left);
703     }
704     m_readytype=lleft->m_readytype;
705     m_left=lleft;
706     m_samplesize=getNumDPPSample()*getNoValues();
707 jfenwick 2177 m_children=m_left->m_children+1;
708     m_height=m_left->m_height+1;
709 jfenwick 2500 LazyNodeSetup();
710 jfenwick 2177 SIZELIMIT
711 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
712     }
713 jfenwick 2084
714 jfenwick 2496
715     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
716     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
717     m_op(op),
718     m_axis_offset(axis0),
719     m_transpose(axis1),
720     m_tol(0)
721     {
722     if ((getOpgroup(op)!=G_NP1OUT_2P))
723     {
724     throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
725     }
726     DataLazy_ptr lleft;
727     if (!left->isLazy())
728     {
729     lleft=DataLazy_ptr(new DataLazy(left));
730     }
731     else
732     {
733     lleft=dynamic_pointer_cast<DataLazy>(left);
734     }
735     m_readytype=lleft->m_readytype;
736     m_left=lleft;
737     m_samplesize=getNumDPPSample()*getNoValues();
738     m_children=m_left->m_children+1;
739     m_height=m_left->m_height+1;
740 jfenwick 2500 LazyNodeSetup();
741 jfenwick 2496 SIZELIMIT
742     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
743     }
744    
745 jfenwick 3035
746     namespace
747     {
748    
749     inline int max3(int a, int b, int c)
750     {
751     int t=(a>b?a:b);
752     return (t>c?t:c);
753    
754     }
755     }
756    
757     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
758     : parent(left->getFunctionSpace(), left->getShape()),
759     m_op(CONDEVAL),
760     m_axis_offset(0),
761     m_transpose(0),
762     m_tol(0)
763     {
764    
765     DataLazy_ptr lmask;
766     DataLazy_ptr lleft;
767     DataLazy_ptr lright;
768     if (!mask->isLazy())
769     {
770     lmask=DataLazy_ptr(new DataLazy(mask));
771     }
772     else
773     {
774     lmask=dynamic_pointer_cast<DataLazy>(mask);
775     }
776     if (!left->isLazy())
777     {
778     lleft=DataLazy_ptr(new DataLazy(left));
779     }
780     else
781     {
782     lleft=dynamic_pointer_cast<DataLazy>(left);
783     }
784     if (!right->isLazy())
785     {
786     lright=DataLazy_ptr(new DataLazy(right));
787     }
788     else
789     {
790     lright=dynamic_pointer_cast<DataLazy>(right);
791     }
792     m_readytype=lmask->m_readytype;
793     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
794     {
795     throw DataException("Programmer Error - condEval arguments must have the same readytype");
796     }
797     m_left=lleft;
798     m_right=lright;
799     m_mask=lmask;
800     m_samplesize=getNumDPPSample()*getNoValues();
801     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
802     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
803     LazyNodeSetup();
804     SIZELIMIT
805     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
806     }
807    
808    
809    
810 jfenwick 1865 DataLazy::~DataLazy()
811     {
812 jfenwick 2500 delete[] m_sampleids;
813 jfenwick 1865 }
814    
815 jfenwick 1879
816 jfenwick 1899 /*
817     \brief Evaluates the expression using methods on Data.
818     This does the work for the collapse method.
819     For reasons of efficiency do not call this method on DataExpanded nodes.
820     */
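// For example, if the whole expression was built from Constant data
// (m_readytype=='C'), an expression such as sin(c1+c2) is collapsed by first
// collapsing the child recursively and then replaying the operation eagerly
// through the Data interface, roughly:
//
//   Data left(m_left->collapseToReady());   // yields the value of c1+c2
//   Data result=left.sin();                 // the SIN case below
//   return result.borrowReadyPtr();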
821 jfenwick 1889 DataReady_ptr
822     DataLazy::collapseToReady()
823     {
824     if (m_readytype=='E')
825     { // this is more an efficiency concern than anything else
826     throw DataException("Programmer Error - do not use collapse on Expanded data.");
827     }
828     if (m_op==IDENTITY)
829     {
830     return m_id;
831     }
832     DataReady_ptr pleft=m_left->collapseToReady();
833     Data left(pleft);
834     Data right;
835 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
836 jfenwick 1889 {
837     right=Data(m_right->collapseToReady());
838     }
839     Data result;
840     switch(m_op)
841     {
842     case ADD:
843     result=left+right;
844     break;
845     case SUB:
846     result=left-right;
847     break;
848     case MUL:
849     result=left*right;
850     break;
851     case DIV:
852     result=left/right;
853     break;
854     case SIN:
855     result=left.sin();
856     break;
857     case COS:
858     result=left.cos();
859     break;
860     case TAN:
861     result=left.tan();
862     break;
863     case ASIN:
864     result=left.asin();
865     break;
866     case ACOS:
867     result=left.acos();
868     break;
869     case ATAN:
870     result=left.atan();
871     break;
872     case SINH:
873     result=left.sinh();
874     break;
875     case COSH:
876     result=left.cosh();
877     break;
878     case TANH:
879     result=left.tanh();
880     break;
881     case ERF:
882     result=left.erf();
883     break;
884     case ASINH:
885     result=left.asinh();
886     break;
887     case ACOSH:
888     result=left.acosh();
889     break;
890     case ATANH:
891     result=left.atanh();
892     break;
893     case LOG10:
894     result=left.log10();
895     break;
896     case LOG:
897     result=left.log();
898     break;
899     case SIGN:
900     result=left.sign();
901     break;
902     case ABS:
903     result=left.abs();
904     break;
905     case NEG:
906     result=left.neg();
907     break;
908     case POS:
909     // it doesn't mean anything for delayed.
910     // it will just trigger a deep copy of the lazy object
911     throw DataException("Programmer error - POS not supported for lazy data.");
912     break;
913     case EXP:
914     result=left.exp();
915     break;
916     case SQRT:
917     result=left.sqrt();
918     break;
919     case RECIP:
920     result=left.oneOver();
921     break;
922     case GZ:
923     result=left.wherePositive();
924     break;
925     case LZ:
926     result=left.whereNegative();
927     break;
928     case GEZ:
929     result=left.whereNonNegative();
930     break;
931     case LEZ:
932     result=left.whereNonPositive();
933     break;
934 jfenwick 2147 case NEZ:
935     result=left.whereNonZero(m_tol);
936     break;
937     case EZ:
938     result=left.whereZero(m_tol);
939     break;
940 jfenwick 2037 case SYM:
941     result=left.symmetric();
942     break;
943     case NSYM:
944     result=left.nonsymmetric();
945     break;
946 jfenwick 2066 case PROD:
947     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
948     break;
949 jfenwick 2084 case TRANS:
950     result=left.transpose(m_axis_offset);
951     break;
952     case TRACE:
953     result=left.trace(m_axis_offset);
954     break;
955 jfenwick 2496 case SWAP:
956     result=left.swapaxes(m_axis_offset, m_transpose);
957     break;
958 jfenwick 2721 case MINVAL:
959     result=left.minval();
960     break;
961     case MAXVAL:
962     result=left.maxval();
963     break;
964 jfenwick 1889 default:
965 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
966 jfenwick 1889 }
967     return result.borrowReadyPtr();
968     }
969    
970 jfenwick 1899 /*
971     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
972     This method uses the original methods on the Data class to evaluate the expressions.
973     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
974     the purpose of using DataLazy in the first place).
975     */
976 jfenwick 1889 void
977     DataLazy::collapse()
978     {
979     if (m_op==IDENTITY)
980     {
981     return;
982     }
983     if (m_readytype=='E')
984     { // this is more an efficiency concern than anything else
985     throw DataException("Programmer Error - do not use collapse on Expanded data.");
986     }
987     m_id=collapseToReady();
988     m_op=IDENTITY;
989     }
990    
991 jfenwick 1899
992 jfenwick 1889
993    
994    
995 jfenwick 2147
996 phornby 1987 #define PROC_OP(TYPE,X) \
997 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
998     {\
999     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1000     { \
1001     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1002 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1003 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1004 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1005 jfenwick 2152 lroffset+=leftstep; \
1006     rroffset+=rightstep; \
1007     }\
1008     lroffset+=oleftstep;\
1009     rroffset+=orightstep;\
1010 jfenwick 1889 }
1011    
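// In the simplest case handled by resolveNodeBinary below (both children
// expanded and non-scalar) the parameters are onumsteps=1, numsteps=1 and
// chunksize covers the whole sample, so PROC_OP(NO_ARG,plus<double>()) boils
// down to a single call, roughly:
//
//   tensor_binary_operation(chunksize, &((*left)[lroffset]),
//                           &((*right)[rroffset]), resultp, plus<double>());
//
// The outer j loop and the o*step corrections only matter when an expanded
// scalar is combined with a non-scalar operand.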
1012 jfenwick 1899
1013 jfenwick 2500 // The result will be stored in m_samples
1014     // The return value is a pointer to the DataVector, offset is the offset within the return value
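// Callers use the returned vector together with the offset, as the helpers
// below do for their children, e.g.:
//
//   size_t loffset=0;
//   const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
//   const double* left=&((*leftres)[loffset]);   // first double of the child's resolved sample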
1015     const DataTypes::ValueType*
1016     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1017     {
1018     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1019     // collapse so we have a 'E' node or an IDENTITY for some other type
1020     if (m_readytype!='E' && m_op!=IDENTITY)
1021     {
1022     collapse();
1023     }
1024     if (m_op==IDENTITY)
1025     {
1026     const ValueType& vec=m_id->getVectorRO();
1027     roffset=m_id->getPointOffset(sampleNo, 0);
1028 jfenwick 2777 #ifdef LAZY_STACK_PROF
1029     int x;
1030     if (&x<stackend[omp_get_thread_num()])
1031     {
1032     stackend[omp_get_thread_num()]=&x;
1033     }
1034     #endif
1035 jfenwick 2500 return &(vec);
1036     }
1037     if (m_readytype!='E')
1038     {
1039     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1040     }
1041 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1042     {
1043     roffset=tid*m_samplesize;
1044     return &(m_samples); // sample is already resolved
1045     }
1046     m_sampleids[tid]=sampleNo;
1047 jfenwick 3035
1048 jfenwick 2500 switch (getOpgroup(m_op))
1049     {
1050     case G_UNARY:
1051     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1052     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1053     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1054     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1055     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1056     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1057 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1058 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1059 jfenwick 2500 default:
1060     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1061     }
1062     }
1063    
1064     const DataTypes::ValueType*
1065     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1066     {
1067     // we assume that any collapsing has been done before we get here
1068     // since we only have one argument we don't need to think about only
1069     // processing single points.
1070     // we will also know we won't get identity nodes
1071     if (m_readytype!='E')
1072     {
1073     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1074     }
1075     if (m_op==IDENTITY)
1076     {
1077     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1078     }
1079     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1080     const double* left=&((*leftres)[roffset]);
1081     roffset=m_samplesize*tid;
1082     double* result=&(m_samples[roffset]);
1083     switch (m_op)
1084     {
1085     case SIN:
1086     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1087     break;
1088     case COS:
1089     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1090     break;
1091     case TAN:
1092     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1093     break;
1094     case ASIN:
1095     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1096     break;
1097     case ACOS:
1098     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1099     break;
1100     case ATAN:
1101     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1102     break;
1103     case SINH:
1104     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1105     break;
1106     case COSH:
1107     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1108     break;
1109     case TANH:
1110     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1111     break;
1112     case ERF:
1113     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1114     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1115     #else
1116     tensor_unary_operation(m_samplesize, left, result, ::erf);
1117     break;
1118     #endif
1119     case ASINH:
1120     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1121     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1122     #else
1123     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1124     #endif
1125     break;
1126     case ACOSH:
1127     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1128     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1129     #else
1130     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1131     #endif
1132     break;
1133     case ATANH:
1134     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1135     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1136     #else
1137     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1138     #endif
1139     break;
1140     case LOG10:
1141     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1142     break;
1143     case LOG:
1144     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1145     break;
1146     case SIGN:
1147     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1148     break;
1149     case ABS:
1150     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1151     break;
1152     case NEG:
1153     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1154     break;
1155     case POS:
1156     // it doesn't mean anything for delayed.
1157     // it will just trigger a deep copy of the lazy object
1158     throw DataException("Programmer error - POS not supported for lazy data.");
1159     break;
1160     case EXP:
1161     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1162     break;
1163     case SQRT:
1164     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1165     break;
1166     case RECIP:
1167     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1168     break;
1169     case GZ:
1170     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1171     break;
1172     case LZ:
1173     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1174     break;
1175     case GEZ:
1176     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1177     break;
1178     case LEZ:
1179     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1180     break;
1181     // These are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1182     case NEZ:
1183     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1184     break;
1185     case EZ:
1186     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1187     break;
1188    
1189     default:
1190     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1191     }
1192     return &(m_samples);
1193     }
1194    
1195    
1196     const DataTypes::ValueType*
1197 jfenwick 2721 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1198     {
1199     // we assume that any collapsing has been done before we get here
1200     // since we only have one argument we don't need to think about only
1201     // processing single points.
1202     // we will also know we won't get identity nodes
1203     if (m_readytype!='E')
1204     {
1205     throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1206     }
1207     if (m_op==IDENTITY)
1208     {
1209     throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1210     }
1211     size_t loffset=0;
1212     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1213    
1214     roffset=m_samplesize*tid;
1215 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1216     unsigned int psize=DataTypes::noValues(m_left->getShape());
1217 jfenwick 2721 double* result=&(m_samples[roffset]);
1218     switch (m_op)
1219     {
1220     case MINVAL:
1221     {
1222 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1223     {
1224     FMin op;
1225     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1226     loffset+=psize;
1227     result++;
1228     }
1229 jfenwick 2721 }
1230     break;
1231     case MAXVAL:
1232     {
1233 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1234     {
1235 jfenwick 2721 FMax op;
1236     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1237 jfenwick 2734 loffset+=psize;
1238     result++;
1239     }
1240 jfenwick 2721 }
1241     break;
1242     default:
1243     throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1244     }
1245     return &(m_samples);
1246     }
1247    
1248     const DataTypes::ValueType*
1249 jfenwick 2500 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1250     {
1251     // we assume that any collapsing has been done before we get here
1252     // since we only have one argument we don't need to think about only
1253     // processing single points.
1254     if (m_readytype!='E')
1255     {
1256     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1257     }
1258     if (m_op==IDENTITY)
1259     {
1260     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1261     }
1262     size_t subroffset;
1263     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1264     roffset=m_samplesize*tid;
1265     size_t loop=0;
1266     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1267     size_t step=getNoValues();
1268     size_t offset=roffset;
1269     switch (m_op)
1270     {
1271     case SYM:
1272     for (loop=0;loop<numsteps;++loop)
1273     {
1274     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1275     subroffset+=step;
1276     offset+=step;
1277     }
1278     break;
1279     case NSYM:
1280     for (loop=0;loop<numsteps;++loop)
1281     {
1282     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283     subroffset+=step;
1284     offset+=step;
1285     }
1286     break;
1287     default:
1288     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1289     }
1290     return &m_samples;
1291     }
1292    
1293     const DataTypes::ValueType*
1294     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1295     {
1296     // we assume that any collapsing has been done before we get here
1297     // since we only have one argument we don't need to think about only
1298     // processing single points.
1299     if (m_readytype!='E')
1300     {
1301     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1302     }
1303     if (m_op==IDENTITY)
1304     {
1305     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1306     }
1307     size_t subroffset;
1308     size_t offset;
1309     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1310     roffset=m_samplesize*tid;
1311     offset=roffset;
1312     size_t loop=0;
1313     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1314     size_t outstep=getNoValues();
1315     size_t instep=m_left->getNoValues();
1316     switch (m_op)
1317     {
1318     case TRACE:
1319     for (loop=0;loop<numsteps;++loop)
1320     {
1321     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1322     subroffset+=instep;
1323     offset+=outstep;
1324     }
1325     break;
1326     case TRANS:
1327     for (loop=0;loop<numsteps;++loop)
1328     {
1329     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1330     subroffset+=instep;
1331     offset+=outstep;
1332     }
1333     break;
1334     default:
1335     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1336     }
1337     return &m_samples;
1338     }
1339    
1340    
1341     const DataTypes::ValueType*
1342     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1343     {
1344     if (m_readytype!='E')
1345     {
1346     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1347     }
1348     if (m_op==IDENTITY)
1349     {
1350     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1351     }
1352     size_t subroffset;
1353     size_t offset;
1354     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1355     roffset=m_samplesize*tid;
1356     offset=roffset;
1357     size_t loop=0;
1358     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1359     size_t outstep=getNoValues();
1360     size_t instep=m_left->getNoValues();
1361     switch (m_op)
1362     {
1363     case SWAP:
1364     for (loop=0;loop<numsteps;++loop)
1365     {
1366     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1367     subroffset+=instep;
1368     offset+=outstep;
1369     }
1370     break;
1371     default:
1372     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1373     }
1374     return &m_samples;
1375     }
1376    
1377 jfenwick 3035 const DataTypes::ValueType*
1378     DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1379     {
1380     if (m_readytype!='E')
1381     {
1382     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1383     }
1384     if (m_op!=CONDEVAL)
1385     {
1386     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1387     }
1388     size_t subroffset;
1389 jfenwick 2500
1390 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1391     const ValueType* srcres=0;
1392     if ((*maskres)[subroffset]>0)
1393     {
1394     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1395     }
1396     else
1397     {
1398     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1399     }
1400    
1401     // Now we need to copy the result
1402    
1403     roffset=m_samplesize*tid;
1404     for (int i=0;i<m_samplesize;++i)
1405     {
1406     m_samples[roffset+i]=(*srcres)[subroffset+i];
1407     }
1408    
1409     return &m_samples;
1410     }
1411    
1412 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1413     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1414     // If both children are expanded, then we can process them in a single operation (we treat
1415     // the whole sample as one big datapoint).
1416     // If one of the children is not expanded, then we need to treat each point in the sample
1417     // individually.
1418     // There is an additional complication when scalar operations are considered.
1419     // For example, 2+Vector.
1420     // In this case each double within the point is treated individually
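// e.g. for 2+Vector with a (collapsed) scalar on the left and an expanded
// vector on the right, the setup below ends with chunksize=1, leftstep=0,
// rightstep=1 and numsteps=m_right->getNumDPPSample()*m_right->getNoValues(),
// so each double of the right sample is combined with the single left value.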
1421     const DataTypes::ValueType*
1422     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1423     {
1424     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1425    
1426     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1427     // first work out which of the children are expanded
1428     bool leftExp=(m_left->m_readytype=='E');
1429     bool rightExp=(m_right->m_readytype=='E');
1430     if (!leftExp && !rightExp)
1431     {
1432     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1433     }
1434     bool leftScalar=(m_left->getRank()==0);
1435     bool rightScalar=(m_right->getRank()==0);
1436     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1437     {
1438     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1439     }
1440     size_t leftsize=m_left->getNoValues();
1441     size_t rightsize=m_right->getNoValues();
1442     size_t chunksize=1; // how many doubles will be processed in one go
1443     int leftstep=0; // how far should the left offset advance after each step
1444     int rightstep=0;
1445     int numsteps=0; // total number of steps for the inner loop
1446     int oleftstep=0; // the o variables refer to the outer loop
1447     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1448     int onumsteps=1;
1449    
1450     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1451     bool RES=(rightExp && rightScalar);
1452     bool LS=(!leftExp && leftScalar); // left is a single scalar
1453     bool RS=(!rightExp && rightScalar);
1454     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1455     bool RN=(!rightExp && !rightScalar);
1456     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1457     bool REN=(rightExp && !rightScalar);
1458    
1459     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1460     {
1461     chunksize=m_left->getNumDPPSample()*leftsize;
1462     leftstep=0;
1463     rightstep=0;
1464     numsteps=1;
1465     }
1466     else if (LES || RES)
1467     {
1468     chunksize=1;
1469     if (LES) // left is an expanded scalar
1470     {
1471     if (RS)
1472     {
1473     leftstep=1;
1474     rightstep=0;
1475     numsteps=m_left->getNumDPPSample();
1476     }
1477     else // RN or REN
1478     {
1479     leftstep=0;
1480     oleftstep=1;
1481     rightstep=1;
1482     orightstep=(RN ? -(int)rightsize : 0);
1483     numsteps=rightsize;
1484     onumsteps=m_left->getNumDPPSample();
1485     }
1486     }
1487     else // right is an expanded scalar
1488     {
1489     if (LS)
1490     {
1491     rightstep=1;
1492     leftstep=0;
1493     numsteps=m_right->getNumDPPSample();
1494     }
1495     else
1496     {
1497     rightstep=0;
1498     orightstep=1;
1499     leftstep=1;
1500     oleftstep=(LN ? -(int)leftsize : 0);
1501     numsteps=leftsize;
1502     onumsteps=m_right->getNumDPPSample();
1503     }
1504     }
1505     }
1506     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1507     {
1508     if (LEN) // and Right will be a single value
1509     {
1510     chunksize=rightsize;
1511     leftstep=rightsize;
1512     rightstep=0;
1513     numsteps=m_left->getNumDPPSample();
1514     if (RS)
1515     {
1516     numsteps*=leftsize;
1517     }
1518     }
1519     else // REN
1520     {
1521     chunksize=leftsize;
1522     rightstep=leftsize;
1523     leftstep=0;
1524     numsteps=m_right->getNumDPPSample();
1525     if (LS)
1526     {
1527     numsteps*=rightsize;
1528     }
1529     }
1530     }
1531    
1532     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1533     // Get the values of sub-expressions
1534     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1535     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1536     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1537     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1538     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1539     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1540     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1541     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1542     LAZYDEBUG(cout << "flags LS,RS,LN,RN,LES,RES,LEN,REN=" << LS << RS << LN << RN << LES << RES << LEN << REN << endl;)
1543    
1544     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1545     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1546    
1547    
1548     roffset=m_samplesize*tid;
1549     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1550     switch(m_op)
1551     {
1552     case ADD:
1553     PROC_OP(NO_ARG,plus<double>());
1554     break;
1555     case SUB:
1556     PROC_OP(NO_ARG,minus<double>());
1557     break;
1558     case MUL:
1559     PROC_OP(NO_ARG,multiplies<double>());
1560     break;
1561     case DIV:
1562     PROC_OP(NO_ARG,divides<double>());
1563     break;
1564     case POW:
1565     PROC_OP(double (double,double),::pow);
1566     break;
1567     default:
1568     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1569     }
1570     LAZYDEBUG(cout << "Result res[" << roffset << "]=" << m_samples[roffset] << endl;)
1571     return &m_samples;
1572     }
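// Illustrative sketch (hedged): the step variables computed in resolveNodeBinary above are
// designed to drive a chunk-wise loop over the two child samples; the real loop lives in the
// PROC_OP macro used in the switch statement. The hypothetical helper below only shows the
// implied structure: an outer loop (used when one operand is an expanded scalar), an inner
// loop over data points, and a contiguous chunk of doubles per step. All names are illustrative.
namespace
{
template <typename BinOp>
void binaryStepSketch(double* resultp, const double* left, const double* right,
                      size_t chunksize, int numsteps, int onumsteps,
                      int leftstep, int rightstep, int oleftstep, int orightstep,
                      int resultStep, BinOp op)
{
    size_t lro=0, rro=0;                          // offsets into the child samples
    for (int o=0; o<onumsteps; ++o)               // outer loop (expanded-scalar cases only)
    {
        for (int s=0; s<numsteps; ++s)            // inner loop over data points
        {
            for (size_t k=0; k<chunksize; ++k)    // contiguous run of doubles
            {
                resultp[k]=op(left[lro+k], right[rro+k]);
            }
            lro+=leftstep;
            rro+=rightstep;
            resultp+=resultStep;
        }
        lro+=oleftstep;                           // may be negative to rewind a single data point
        rro+=orightstep;
    }
}
} // anonymous namespace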
1573    
1574    
1575     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1576     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1577     // unlike the other resolve helpers, we must treat these datapoints separately.
1578     const DataTypes::ValueType*
1579     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1580     {
1581     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1582    
1583     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1584     // first work out which of the children are expanded
1585     bool leftExp=(m_left->m_readytype=='E');
1586     bool rightExp=(m_right->m_readytype=='E');
1587     int steps=getNumDPPSample();
1588     int leftStep=(leftExp? m_left->getNoValues() : 0); // scalars are never passed to this method
1589     int rightStep=(rightExp?m_right->getNoValues() : 0);
1590    
1591     int resultStep=getNoValues();
1592     roffset=m_samplesize*tid;
1593     size_t offset=roffset;
1594    
1595     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1596    
1597     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1598    
1599     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1600     cout << getNoValues() << endl;)
1601    
1602    
1603     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1604     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1605     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " RightR=" << m_right->getRank() << endl;)
1606     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1607     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1608     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1609     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1610    
1611     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1612     switch(m_op)
1613     {
1614     case PROD:
1615     for (int i=0;i<steps;++i,resultp+=resultStep)
1616     {
1617     const double *ptr_0 = &((*left)[lroffset]);
1618     const double *ptr_1 = &((*right)[rroffset]);
1619    
1620     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1621     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1622    
1623     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1624    
1625     lroffset+=leftStep;
1626     rroffset+=rightStep;
1627     }
1628     break;
1629     default:
1630     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1631     }
1632     roffset=offset;
1633     return &m_samples;
1634     }
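// Illustrative sketch (hedged): m_SL, m_SM and m_SR above describe the contraction carried out
// by matrix_matrix_product -- an SLxSM block combined with an SMxSR block into an SLxSR block,
// with m_transpose selecting transposed variants. The hypothetical helper below shows only the
// plain, non-transposed case and assumes row-major storage; the real routine also handles
// escript shapes and transposes.
namespace
{
void plainProductSketch(int SL, int SM, int SR, const double* A, const double* B, double* C)
{
    for (int i=0; i<SL; ++i)
    {
        for (int j=0; j<SR; ++j)
        {
            double sum=0;
            for (int k=0; k<SM; ++k)
            {
                sum+=A[i*SM+k]*B[k*SR+j];   // C(i,j) = sum_k A(i,k)*B(k,j)
            }
            C[i*SR+j]=sum;
        }
    }
}
} // anonymous namespace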
1635    
1636 jfenwick 1898
1637     const DataTypes::ValueType*
1638 jfenwick 2770 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1639 jfenwick 1879 {
1640 jfenwick 2271 #ifdef _OPENMP
1641     int tid=omp_get_thread_num();
1642     #else
1643     int tid=0;
1644     #endif
1645 jfenwick 2777
1646     #ifdef LAZY_STACK_PROF
1647     stackstart[tid]=&tid;
1648     stackend[tid]=&tid;
1649     const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1650     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1651     #pragma omp critical
1652     if (d>maxstackuse)
1653     {
1654     cout << "Max resolve Stack use " << d << endl;
1655     maxstackuse=d;
1656     }
1657     return r;
1658     #else
1659 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1660 jfenwick 2777 #endif
1661 jfenwick 2271 }
1662    
1663    
1664 jfenwick 2497 // This needs to do the work of the identity constructor
1665 jfenwick 2177 void
1666     DataLazy::resolveToIdentity()
1667     {
1668     if (m_op==IDENTITY)
1669     return;
1670 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1671 jfenwick 2177 makeIdentity(p);
1672     }
1673 jfenwick 1889
1674 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1675     {
1676     m_op=IDENTITY;
1677     m_axis_offset=0;
1678     m_transpose=0;
1679     m_SL=m_SM=m_SR=0;
1680     m_children=m_height=0;
1681     m_id=p;
1682     if(p->isConstant()) {m_readytype='C';}
1683     else if(p->isExpanded()) {m_readytype='E';}
1684     else if (p->isTagged()) {m_readytype='T';}
1685     else {throw DataException("Unknown DataReady instance in DataLazy::makeIdentity().");}
1686     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1687     m_left.reset();
1688     m_right.reset();
1689     }
1690    
1691 jfenwick 2497
1692     DataReady_ptr
1693     DataLazy::resolve()
1694     {
1695     resolveToIdentity();
1696     return m_id;
1697     }
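// Hedged usage note: resolve() evaluates the whole expression once and caches the result as an
// IDENTITY node, so repeated calls are cheap. A typical (hypothetical) caller might look like:
//
//   DataLazy_ptr lz = ...;                 // some lazy expression
//   DataReady_ptr ready = lz->resolve();   // evaluates (or collapses) and stores the result in m_id
//   // further calls to lz->resolve() return the same m_id without re-evaluating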
1698    
1699 jfenwick 2799
1700     /* This is logically a static method, but I think making it static caused problems on Windows. */
1701     void
1702     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1703     {
1704     if (dats.empty())
1705     {
1706     return;
1707     }
1708     vector<DataLazy*> work;
1709     FunctionSpace fs=dats[0]->getFunctionSpace();
1710     bool match=true;
1711 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1712 jfenwick 2799 {
1713     if (dats[i]->m_readytype!='E')
1714     {
1715     dats[i]->collapse();
1716     }
1717     if (dats[i]->m_op!=IDENTITY)
1718     {
1719     work.push_back(dats[i]);
1720     if (fs!=dats[i]->getFunctionSpace())
1721     {
1722     match=false;
1723     }
1724     }
1725     }
1726     if (work.empty())
1727     {
1728     return; // no work to do
1729     }
1730     if (match) // all functionspaces match. Note: this test is overly strict, since
1731     { // dats[0] may be one of the objects we discarded while
1732     // all of the remaining functionspaces still match.
1733     vector<DataExpanded*> dep;
1734     vector<ValueType*> vecs;
1735     for (int i=0;i<work.size();++i)
1736     {
1737     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1738     vecs.push_back(&(dep[i]->getVectorRW()));
1739     }
1740     int totalsamples=work[0]->getNumSamples();
1741     const ValueType* res=0; // Storage for answer
1742     int sample;
1743     #pragma omp parallel private(sample, res)
1744     {
1745     size_t roffset=0;
1746     #pragma omp for schedule(static)
1747     for (sample=0;sample<totalsamples;++sample)
1748     {
1749     roffset=0;
1750     int j;
1751 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1752 jfenwick 2799 {
1753     #ifdef _OPENMP
1754     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1755     #else
1756     res=work[j]->resolveNodeSample(0,sample,roffset);
1757     #endif
1758     DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1759     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1760     }
1761     }
1762     }
1763     // Now we need to load the new results as identity ops into the lazy nodes
1764 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1765 jfenwick 2799 {
1766     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1767     }
1768     }
1769     else // functionspaces do not match
1770     {
1771     for (int i=0;i<work.size();++i)
1772     {
1773     work[i]->resolveToIdentity();
1774     }
1775     }
1776     }
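// Hedged usage sketch: resolveGroupWorker resolves several lazy objects in one parallel pass,
// sharing the sample loop when all functionspaces match. The setup below is hypothetical;
// since the method is not actually static it is invoked through any instance.
//
//   std::vector<DataLazy*> group;
//   group.push_back(lazyA.get());          // lazyA, lazyB: DataLazy_ptr instances
//   group.push_back(lazyB.get());
//   lazyA->resolveGroupWorker(group);      // afterwards each non-IDENTITY node in the group
//                                          // wraps a freshly computed DataExpanded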
1777    
1778    
1779    
1780 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1781     DataReady_ptr
1782     DataLazy::resolveNodeWorker()
1783     {
1784     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1785     {
1786     collapse();
1787     }
1788     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1789     {
1790     return m_id;
1791     }
1792     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1793     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1794     ValueType& resvec=result->getVectorRW();
1795     DataReady_ptr resptr=DataReady_ptr(result);
1796    
1797     int sample;
1798     int totalsamples=getNumSamples();
1799     const ValueType* res=0; // Storage for answer
1800     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1801 jfenwick 2777 #pragma omp parallel private(sample,res)
1802 jfenwick 2500 {
1803 jfenwick 2777 size_t roffset=0;
1804     #ifdef LAZY_STACK_PROF
1805     stackstart[omp_get_thread_num()]=&roffset;
1806     stackend[omp_get_thread_num()]=&roffset;
1807     #endif
1808     #pragma omp for schedule(static)
1809     for (sample=0;sample<totalsamples;++sample)
1810     {
1811     roffset=0;
1812 jfenwick 2500 #ifdef _OPENMP
1813 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1814 jfenwick 2500 #else
1815 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1816 jfenwick 2500 #endif
1817     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1818     LAZYDEBUG(cout << "Final res[" << roffset << "]=" << (*res)[roffset] << endl;)
1819 jfenwick 2777 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1820     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1821     }
1822 jfenwick 2500 }
1823 jfenwick 2777 #ifdef LAZY_STACK_PROF
1824     for (int i=0;i<getNumberOfThreads();++i)
1825     {
1826     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1827     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1828     if (r>maxstackuse)
1829     {
1830     maxstackuse=r;
1831     }
1832     }
1833     cout << "Max resolve Stack use=" << maxstackuse << endl;
1834     #endif
1835 jfenwick 2500 return resptr;
1836     }
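// Hedged layout note: the resolveNode* helpers above write each intermediate sample into a
// per-thread slice of m_samples (roffset = m_samplesize * tid), so thread tid presumably owns
// the half-open range [m_samplesize*tid, m_samplesize*(tid+1)). The final memcpy in
// resolveNodeWorker then copies m_samplesize doubles from that slice into the DataExpanded
// result at the sample's point offset.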
1837    
1838 jfenwick 1865 std::string
1839     DataLazy::toString() const
1840     {
1841 jfenwick 1886 ostringstream oss;
1842 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1843 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1844 jfenwick 2737 {
1845 jfenwick 2795 case 1: // tree format
1846 jfenwick 2737 oss << endl;
1847 jfenwick 2795 intoTreeString(oss,"");
1848     break;
1849     case 2: // just the depth
1850     break;
1851     default:
1852     intoString(oss);
1853     break;
1854 jfenwick 2737 }
1855 jfenwick 1886 return oss.str();
1856 jfenwick 1865 }
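// Hedged note on LAZY_STR_FMT: the switch above selects what follows the "[depth=...]" prefix:
//   1 - the indented tree form produced by intoTreeString()
//   2 - nothing (the depth prefix only)
//   any other value - the bracketed infix form produced by intoString()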
1857    
1858 jfenwick 1899
1859 jfenwick 1886 void
1860     DataLazy::intoString(ostringstream& oss) const
1861     {
1862 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1863 jfenwick 1886 switch (getOpgroup(m_op))
1864     {
1865 jfenwick 1889 case G_IDENTITY:
1866     if (m_id->isExpanded())
1867     {
1868     oss << "E";
1869     }
1870     else if (m_id->isTagged())
1871     {
1872     oss << "T";
1873     }
1874     else if (m_id->isConstant())
1875     {
1876     oss << "C";
1877     }
1878     else
1879     {
1880     oss << "?";
1881     }
1882 jfenwick 1886 oss << '@' << m_id.get();
1883     break;
1884     case G_BINARY:
1885     oss << '(';
1886     m_left->intoString(oss);
1887     oss << ' ' << opToString(m_op) << ' ';
1888     m_right->intoString(oss);
1889     oss << ')';
1890     break;
1891     case G_UNARY:
1892 jfenwick 2147 case G_UNARY_P:
1893 jfenwick 2037 case G_NP1OUT:
1894 jfenwick 2084 case G_NP1OUT_P:
1895 jfenwick 2721 case G_REDUCTION:
1896 jfenwick 1886 oss << opToString(m_op) << '(';
1897     m_left->intoString(oss);
1898     oss << ')';
1899     break;
1900 jfenwick 2066 case G_TENSORPROD:
1901     oss << opToString(m_op) << '(';
1902     m_left->intoString(oss);
1903     oss << ", ";
1904     m_right->intoString(oss);
1905     oss << ')';
1906     break;
1907 jfenwick 2496 case G_NP1OUT_2P:
1908     oss << opToString(m_op) << '(';
1909     m_left->intoString(oss);
1910     oss << ", " << m_axis_offset << ", " << m_transpose;
1911     oss << ')';
1912     break;
1913 jfenwick 3035 case G_CONDEVAL:
1914     oss << opToString(m_op)<< '(' ;
1915     m_mask->intoString(oss);
1916     oss << " ? ";
1917     m_left->intoString(oss);
1918     oss << " : ";
1919     m_right->intoString(oss);
1920     oss << ')';
1921     break;
1922 jfenwick 1886 default:
1923     oss << "UNKNOWN";
1924     }
1925     }
1926    
1927 jfenwick 2737
1928     void
1929     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1930     {
1931     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1932     switch (getOpgroup(m_op))
1933     {
1934     case G_IDENTITY:
1935     if (m_id->isExpanded())
1936     {
1937     oss << "E";
1938     }
1939     else if (m_id->isTagged())
1940     {
1941     oss << "T";
1942     }
1943     else if (m_id->isConstant())
1944     {
1945     oss << "C";
1946     }
1947     else
1948     {
1949     oss << "?";
1950     }
1951     oss << '@' << m_id.get() << endl;
1952     break;
1953     case G_BINARY:
1954     oss << opToString(m_op) << endl;
1955     indent+='.';
1956     m_left->intoTreeString(oss, indent);
1957     m_right->intoTreeString(oss, indent);
1958     break;
1959     case G_UNARY:
1960     case G_UNARY_P:
1961     case G_NP1OUT:
1962     case G_NP1OUT_P:
1963     case G_REDUCTION:
1964     oss << opToString(m_op) << endl;
1965     indent+='.';
1966     m_left->intoTreeString(oss, indent);
1967     break;
1968     case G_TENSORPROD:
1969     oss << opToString(m_op) << endl;
1970     indent+='.';
1971     m_left->intoTreeString(oss, indent);
1972     m_right->intoTreeString(oss, indent);
1973     break;
1974     case G_NP1OUT_2P:
1975     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1976     indent+='.';
1977     m_left->intoTreeString(oss, indent);
1978     break;
1979     default:
1980     oss << "UNKNOWN";
1981     }
1982     }
1983    
1984    
1985 jfenwick 1865 DataAbstract*
1986     DataLazy::deepCopy()
1987     {
1988 jfenwick 1935 switch (getOpgroup(m_op))
1989 jfenwick 1865 {
1990 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1991 jfenwick 2721 case G_UNARY:
1992     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1993     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1994 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1995 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1996     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1997 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1998     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999 jfenwick 1935 default:
2000     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2001 jfenwick 1865 }
2002     }
2003    
2004    
2005 jfenwick 2721
2006 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2007     // Instances of DataReady can look at the size of their vectors.
2008     // For lazy though, it could be the size the data would be if it were resolved;
2009     // or it could be some function of the lengths of the DataReady instances which
2010     // form part of the expression.
2011     // Rather than have people making assumptions, I have disabled the method.
2012 jfenwick 1865 DataTypes::ValueType::size_type
2013     DataLazy::getLength() const
2014     {
2015 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2016 jfenwick 1865 }
2017    
2018    
2019     DataAbstract*
2020     DataLazy::getSlice(const DataTypes::RegionType& region) const
2021     {
2022 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2023 jfenwick 1865 }
2024    
2025 jfenwick 1935
2026     // To do this we need to rely on our child nodes
2027 jfenwick 1865 DataTypes::ValueType::size_type
2028     DataLazy::getPointOffset(int sampleNo,
2029 jfenwick 1935 int dataPointNo)
2030     {
2031     if (m_op==IDENTITY)
2032     {
2033     return m_id->getPointOffset(sampleNo,dataPointNo);
2034     }
2035     if (m_readytype!='E')
2036     {
2037     collapse();
2038     return m_id->getPointOffset(sampleNo,dataPointNo);
2039     }
2040     // at this point we do not have an identity node and the expression will be Expanded
2041     // so we only need to know which child to ask
2042     if (m_left->m_readytype=='E')
2043     {
2044     return m_left->getPointOffset(sampleNo,dataPointNo);
2045     }
2046     else
2047     {
2048     return m_right->getPointOffset(sampleNo,dataPointNo);
2049     }
2050     }
2051    
2052     // To do this we need to rely on our child nodes
2053     DataTypes::ValueType::size_type
2054     DataLazy::getPointOffset(int sampleNo,
2055 jfenwick 1865 int dataPointNo) const
2056     {
2057 jfenwick 1935 if (m_op==IDENTITY)
2058     {
2059     return m_id->getPointOffset(sampleNo,dataPointNo);
2060     }
2061     if (m_readytype=='E')
2062     {
2063     // at this point we do not have an identity node and the expression will be Expanded
2064     // so we only need to know which child to ask
2065     if (m_left->m_readytype=='E')
2066     {
2067     return m_left->getPointOffset(sampleNo,dataPointNo);
2068     }
2069     else
2070     {
2071     return m_right->getPointOffset(sampleNo,dataPointNo);
2072     }
2073     }
2074     if (m_readytype=='C')
2075     {
2076     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2077     }
2078     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2079 jfenwick 1865 }
2080    
2081 jfenwick 2271
2082     // I have decided to let Data:: handle this issue.
2083 jfenwick 1901 void
2084     DataLazy::setToZero()
2085     {
2086 jfenwick 2271 // DataTypes::ValueType v(getNoValues(),0);
2087     // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2088     // m_op=IDENTITY;
2089     // m_right.reset();
2090     // m_left.reset();
2091     // m_readytype='C';
2092     // m_buffsRequired=1;
2093    
2094 jfenwick 2514 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
2095 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2096 jfenwick 1901 }
2097    
2098 jfenwick 2271 bool
2099     DataLazy::actsExpanded() const
2100     {
2101     return (m_readytype=='E');
2102     }
2103    
2104 jfenwick 1865 } // end namespace
