Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp



Revision 4507
Wed Jul 24 02:50:22 2013 UTC by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 60721 byte(s)
It begins
1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 jfenwick 1865
16    
17     #include "DataLazy.h"
18 caltinay 3317 #include "esysUtils/Esys_MPI.h"
19 jfenwick 1885 #ifdef _OPENMP
20     #include <omp.h>
21     #endif
22 jfenwick 1865 #include "FunctionSpace.h"
23     #include "DataTypes.h"
24 jfenwick 1868 #include "Data.h"
25 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
26 jfenwick 1950 #include "Utils.h"
27 jfenwick 1865
28 jfenwick 2472 #include "EscriptParams.h"
29    
30 caltinay 3317 #ifdef USE_NETCDF
31     #include <netcdfcpp.h>
32     #endif
33    
34 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
35    
36 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
37 jfenwick 2092 #define LAZYDEBUG(X)
38 jfenwick 2157 namespace
39     {
40     bool privdebug=false;
41 jfenwick 2092
42 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
43     #define DISABLEDEBUG privdebug=false;
44     }
45    
46 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47 jfenwick 2177
48 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49 jfenwick 2472
50 jfenwick 2501
51 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53 jfenwick 1899 /*
54     How does DataLazy work?
55     ~~~~~~~~~~~~~~~~~~~~~~~
56    
57     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
58     denoted left and right.
59    
60     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
61     This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64 caltinay 4286 Note that IDENTITY is not considered a unary operation.
65 jfenwick 1899
66     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67     It must however form a DAG (directed acyclic graph).
68     I will refer to individual DataLazy objects within the structure as nodes.
69    
70     Each node also stores:
71     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72     evaluated.
73 caltinay 4286 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
74 jfenwick 1899 evaluate the expression.
75     - m_samplesize ~ the number of doubles stored in a sample.
76    
77     When a new node is created, the above values are computed based on the values in the child nodes.
78     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
79    
80     The resolve method, which produces a DataReady from a DataLazy, does the following:
81     1) Create a DataReady to hold the new result.
82     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
83     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
84    
85     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
86    
87     resolveSample returns a Vector* and an offset within that vector where the result is stored.
88     Normally, this would be v, but for identity nodes their internal vector is returned instead.
89    
90     The convention that I use is that the resolve methods should store their results starting at the offset they are passed.
91    
92     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94 jfenwick 2037
95 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96 jfenwick 2037 1) Add to the ES_optype.
97     2) determine what opgroup your operation belongs to (X)
98     3) add a string for the op to the end of ES_opstrings
99     4) increase ES_opcount
100     5) add an entry (X) to opgroups
101     6) add an entry to the switch in collapseToReady
102     7) add an entry to resolveX
103 jfenwick 1899 */
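/*
A minimal standalone sketch (illustrative only, not escript code) of the DAG idea
described above: leaves wrap ready data (IDENTITY) while interior nodes store an
operation plus shared-pointer links to their children, so an expression like c=a+a
shares the node for a and forms a DAG rather than a tree.  All names below are
hypothetical stand-ins for DataLazy/DataReady.

    #include <memory>
    #include <string>

    struct Node;
    typedef std::shared_ptr<Node> NodePtr;

    struct Node
    {
        std::string op;         // "IDENTITY" for leaves, else the operation name
        double value;           // payload for IDENTITY leaves (stands in for m_id)
        NodePtr left, right;    // children; empty for leaves

        double eval() const     // stands in for resolve/collapse
        {
            if (op=="IDENTITY") { return value; }
            if (op=="+")        { return left->eval()+right->eval(); }
            return 0;           // other operations elided
        }
    };

    // usage:
    //   NodePtr a(new Node{"IDENTITY", 2.0, NodePtr(), NodePtr()});
    //   NodePtr c(new Node{"+", 0.0, a, a});   // c=a+a reuses the child a
    //   double v=c->eval();                    // == 4.0
*/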
104    
105    
106 jfenwick 1865 using namespace std;
107 jfenwick 1868 using namespace boost;
108 jfenwick 1865
109     namespace escript
110     {
111    
112     namespace
113     {
114    
115 jfenwick 2777
116     // enabling this will print out whenever the maximum stacksize used by resolve increases
117     // it assumes _OPENMP is also in use
118     //#define LAZY_STACK_PROF
119    
120    
121    
122     #ifndef _OPENMP
123     #ifdef LAZY_STACK_PROF
124     #undef LAZY_STACK_PROF
125     #endif
126     #endif
127    
128    
129     #ifdef LAZY_STACK_PROF
130     std::vector<void*> stackstart(getNumberOfThreads());
131     std::vector<void*> stackend(getNumberOfThreads());
132     size_t maxstackuse=0;
133     #endif
134    
135 jfenwick 1886 enum ES_opgroup
136     {
137     G_UNKNOWN,
138     G_IDENTITY,
139 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
140 jfenwick 2037 G_UNARY, // pointwise operations with one argument
141 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
142 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
143 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
144 jfenwick 2496 G_TENSORPROD, // general tensor product
145 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
146 jfenwick 3035 G_REDUCTION, // non-pointwise unary op with a scalar output
147     G_CONDEVAL
148 jfenwick 1886 };
149    
150    
151    
152    
153 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154     "sin","cos","tan",
155 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
156     "asinh","acosh","atanh",
157 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
158 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159 jfenwick 2066 "symmetric","nonsymmetric",
160 jfenwick 2084 "prod",
161 jfenwick 2496 "transpose", "trace",
162 jfenwick 2721 "swapaxes",
163 jfenwick 3035 "minval", "maxval",
164     "condEval"};
165     int ES_opcount=44;
166 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167     G_UNARY,G_UNARY,G_UNARY, //10
168     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
169     G_UNARY,G_UNARY,G_UNARY, // 20
170 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
171     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
172 jfenwick 2066 G_NP1OUT,G_NP1OUT,
173 jfenwick 2084 G_TENSORPROD,
174 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
175 jfenwick 2721 G_NP1OUT_2P,
176 jfenwick 3035 G_REDUCTION, G_REDUCTION,
177     G_CONDEVAL};
178 jfenwick 1886 inline
179     ES_opgroup
180     getOpgroup(ES_optype op)
181     {
182     return opgroups[op];
183     }
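/*
Illustrative note (not part of the original build): ES_opstrings, opgroups and
ES_opcount are all indexed by the same ES_optype value, which is why steps 3-5 of
the "add a new operator" recipe above touch this block together.  Assuming a
C++11 compiler, a compile-time check along these lines would catch a mismatch:

    static_assert(sizeof(ES_opstrings)/sizeof(ES_opstrings[0]) ==
                  sizeof(opgroups)/sizeof(opgroups[0]),
                  "ES_opstrings and opgroups need one entry per ES_optype");
*/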
184    
185 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
186     FunctionSpace
187     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188     {
189     // perhaps this should call interpolate and throw or something?
190     // maybe we need an interpolate node -
191     // that way, if interpolate is required in any other op we can just throw a
192     // programming error exception.
193 jfenwick 1879
194 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
195     FunctionSpace r=right->getFunctionSpace();
196     if (l!=r)
197     {
198 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199     if (res==1)
200 jfenwick 1943 {
201     return l;
202     }
203 jfenwick 4264 if (res==-1)
204 jfenwick 1943 {
205     return r;
206     }
207     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208     }
209     return l;
210 jfenwick 1865 }
211    
212     // return the shape of the result of "left op right"
213 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
214 jfenwick 1865 DataTypes::ShapeType
215     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216     {
217 jfenwick 1879 if (left->getShape()!=right->getShape())
218     {
219 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220 jfenwick 1908 {
221     throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
222     }
223 jfenwick 2721
224 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
225     {
226     return right->getShape();
227     }
228     if (right->getRank()==0)
229     {
230     return left->getShape();
231     }
232     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233 jfenwick 1879 }
234     return left->getShape();
235 jfenwick 1865 }
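/*
Example of the rule implemented above (illustrative only): for input shapes s0, s1
    s0 == s1        -> result shape s0
    rank(s0) == 0   -> result shape s1   (scalar left, e.g. 2+Vector)
    rank(s1) == 0   -> result shape s0
    anything else   -> DataException
So scalar*(3,3) is lazily typed as (3,3), while adding a (3,) vector to a (3,3)
tensor is rejected here rather than at resolve time.
*/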
236    
237 jfenwick 2084 // return the shape for "op left"
238    
239     DataTypes::ShapeType
240 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241 jfenwick 2084 {
242     switch(op)
243     {
244     case TRANS:
245 jfenwick 2199 { // for the scoping of variables
246     const DataTypes::ShapeType& s=left->getShape();
247     DataTypes::ShapeType sh;
248     int rank=left->getRank();
249     if (axis_offset<0 || axis_offset>rank)
250     {
251 caltinay 2779 stringstream e;
252     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253     throw DataException(e.str());
254     }
255     for (int i=0; i<rank; i++)
256 jfenwick 2199 {
257     int index = (axis_offset+i)%rank;
258 caltinay 2779 sh.push_back(s[index]); // Append to new shape
259     }
260 jfenwick 2199 return sh;
261     }
262 jfenwick 2084 break;
263     case TRACE:
264 jfenwick 2199 {
265     int rank=left->getRank();
266     if (rank<2)
267     {
268     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269     }
270     if ((axis_offset>rank-2) || (axis_offset<0))
271     {
272     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273     }
274     if (rank==2)
275     {
276     return DataTypes::scalarShape;
277     }
278     else if (rank==3)
279     {
280     DataTypes::ShapeType sh;
281     if (axis_offset==0)
282     {
283     sh.push_back(left->getShape()[2]);
284     }
285     else // offset==1
286     {
287     sh.push_back(left->getShape()[0]);
288     }
289     return sh;
290     }
291     else if (rank==4)
292     {
293     DataTypes::ShapeType sh;
294     const DataTypes::ShapeType& s=left->getShape();
295     if (axis_offset==0)
296     {
297     sh.push_back(s[2]);
298     sh.push_back(s[3]);
299     }
300     else if (axis_offset==1)
301     {
302     sh.push_back(s[0]);
303     sh.push_back(s[3]);
304     }
305     else // offset==2
306     {
307     sh.push_back(s[0]);
308     sh.push_back(s[1]);
309     }
310     return sh;
311     }
312     else // unknown rank
313     {
314     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315     }
316     }
317 jfenwick 2084 break;
318     default:
319     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320     }
321     }
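/*
Worked example for the TRANS branch above (illustrative only): with s=(2,3,4),
rank=3 and axis_offset=1 the loop picks
    sh[0]=s[(1+0)%3]=s[1]=3
    sh[1]=s[(1+1)%3]=s[2]=4
    sh[2]=s[(1+2)%3]=s[0]=2
giving the transposed shape (3,4,2).  For TRACE on the same rank-3 shape with
axis_offset=0 only s[2] survives, i.e. the result shape is (4,).
*/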
322    
323 jfenwick 2496 DataTypes::ShapeType
324     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
325     {
326     // This code taken from the Data.cpp swapaxes() method
327     // Some of the checks are probably redundant here
328     int axis0_tmp,axis1_tmp;
329     const DataTypes::ShapeType& s=left->getShape();
330     DataTypes::ShapeType out_shape;
331     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
332     // which goes through all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
333     int rank=left->getRank();
334     if (rank<2) {
335     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336     }
337     if (axis0<0 || axis0>rank-1) {
338 caltinay 2779 stringstream e;
339     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340     throw DataException(e.str());
341 jfenwick 2496 }
342     if (axis1<0 || axis1>rank-1) {
343 caltinay 2782 stringstream e;
344 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345     throw DataException(e.str());
346 jfenwick 2496 }
347     if (axis0 == axis1) {
348     throw DataException("Error - Data::swapaxes: axis indices must be different.");
349     }
350     if (axis0 > axis1) {
351     axis0_tmp=axis1;
352     axis1_tmp=axis0;
353     } else {
354     axis0_tmp=axis0;
355     axis1_tmp=axis1;
356     }
357     for (int i=0; i<rank; i++) {
358     if (i == axis0_tmp) {
359     out_shape.push_back(s[axis1_tmp]);
360     } else if (i == axis1_tmp) {
361     out_shape.push_back(s[axis0_tmp]);
362     } else {
363     out_shape.push_back(s[i]);
364     }
365     }
366     return out_shape;
367     }
368    
369    
370 jfenwick 2066 // determine the output shape for the general tensor product operation
371     // the additional parameters return information required later for the product
372     // the majority of this code is copy pasted from C_General_Tensor_Product
373     DataTypes::ShapeType
374     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375 jfenwick 1865 {
376 jfenwick 2066
377     // Get rank and shape of inputs
378     int rank0 = left->getRank();
379     int rank1 = right->getRank();
380     const DataTypes::ShapeType& shape0 = left->getShape();
381     const DataTypes::ShapeType& shape1 = right->getShape();
382    
383     // Prepare for the loops of the product and verify compatibility of shapes
384     int start0=0, start1=0;
385     if (transpose == 0) {}
386     else if (transpose == 1) { start0 = axis_offset; }
387     else if (transpose == 2) { start1 = rank1-axis_offset; }
388     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390 jfenwick 2085 if (rank0<axis_offset)
391     {
392     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393     }
394 jfenwick 2066
395     // Adjust the shapes for transpose
396     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
397     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
398     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401     // Prepare for the loops of the product
402     SL=1, SM=1, SR=1;
403     for (int i=0; i<rank0-axis_offset; i++) {
404     SL *= tmpShape0[i];
405     }
406     for (int i=rank0-axis_offset; i<rank0; i++) {
407     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409     }
410     SM *= tmpShape0[i];
411     }
412     for (int i=axis_offset; i<rank1; i++) {
413     SR *= tmpShape1[i];
414     }
415    
416     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
418     { // block to limit the scope of out_index
419     int out_index=0;
420     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
422     }
423 jfenwick 2086
424     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
425     {
426     ostringstream os;
427     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
428     throw DataException(os.str());
429     }
430    
431 jfenwick 2066 return shape2;
432 jfenwick 1865 }
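/*
Worked example for GTPShape (illustrative only): a plain matrix-matrix product
corresponds to shape0=(3,4), shape1=(4,5), axis_offset=1, transpose=0.  The loops
above give SL=3, SM=4, SR=5 and an output of rank rank0+rank1-2*axis_offset=2,
namely (3,5): the leading rank0-axis_offset dimensions of the left operand
followed by the trailing rank1-axis_offset dimensions of the right operand.
*/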
433    
434     } // end anonymous namespace
435    
436    
437 jfenwick 1899
438     // Return a string representing the operation
439 jfenwick 1865 const std::string&
440     opToString(ES_optype op)
441     {
442     if (op<0 || op>=ES_opcount)
443     {
444     op=UNKNOWNOP;
445     }
446     return ES_opstrings[op];
447     }
448    
449 jfenwick 2500 void DataLazy::LazyNodeSetup()
450     {
451     #ifdef _OPENMP
452     int numthreads=omp_get_max_threads();
453     m_samples.resize(numthreads*m_samplesize);
454     m_sampleids=new int[numthreads];
455     for (int i=0;i<numthreads;++i)
456     {
457     m_sampleids[i]=-1;
458     }
459     #else
460     m_samples.resize(m_samplesize);
461     m_sampleids=new int[1];
462     m_sampleids[0]=-1;
463     #endif // _OPENMP
464     }
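/*
Buffer layout set up above (illustrative only): m_samples is one flat vector
partitioned into a slot per OpenMP thread,

    [ thread 0 sample | thread 1 sample | ... | thread N-1 sample ]
      offset 0          offset m_samplesize    offset (N-1)*m_samplesize

and m_sampleids[tid] records which sampleNo currently occupies slot tid (-1 means
empty).  resolveNodeSample below relies on this: for expanded nodes it returns
roffset = tid*m_samplesize, and it can return early when m_sampleids[tid] already
equals the requested sample number.
*/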
465 jfenwick 1865
466 jfenwick 2500
467     // Creates an identity node
468 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
469 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
470 jfenwick 2500 ,m_sampleids(0),
471     m_samples(1)
472 jfenwick 1865 {
473 jfenwick 1879 if (p->isLazy())
474     {
475     // I don't want identity of Lazy.
476     // Question: Why would that be so bad?
477     // Answer: We assume that the child of ID is something we can call getVector on
478     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
479     }
480     else
481     {
482 jfenwick 2271 p->makeLazyShared();
483 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484     makeIdentity(dr);
485 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486 jfenwick 1879 }
487 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
488 jfenwick 1865 }
489    
490 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492 jfenwick 2066 m_op(op),
493     m_axis_offset(0),
494     m_transpose(0),
495     m_SL(0), m_SM(0), m_SR(0)
496 jfenwick 1886 {
497 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498 jfenwick 1886 {
499     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY, NP1OUT, or REDUCTION operations.");
500     }
501 jfenwick 2066
502 jfenwick 1886 DataLazy_ptr lleft;
503     if (!left->isLazy())
504     {
505     lleft=DataLazy_ptr(new DataLazy(left));
506     }
507     else
508     {
509     lleft=dynamic_pointer_cast<DataLazy>(left);
510     }
511 jfenwick 1889 m_readytype=lleft->m_readytype;
512 jfenwick 1886 m_left=lleft;
513     m_samplesize=getNumDPPSample()*getNoValues();
514 jfenwick 2177 m_children=m_left->m_children+1;
515     m_height=m_left->m_height+1;
516 jfenwick 2500 LazyNodeSetup();
517 jfenwick 2177 SIZELIMIT
518 jfenwick 1886 }
519    
520    
521 jfenwick 1943 // In this constructor we need to consider interpolation
522 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523     : parent(resultFS(left,right,op), resultShape(left,right,op)),
524 jfenwick 2066 m_op(op),
525     m_SL(0), m_SM(0), m_SR(0)
526 jfenwick 1879 {
527 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
529 jfenwick 1886 {
530 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531 jfenwick 1886 }
532 jfenwick 1943
533     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
534     {
535     FunctionSpace fs=getFunctionSpace();
536     Data ltemp(left);
537     Data tmp(ltemp,fs);
538     left=tmp.borrowDataPtr();
539     }
540 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
541 jfenwick 1943 {
542     Data tmp(Data(right),getFunctionSpace());
543     right=tmp.borrowDataPtr();
544 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545 jfenwick 1943 }
546     left->operandCheck(*right);
547    
548 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
549 jfenwick 1879 {
550     m_left=dynamic_pointer_cast<DataLazy>(left);
551 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552 jfenwick 1879 }
553     else
554     {
555     m_left=DataLazy_ptr(new DataLazy(left));
556 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557 jfenwick 1879 }
558     if (right->isLazy())
559     {
560     m_right=dynamic_pointer_cast<DataLazy>(right);
561 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562 jfenwick 1879 }
563     else
564     {
565     m_right=DataLazy_ptr(new DataLazy(right));
566 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567 jfenwick 1879 }
568 jfenwick 1889 char lt=m_left->m_readytype;
569     char rt=m_right->m_readytype;
570     if (lt=='E' || rt=='E')
571     {
572     m_readytype='E';
573     }
574     else if (lt=='T' || rt=='T')
575     {
576     m_readytype='T';
577     }
578     else
579     {
580     m_readytype='C';
581     }
582 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
583 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
584     m_height=max(m_left->m_height,m_right->m_height)+1;
585 jfenwick 2500 LazyNodeSetup();
586 jfenwick 2177 SIZELIMIT
587 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588 jfenwick 1879 }
589    
590 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592     m_op(op),
593     m_axis_offset(axis_offset),
594     m_transpose(transpose)
595     {
596     if ((getOpgroup(op)!=G_TENSORPROD))
597     {
598     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599     }
600     if ((transpose>2) || (transpose<0))
601     {
602     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603     }
604     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
605     {
606     FunctionSpace fs=getFunctionSpace();
607     Data ltemp(left);
608     Data tmp(ltemp,fs);
609     left=tmp.borrowDataPtr();
610     }
611     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
612     {
613     Data tmp(Data(right),getFunctionSpace());
614     right=tmp.borrowDataPtr();
615     }
616 jfenwick 2195 // left->operandCheck(*right);
617 jfenwick 1879
618 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {
620     m_left=dynamic_pointer_cast<DataLazy>(left);
621     }
622     else
623     {
624     m_left=DataLazy_ptr(new DataLazy(left));
625     }
626     if (right->isLazy())
627     {
628     m_right=dynamic_pointer_cast<DataLazy>(right);
629     }
630     else
631     {
632     m_right=DataLazy_ptr(new DataLazy(right));
633     }
634     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')
637     {
638     m_readytype='E';
639     }
640     else if (lt=='T' || rt=='T')
641     {
642     m_readytype='T';
643     }
644     else
645     {
646     m_readytype='C';
647     }
648     m_samplesize=getNumDPPSample()*getNoValues();
649 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
650     m_height=max(m_left->m_height,m_right->m_height)+1;
651 jfenwick 2500 LazyNodeSetup();
652 jfenwick 2177 SIZELIMIT
653 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654 jfenwick 2066 }
655    
656    
657 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659 jfenwick 2084 m_op(op),
660     m_axis_offset(axis_offset),
661 jfenwick 2147 m_transpose(0),
662     m_tol(0)
663 jfenwick 2084 {
664     if ((getOpgroup(op)!=G_NP1OUT_P))
665     {
666     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667     }
668     DataLazy_ptr lleft;
669     if (!left->isLazy())
670     {
671     lleft=DataLazy_ptr(new DataLazy(left));
672     }
673     else
674     {
675     lleft=dynamic_pointer_cast<DataLazy>(left);
676     }
677     m_readytype=lleft->m_readytype;
678     m_left=lleft;
679     m_samplesize=getNumDPPSample()*getNoValues();
680 jfenwick 2177 m_children=m_left->m_children+1;
681     m_height=m_left->m_height+1;
682 jfenwick 2500 LazyNodeSetup();
683 jfenwick 2177 SIZELIMIT
684 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685 jfenwick 2084 }
686    
687 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688     : parent(left->getFunctionSpace(), left->getShape()),
689     m_op(op),
690     m_axis_offset(0),
691     m_transpose(0),
692     m_tol(tol)
693     {
694     if ((getOpgroup(op)!=G_UNARY_P))
695     {
696     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697     }
698     DataLazy_ptr lleft;
699     if (!left->isLazy())
700     {
701     lleft=DataLazy_ptr(new DataLazy(left));
702     }
703     else
704     {
705     lleft=dynamic_pointer_cast<DataLazy>(left);
706     }
707     m_readytype=lleft->m_readytype;
708     m_left=lleft;
709     m_samplesize=getNumDPPSample()*getNoValues();
710 jfenwick 2177 m_children=m_left->m_children+1;
711     m_height=m_left->m_height+1;
712 jfenwick 2500 LazyNodeSetup();
713 jfenwick 2177 SIZELIMIT
714 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715     }
716 jfenwick 2084
717 jfenwick 2496
718     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720     m_op(op),
721     m_axis_offset(axis0),
722     m_transpose(axis1),
723     m_tol(0)
724     {
725     if ((getOpgroup(op)!=G_NP1OUT_2P))
726     {
727     throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
728     }
729     DataLazy_ptr lleft;
730     if (!left->isLazy())
731     {
732     lleft=DataLazy_ptr(new DataLazy(left));
733     }
734     else
735     {
736     lleft=dynamic_pointer_cast<DataLazy>(left);
737     }
738     m_readytype=lleft->m_readytype;
739     m_left=lleft;
740     m_samplesize=getNumDPPSample()*getNoValues();
741     m_children=m_left->m_children+1;
742     m_height=m_left->m_height+1;
743 jfenwick 2500 LazyNodeSetup();
744 jfenwick 2496 SIZELIMIT
745     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746     }
747    
748 jfenwick 3035
749     namespace
750     {
751    
752     inline int max3(int a, int b, int c)
753     {
754     int t=(a>b?a:b);
755     return (t>c?t:c);
756    
757     }
758     }
759    
760     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761     : parent(left->getFunctionSpace(), left->getShape()),
762     m_op(CONDEVAL),
763     m_axis_offset(0),
764     m_transpose(0),
765     m_tol(0)
766     {
767    
768     DataLazy_ptr lmask;
769     DataLazy_ptr lleft;
770     DataLazy_ptr lright;
771     if (!mask->isLazy())
772     {
773     lmask=DataLazy_ptr(new DataLazy(mask));
774     }
775     else
776     {
777     lmask=dynamic_pointer_cast<DataLazy>(mask);
778     }
779     if (!left->isLazy())
780     {
781     lleft=DataLazy_ptr(new DataLazy(left));
782     }
783     else
784     {
785     lleft=dynamic_pointer_cast<DataLazy>(left);
786     }
787     if (!right->isLazy())
788     {
789     lright=DataLazy_ptr(new DataLazy(right));
790     }
791     else
792     {
793     lright=dynamic_pointer_cast<DataLazy>(right);
794     }
795     m_readytype=lmask->m_readytype;
796     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797     {
798     throw DataException("Programmer Error - condEval arguments must have the same readytype");
799     }
800     m_left=lleft;
801     m_right=lright;
802     m_mask=lmask;
803     m_samplesize=getNumDPPSample()*getNoValues();
804     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806     LazyNodeSetup();
807     SIZELIMIT
808     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809     }
810    
811    
812    
813 jfenwick 1865 DataLazy::~DataLazy()
814     {
815 jfenwick 2500 delete[] m_sampleids;
816 jfenwick 1865 }
817    
818 jfenwick 1879
819 jfenwick 1899 /*
820     \brief Evaluates the expression using methods on Data.
821     This does the work for the collapse method.
822     For reasons of efficiency do not call this method on DataExpanded nodes.
823     */
824 jfenwick 1889 DataReady_ptr
825     DataLazy::collapseToReady()
826     {
827     if (m_readytype=='E')
828     { // this is more an efficiency concern than anything else
829     throw DataException("Programmer Error - do not use collapse on Expanded data.");
830     }
831     if (m_op==IDENTITY)
832     {
833     return m_id;
834     }
835     DataReady_ptr pleft=m_left->collapseToReady();
836     Data left(pleft);
837     Data right;
838 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
839 jfenwick 1889 {
840     right=Data(m_right->collapseToReady());
841     }
842     Data result;
843     switch(m_op)
844     {
845     case ADD:
846     result=left+right;
847     break;
848     case SUB:
849     result=left-right;
850     break;
851     case MUL:
852     result=left*right;
853     break;
854     case DIV:
855     result=left/right;
856     break;
857     case SIN:
858     result=left.sin();
859     break;
860     case COS:
861     result=left.cos();
862     break;
863     case TAN:
864     result=left.tan();
865     break;
866     case ASIN:
867     result=left.asin();
868     break;
869     case ACOS:
870     result=left.acos();
871     break;
872     case ATAN:
873     result=left.atan();
874     break;
875     case SINH:
876     result=left.sinh();
877     break;
878     case COSH:
879     result=left.cosh();
880     break;
881     case TANH:
882     result=left.tanh();
883     break;
884     case ERF:
885     result=left.erf();
886     break;
887     case ASINH:
888     result=left.asinh();
889     break;
890     case ACOSH:
891     result=left.acosh();
892     break;
893     case ATANH:
894     result=left.atanh();
895     break;
896     case LOG10:
897     result=left.log10();
898     break;
899     case LOG:
900     result=left.log();
901     break;
902     case SIGN:
903     result=left.sign();
904     break;
905     case ABS:
906     result=left.abs();
907     break;
908     case NEG:
909     result=left.neg();
910     break;
911     case POS:
912     // it doesn't mean anything for delayed.
913     // it will just trigger a deep copy of the lazy object
914     throw DataException("Programmer error - POS not supported for lazy data.");
915     break;
916     case EXP:
917     result=left.exp();
918     break;
919     case SQRT:
920     result=left.sqrt();
921     break;
922     case RECIP:
923     result=left.oneOver();
924     break;
925     case GZ:
926     result=left.wherePositive();
927     break;
928     case LZ:
929     result=left.whereNegative();
930     break;
931     case GEZ:
932     result=left.whereNonNegative();
933     break;
934     case LEZ:
935     result=left.whereNonPositive();
936     break;
937 jfenwick 2147 case NEZ:
938     result=left.whereNonZero(m_tol);
939     break;
940     case EZ:
941     result=left.whereZero(m_tol);
942     break;
943 jfenwick 2037 case SYM:
944     result=left.symmetric();
945     break;
946     case NSYM:
947     result=left.nonsymmetric();
948     break;
949 jfenwick 2066 case PROD:
950     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951     break;
952 jfenwick 2084 case TRANS:
953     result=left.transpose(m_axis_offset);
954     break;
955     case TRACE:
956     result=left.trace(m_axis_offset);
957     break;
958 jfenwick 2496 case SWAP:
959     result=left.swapaxes(m_axis_offset, m_transpose);
960     break;
961 jfenwick 2721 case MINVAL:
962     result=left.minval();
963     break;
964     case MAXVAL:
965     result=left.maxval();
966     break;
967 jfenwick 1889 default:
968 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969 jfenwick 1889 }
970     return result.borrowReadyPtr();
971     }
972    
973 jfenwick 1899 /*
974     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
975     This method uses the original methods on the Data class to evaluate the expressions.
976     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
977     the purpose of using DataLazy in the first place).
978     */
979 jfenwick 1889 void
980     DataLazy::collapse()
981     {
982     if (m_op==IDENTITY)
983     {
984     return;
985     }
986     if (m_readytype=='E')
987     { // this is more an efficiency concern than anything else
988     throw DataException("Programmer Error - do not use collapse on Expanded data.");
989     }
990     m_id=collapseToReady();
991     m_op=IDENTITY;
992     }
993    
994 jfenwick 1899
995 jfenwick 1889
996    
997    
998 jfenwick 2147
999 phornby 1987 #define PROC_OP(TYPE,X) \
1000 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1001     {\
1002     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1003     { \
1004     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1005 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1006 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1007 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1008 jfenwick 2152 lroffset+=leftstep; \
1009     rroffset+=rightstep; \
1010     }\
1011     lroffset+=oleftstep;\
1012     rroffset+=orightstep;\
1013 jfenwick 1889 }
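/*
Illustrative expansion of PROC_OP (not part of the build): for the ADD case in
resolveNodeBinary below, PROC_OP(NO_ARG,plus<double>()) unrolls to roughly

    for (int j=0;j<onumsteps;++j)
    {
        for (int i=0;i<numsteps;++i,resultp+=resultStep)
        {
            // add chunksize doubles starting at the current offsets
            tensor_binary_operation(chunksize, &((*left)[lroffset]),
                                    &((*right)[rroffset]), resultp,
                                    plus<double>());
            lroffset+=leftstep;
            rroffset+=rightstep;
        }
        lroffset+=oleftstep;
        rroffset+=orightstep;
    }

The inner loop walks datapoints (or single doubles when a scalar is being
broadcast); the outer loop only does real work when one operand is an expanded
scalar, as the step computations in resolveNodeBinary show.
*/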
1014    
1015 jfenwick 1899
1016 jfenwick 2500 // The result will be stored in m_samples
1017     // The return value is a pointer to the DataVector, offset is the offset within the return value
1018     const DataTypes::ValueType*
1019     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1020     {
1021     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1022     // collapse so we have a 'E' node or an IDENTITY for some other type
1023     if (m_readytype!='E' && m_op!=IDENTITY)
1024     {
1025     collapse();
1026     }
1027     if (m_op==IDENTITY)
1028     {
1029     const ValueType& vec=m_id->getVectorRO();
1030     roffset=m_id->getPointOffset(sampleNo, 0);
1031 jfenwick 2777 #ifdef LAZY_STACK_PROF
1032     int x;
1033     if (&x<stackend[omp_get_thread_num()])
1034     {
1035     stackend[omp_get_thread_num()]=&x;
1036     }
1037     #endif
1038 jfenwick 2500 return &(vec);
1039     }
1040     if (m_readytype!='E')
1041     {
1042     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1043     }
1044 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1045     {
1046     roffset=tid*m_samplesize;
1047     return &(m_samples); // sample is already resolved
1048     }
1049     m_sampleids[tid]=sampleNo;
1050 jfenwick 3035
1051 jfenwick 2500 switch (getOpgroup(m_op))
1052     {
1053     case G_UNARY:
1054     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1055     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1056     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1057     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1058     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1059     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1060 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1061 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1062 jfenwick 2500 default:
1063     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1064     }
1065     }
1066    
1067     const DataTypes::ValueType*
1068     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1069     {
1070     // we assume that any collapsing has been done before we get here
1071     // since we only have one argument we don't need to think about only
1072     // processing single points.
1073     // we will also know we won't get identity nodes
1074     if (m_readytype!='E')
1075     {
1076     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1077     }
1078     if (m_op==IDENTITY)
1079     {
1080     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1081     }
1082     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1083     const double* left=&((*leftres)[roffset]);
1084     roffset=m_samplesize*tid;
1085     double* result=&(m_samples[roffset]);
1086     switch (m_op)
1087     {
1088     case SIN:
1089     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1090     break;
1091     case COS:
1092     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1093     break;
1094     case TAN:
1095     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1096     break;
1097     case ASIN:
1098     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1099     break;
1100     case ACOS:
1101     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1102     break;
1103     case ATAN:
1104     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1105     break;
1106     case SINH:
1107     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1108     break;
1109     case COSH:
1110     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1111     break;
1112     case TANH:
1113     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1114     break;
1115     case ERF:
1116     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1117     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1118     #else
1119     tensor_unary_operation(m_samplesize, left, result, ::erf);
1120     break;
1121     #endif
1122     case ASINH:
1123     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1124     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1125     #else
1126     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1127     #endif
1128     break;
1129     case ACOSH:
1130     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1131     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1132     #else
1133     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1134     #endif
1135     break;
1136     case ATANH:
1137     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1138     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1139     #else
1140     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1141     #endif
1142     break;
1143     case LOG10:
1144     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1145     break;
1146     case LOG:
1147     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1148     break;
1149     case SIGN:
1150     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1151     break;
1152     case ABS:
1153     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1154     break;
1155     case NEG:
1156     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1157     break;
1158     case POS:
1159     // it doesn't mean anything for delayed.
1160     // it will just trigger a deep copy of the lazy object
1161     throw DataException("Programmer error - POS not supported for lazy data.");
1162     break;
1163     case EXP:
1164     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1165     break;
1166     case SQRT:
1167     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1168     break;
1169     case RECIP:
1170     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1171     break;
1172     case GZ:
1173     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1174     break;
1175     case LZ:
1176     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1177     break;
1178     case GEZ:
1179     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1180     break;
1181     case LEZ:
1182     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1183     break;
1184     // These are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1185     case NEZ:
1186     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1187     break;
1188     case EZ:
1189     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1190     break;
1191    
1192     default:
1193     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1194     }
1195     return &(m_samples);
1196     }
1197    
1198    
1199     const DataTypes::ValueType*
1200 jfenwick 2721 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1201     {
1202     // we assume that any collapsing has been done before we get here
1203     // since we only have one argument we don't need to think about only
1204     // processing single points.
1205     // we will also know we won't get identity nodes
1206     if (m_readytype!='E')
1207     {
1208     throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1209     }
1210     if (m_op==IDENTITY)
1211     {
1212     throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1213     }
1214     size_t loffset=0;
1215     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1216    
1217     roffset=m_samplesize*tid;
1218 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1219 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1220 jfenwick 2721 double* result=&(m_samples[roffset]);
1221     switch (m_op)
1222     {
1223     case MINVAL:
1224     {
1225 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1226     {
1227     FMin op;
1228     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1229     loffset+=psize;
1230     result++;
1231     }
1232 jfenwick 2721 }
1233     break;
1234     case MAXVAL:
1235     {
1236 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1237     {
1238 jfenwick 2721 FMax op;
1239     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1240 jfenwick 2734 loffset+=psize;
1241     result++;
1242     }
1243 jfenwick 2721 }
1244     break;
1245     default:
1246     throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1247     }
1248     return &(m_samples);
1249     }
1250    
1251     const DataTypes::ValueType*
1252 jfenwick 2500 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1253     {
1254     // we assume that any collapsing has been done before we get here
1255     // since we only have one argument we don't need to think about only
1256     // processing single points.
1257     if (m_readytype!='E')
1258     {
1259     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1260     }
1261     if (m_op==IDENTITY)
1262     {
1263     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1264     }
1265     size_t subroffset;
1266     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1267     roffset=m_samplesize*tid;
1268     size_t loop=0;
1269     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1270     size_t step=getNoValues();
1271     size_t offset=roffset;
1272     switch (m_op)
1273     {
1274     case SYM:
1275     for (loop=0;loop<numsteps;++loop)
1276     {
1277     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1278     subroffset+=step;
1279     offset+=step;
1280     }
1281     break;
1282     case NSYM:
1283     for (loop=0;loop<numsteps;++loop)
1284     {
1285     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1286     subroffset+=step;
1287     offset+=step;
1288     }
1289     break;
1290     default:
1291     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1292     }
1293     return &m_samples;
1294     }
1295    
1296     const DataTypes::ValueType*
1297     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1298     {
1299     // we assume that any collapsing has been done before we get here
1300     // since we only have one argument we don't need to think about only
1301     // processing single points.
1302     if (m_readytype!='E')
1303     {
1304     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1305     }
1306     if (m_op==IDENTITY)
1307     {
1308     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1309     }
1310     size_t subroffset;
1311     size_t offset;
1312     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1313     roffset=m_samplesize*tid;
1314     offset=roffset;
1315     size_t loop=0;
1316     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1317     size_t outstep=getNoValues();
1318     size_t instep=m_left->getNoValues();
1319     switch (m_op)
1320     {
1321     case TRACE:
1322     for (loop=0;loop<numsteps;++loop)
1323     {
1324     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1325     subroffset+=instep;
1326     offset+=outstep;
1327     }
1328     break;
1329     case TRANS:
1330     for (loop=0;loop<numsteps;++loop)
1331     {
1332     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1333     subroffset+=instep;
1334     offset+=outstep;
1335     }
1336     break;
1337     default:
1338     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1339     }
1340     return &m_samples;
1341     }
1342    
1343    
1344     const DataTypes::ValueType*
1345     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1346     {
1347     if (m_readytype!='E')
1348     {
1349     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1350     }
1351     if (m_op==IDENTITY)
1352     {
1353     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1354     }
1355     size_t subroffset;
1356     size_t offset;
1357     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1358     roffset=m_samplesize*tid;
1359     offset=roffset;
1360     size_t loop=0;
1361     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1362     size_t outstep=getNoValues();
1363     size_t instep=m_left->getNoValues();
1364     switch (m_op)
1365     {
1366     case SWAP:
1367     for (loop=0;loop<numsteps;++loop)
1368     {
1369     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1370     subroffset+=instep;
1371     offset+=outstep;
1372     }
1373     break;
1374     default:
1375     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1376     }
1377     return &m_samples;
1378     }
1379    
1380 jfenwick 3035 const DataTypes::ValueType*
1381     DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1382     {
1383     if (m_readytype!='E')
1384     {
1385     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1386     }
1387     if (m_op!=CONDEVAL)
1388     {
1389     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1390     }
1391     size_t subroffset;
1392 jfenwick 2500
1393 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394     const ValueType* srcres=0;
1395     if ((*maskres)[subroffset]>0)
1396     {
1397     srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398     }
1399     else
1400     {
1401     srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402     }
1403    
1404     // Now we need to copy the result
1405    
1406     roffset=m_samplesize*tid;
1407     for (int i=0;i<m_samplesize;++i)
1408     {
1409     m_samples[roffset+i]=(*srcres)[subroffset+i];
1410     }
1411    
1412     return &m_samples;
1413     }
1414    
1415 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1416     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1417     // If both children are expanded, then we can process them in a single operation (we treat
1418     // the whole sample as one big datapoint).
1419     // If one of the children is not expanded, then we need to treat each point in the sample
1420     // individually.
1421     // There is an additional complication when scalar operations are considered.
1422     // For example, 2+Vector.
1423     // In this case each double within the point is treated individually
1424     const DataTypes::ValueType*
1425     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1426     {
1427     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1428    
1429     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1430     // first work out which of the children are expanded
1431     bool leftExp=(m_left->m_readytype=='E');
1432     bool rightExp=(m_right->m_readytype=='E');
1433     if (!leftExp && !rightExp)
1434     {
1435     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1436     }
1437     bool leftScalar=(m_left->getRank()==0);
1438     bool rightScalar=(m_right->getRank()==0);
1439     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1440     {
1441     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1442     }
1443     size_t leftsize=m_left->getNoValues();
1444     size_t rightsize=m_right->getNoValues();
1445     size_t chunksize=1; // how many doubles will be processed in one go
1446     int leftstep=0; // how far should the left offset advance after each step
1447     int rightstep=0;
1448     int numsteps=0; // total number of steps for the inner loop
1449     int oleftstep=0; // the o variables refer to the outer loop
1450     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1451     int onumsteps=1;
1452    
1453     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1454     bool RES=(rightExp && rightScalar);
1455     bool LS=(!leftExp && leftScalar); // left is a single scalar
1456     bool RS=(!rightExp && rightScalar);
1457     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1458     bool RN=(!rightExp && !rightScalar);
1459     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1460     bool REN=(rightExp && !rightScalar);
1461    
1462     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1463     {
1464     chunksize=m_left->getNumDPPSample()*leftsize;
1465     leftstep=0;
1466     rightstep=0;
1467     numsteps=1;
1468     }
1469     else if (LES || RES)
1470     {
1471     chunksize=1;
1472     if (LES) // left is an expanded scalar
1473     {
1474     if (RS)
1475     {
1476     leftstep=1;
1477     rightstep=0;
1478     numsteps=m_left->getNumDPPSample();
1479     }
1480     else // RN or REN
1481     {
1482     leftstep=0;
1483     oleftstep=1;
1484     rightstep=1;
1485     orightstep=(RN ? -(int)rightsize : 0);
1486     numsteps=rightsize;
1487     onumsteps=m_left->getNumDPPSample();
1488     }
1489     }
1490     else // right is an expanded scalar
1491     {
1492     if (LS)
1493     {
1494     rightstep=1;
1495     leftstep=0;
1496     numsteps=m_right->getNumDPPSample();
1497     }
1498     else
1499     {
1500     rightstep=0;
1501     orightstep=1;
1502     leftstep=1;
1503     oleftstep=(LN ? -(int)leftsize : 0);
1504     numsteps=leftsize;
1505     onumsteps=m_right->getNumDPPSample();
1506     }
1507     }
1508     }
1509     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1510     {
1511     if (LEN) // and Right will be a single value
1512     {
1513     chunksize=rightsize;
1514     leftstep=rightsize;
1515     rightstep=0;
1516     numsteps=m_left->getNumDPPSample();
1517     if (RS)
1518     {
1519     numsteps*=leftsize;
1520     }
1521     }
1522     else // REN
1523     {
1524     chunksize=leftsize;
1525     rightstep=leftsize;
1526     leftstep=0;
1527     numsteps=m_right->getNumDPPSample();
1528     if (LS)
1529     {
1530     numsteps*=rightsize;
1531     }
1532     }
1533     }
1534    
1535     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1536     // Get the values of sub-expressions
1537     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1538     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1539     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1540     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1541     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1542     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1543     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1544     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1545     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1546    
1547     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1548     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1549    
1550    
1551     roffset=m_samplesize*tid;
1552 caltinay 4286 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
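     // Illustrative sketch only: the chunksize/step variables computed above drive a
     // nested loop of roughly the following shape inside PROC_OP (this is an assumption
     // about the macro's expansion, not a copy of it; the macro definition is authoritative):
     //
     //   for (int j=0; j<onumsteps; ++j)
     //   {
     //     for (int i=0; i<numsteps; ++i, resultp+=resultStep)
     //     {
     //       for (size_t k=0; k<chunksize; ++k)            // apply the operation element-wise
     //         resultp[k] = OP((*left)[lroffset+k], (*right)[rroffset+k]);
     //       lroffset+=leftstep;
     //       rroffset+=rightstep;
     //     }
     //     lroffset+=oleftstep;     // the outer steps rewind/advance for expanded scalars
     //     rroffset+=orightstep;
     //   }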
1553 jfenwick 2500 switch(m_op)
1554     {
1555     case ADD:
1556     PROC_OP(NO_ARG,plus<double>());
1557     break;
1558     case SUB:
1559     PROC_OP(NO_ARG,minus<double>());
1560     break;
1561     case MUL:
1562     PROC_OP(NO_ARG,multiplies<double>());
1563     break;
1564     case DIV:
1565     PROC_OP(NO_ARG,divides<double>());
1566     break;
1567     case POW:
1568     PROC_OP(double (double,double),::pow);
1569     break;
1570     default:
1571     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1572     }
1573     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1574     return &m_samples;
1575     }
1576    
1577    
1578     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1579     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1580     // Unlike the other resolve helpers, we must treat these data points separately.
1581     const DataTypes::ValueType*
1582     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1583     {
1584     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1585    
1586     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1587     // first work out which of the children are expanded
1588     bool leftExp=(m_left->m_readytype=='E');
1589     bool rightExp=(m_right->m_readytype=='E');
1590     int steps=getNumDPPSample();
1591     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1592     int rightStep=(rightExp?m_right->getNoValues() : 0);
1593    
1594     int resultStep=getNoValues();
1595     roffset=m_samplesize*tid;
1596     size_t offset=roffset;
1597    
1598     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1599    
1600     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1601    
1602     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1603     cout << getNoValues() << endl;)
1604    
1605    
1606     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1607     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1608     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1609     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1610     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1611     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1612     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1613    
1614 caltinay 4286 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1615 jfenwick 2500 switch(m_op)
1616     {
1617     case PROD:
1618     for (int i=0;i<steps;++i,resultp+=resultStep)
1619     {
1620     const double *ptr_0 = &((*left)[lroffset]);
1621     const double *ptr_1 = &((*right)[rroffset]);
1622    
1623     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1624     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1625    
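                 // Assumed semantics of matrix_matrix_product (its own definition is
                 // authoritative): ptr_0 is treated as an (m_SL x m_SM) block and ptr_1
                 // as an (m_SM x m_SR) block, giving an (m_SL x m_SR) result block with
                 //   result[i,j] = sum over k of ptr_0[i,k] * ptr_1[k,j]
                 // where m_transpose selects which argument is transposed before the product.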
1626     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1627    
1628     lroffset+=leftStep;
1629     rroffset+=rightStep;
1630     }
1631     break;
1632     default:
1633     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1634     }
1635     roffset=offset;
1636     return &m_samples;
1637     }
1638    
1639 jfenwick 1898
1640     const DataTypes::ValueType*
1641 jfenwick 2770 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1642 jfenwick 1879 {
1643 jfenwick 2271 #ifdef _OPENMP
1644     int tid=omp_get_thread_num();
1645     #else
1646     int tid=0;
1647     #endif
1648 jfenwick 2777
1649     #ifdef LAZY_STACK_PROF
1650     stackstart[tid]=&tid;
1651     stackend[tid]=&tid;
1652     const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1653     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1654     #pragma omp critical
1655     if (d>maxstackuse)
1656     {
1657     cout << "Max resolve Stack use " << d << endl;
1658     maxstackuse=d;
1659     }
1660     return r;
1661     #else
1662 jfenwick 2521 return resolveNodeSample(tid, sampleNo, roffset);
1663 jfenwick 2777 #endif
1664 jfenwick 2271 }
1665    
1666    
1667 jfenwick 2497 // This needs to do the work of the identity constructor
1668 jfenwick 2177 void
1669     DataLazy::resolveToIdentity()
1670     {
1671     if (m_op==IDENTITY)
1672     return;
1673 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1674 jfenwick 2177 makeIdentity(p);
1675     }
1676 jfenwick 1889
1677 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1678     {
1679     m_op=IDENTITY;
1680     m_axis_offset=0;
1681     m_transpose=0;
1682     m_SL=m_SM=m_SR=0;
1683     m_children=m_height=0;
1684     m_id=p;
1685     if(p->isConstant()) {m_readytype='C';}
1686     else if(p->isExpanded()) {m_readytype='E';}
1687     else if (p->isTagged()) {m_readytype='T';}
1688     else {throw DataException("Unknown DataReady instance in makeIdentity().");}
1689     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1690     m_left.reset();
1691     m_right.reset();
1692     }
1693    
1694 jfenwick 2497
1695     DataReady_ptr
1696     DataLazy::resolve()
1697     {
1698     resolveToIdentity();
1699     return m_id;
1700     }
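// Minimal caller-side sketch (names are illustrative, not taken from any particular caller):
// once a lazy expression has been built, asking for concrete data turns this node into IDENTITY.
//
//   DataLazy_ptr lp = ...;                 // some previously constructed lazy expression
//   DataReady_ptr ready = lp->resolve();   // evaluates; lp now wraps 'ready' as an IDENTITY node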
1701    
1702 jfenwick 2799
1703     /* This is logically a static method, but I think making it static caused problems on Windows. */
1704     void
1705     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1706     {
1707     if (dats.empty())
1708     {
1709     return;
1710     }
1711     vector<DataLazy*> work;
1712     FunctionSpace fs=dats[0]->getFunctionSpace();
1713     bool match=true;
1714 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1715 jfenwick 2799 {
1716     if (dats[i]->m_readytype!='E')
1717     {
1718     dats[i]->collapse();
1719     }
1720     if (dats[i]->m_op!=IDENTITY)
1721     {
1722     work.push_back(dats[i]);
1723     if (fs!=dats[i]->getFunctionSpace())
1724     {
1725     match=false;
1726     }
1727     }
1728     }
1729     if (work.empty())
1730     {
1731     return; // no work to do
1732     }
1733     if (match) // all functionspaces match. Yes I realise this is overly strict
1734     { // it is possible that dats[0] is one of the objects which we discarded and
1735     // all the other functionspaces match.
1736     vector<DataExpanded*> dep;
1737     vector<ValueType*> vecs;
1738     for (int i=0;i<work.size();++i)
1739     {
1740     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1741     vecs.push_back(&(dep[i]->getVectorRW()));
1742     }
1743     int totalsamples=work[0]->getNumSamples();
1744     const ValueType* res=0; // Storage for answer
1745     int sample;
1746     #pragma omp parallel private(sample, res)
1747     {
1748     size_t roffset=0;
1749     #pragma omp for schedule(static)
1750     for (sample=0;sample<totalsamples;++sample)
1751     {
1752     roffset=0;
1753     int j;
1754 jfenwick 2825 for (j=work.size()-1;j>=0;--j)
1755 jfenwick 2799 {
1756     #ifdef _OPENMP
1757     res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1758     #else
1759     res=work[j]->resolveNodeSample(0,sample,roffset);
1760     #endif
1761     DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1762     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1763     }
1764     }
1765     }
1766     // Now we need to load the new results as identity ops into the lazy nodes
1767 jfenwick 2824 for (int i=work.size()-1;i>=0;--i)
1768 jfenwick 2799 {
1769     work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1770     }
1771     }
1772     else // functionspaces do not match
1773     {
1774     for (int i=0;i<work.size();++i)
1775     {
1776     work[i]->resolveToIdentity();
1777     }
1778     }
1779     }
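// Usage sketch (hypothetical caller): since the method does not use *this (see the comment
// above about it really being a static method), any node in the group can be used to invoke it.
//
//   std::vector<DataLazy*> batch;              // lazy nodes to be evaluated together
//   // ... fill 'batch' ...
//   if (!batch.empty())
//   {
//     batch[0]->resolveGroupWorker(batch);     // afterwards every node in 'batch' is IDENTITY
//   }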
1780    
1781    
1782    
1783 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1784     DataReady_ptr
1785     DataLazy::resolveNodeWorker()
1786     {
1787     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1788     {
1789     collapse();
1790     }
1791     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1792     {
1793     return m_id;
1794     }
1795     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1796     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1797     ValueType& resvec=result->getVectorRW();
1798     DataReady_ptr resptr=DataReady_ptr(result);
1799    
1800     int sample;
1801     int totalsamples=getNumSamples();
1802     const ValueType* res=0; // Storage for answer
1803     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1804 jfenwick 2777 #pragma omp parallel private(sample,res)
1805 jfenwick 2500 {
1806 jfenwick 2777 size_t roffset=0;
1807     #ifdef LAZY_STACK_PROF
1808     stackstart[omp_get_thread_num()]=&roffset;
1809     stackend[omp_get_thread_num()]=&roffset;
1810     #endif
1811     #pragma omp for schedule(static)
1812     for (sample=0;sample<totalsamples;++sample)
1813     {
1814     roffset=0;
1815 jfenwick 2500 #ifdef _OPENMP
1816 jfenwick 2777 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1817 jfenwick 2500 #else
1818 jfenwick 2777 res=resolveNodeSample(0,sample,roffset);
1819 jfenwick 2500 #endif
1820     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1821     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1822 jfenwick 2777 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1823     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1824     }
1825 jfenwick 2500 }
1826 jfenwick 2777 #ifdef LAZY_STACK_PROF
1827     for (int i=0;i<getNumberOfThreads();++i)
1828     {
1829     size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1830     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1831     if (r>maxstackuse)
1832     {
1833     maxstackuse=r;
1834     }
1835     }
1836     cout << "Max resolve Stack use=" << maxstackuse << endl;
1837     #endif
1838 jfenwick 2500 return resptr;
1839     }
1840    
1841 jfenwick 1865 std::string
1842     DataLazy::toString() const
1843     {
1844 jfenwick 1886 ostringstream oss;
1845 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1846 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1847 jfenwick 2737 {
1848 jfenwick 2795 case 1: // tree format
1849 jfenwick 2737 oss << endl;
1850 jfenwick 2795 intoTreeString(oss,"");
1851     break;
1852     case 2: // just the depth
1853     break;
1854     default:
1855     intoString(oss);
1856     break;
1857 jfenwick 2737 }
1858 jfenwick 1886 return oss.str();
1859 jfenwick 1865 }
1860    
1861 jfenwick 1899
1862 jfenwick 1886 void
1863     DataLazy::intoString(ostringstream& oss) const
1864     {
1865 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1866 jfenwick 1886 switch (getOpgroup(m_op))
1867     {
1868 jfenwick 1889 case G_IDENTITY:
1869     if (m_id->isExpanded())
1870     {
1871     oss << "E";
1872     }
1873     else if (m_id->isTagged())
1874     {
1875     oss << "T";
1876     }
1877     else if (m_id->isConstant())
1878     {
1879     oss << "C";
1880     }
1881     else
1882     {
1883     oss << "?";
1884     }
1885 jfenwick 1886 oss << '@' << m_id.get();
1886     break;
1887     case G_BINARY:
1888     oss << '(';
1889     m_left->intoString(oss);
1890     oss << ' ' << opToString(m_op) << ' ';
1891     m_right->intoString(oss);
1892     oss << ')';
1893     break;
1894     case G_UNARY:
1895 jfenwick 2147 case G_UNARY_P:
1896 jfenwick 2037 case G_NP1OUT:
1897 jfenwick 2084 case G_NP1OUT_P:
1898 jfenwick 2721 case G_REDUCTION:
1899 jfenwick 1886 oss << opToString(m_op) << '(';
1900     m_left->intoString(oss);
1901     oss << ')';
1902     break;
1903 jfenwick 2066 case G_TENSORPROD:
1904     oss << opToString(m_op) << '(';
1905     m_left->intoString(oss);
1906     oss << ", ";
1907     m_right->intoString(oss);
1908     oss << ')';
1909     break;
1910 jfenwick 2496 case G_NP1OUT_2P:
1911     oss << opToString(m_op) << '(';
1912     m_left->intoString(oss);
1913     oss << ", " << m_axis_offset << ", " << m_transpose;
1914     oss << ')';
1915     break;
1916 jfenwick 3035 case G_CONDEVAL:
1917     oss << opToString(m_op)<< '(' ;
1918     m_mask->intoString(oss);
1919     oss << " ? ";
1920     m_left->intoString(oss);
1921     oss << " : ";
1922     m_right->intoString(oss);
1923     oss << ')';
1924     break;
1925 jfenwick 1886 default:
1926     oss << "UNKNOWN";
1927     }
1928     }
1929    
1930 jfenwick 2737
1931     void
1932     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1933     {
1934     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1935     switch (getOpgroup(m_op))
1936     {
1937     case G_IDENTITY:
1938     if (m_id->isExpanded())
1939     {
1940     oss << "E";
1941     }
1942     else if (m_id->isTagged())
1943     {
1944     oss << "T";
1945     }
1946     else if (m_id->isConstant())
1947     {
1948     oss << "C";
1949     }
1950     else
1951     {
1952     oss << "?";
1953     }
1954     oss << '@' << m_id.get() << endl;
1955     break;
1956     case G_BINARY:
1957     oss << opToString(m_op) << endl;
1958     indent+='.';
1959     m_left->intoTreeString(oss, indent);
1960     m_right->intoTreeString(oss, indent);
1961     break;
1962     case G_UNARY:
1963     case G_UNARY_P:
1964     case G_NP1OUT:
1965     case G_NP1OUT_P:
1966     case G_REDUCTION:
1967     oss << opToString(m_op) << endl;
1968     indent+='.';
1969     m_left->intoTreeString(oss, indent);
1970     break;
1971     case G_TENSORPROD:
1972     oss << opToString(m_op) << endl;
1973     indent+='.';
1974     m_left->intoTreeString(oss, indent);
1975     m_right->intoTreeString(oss, indent);
1976     break;
1977     case G_NP1OUT_2P:
1978     oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1979     indent+='.';
1980     m_left->intoTreeString(oss, indent);
1981     break;
1982     default:
1983     oss << "UNKNOWN";
1984     }
1985     }
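// Example of the tree format produced above for an expression like sin(C)+E
// (the addresses and sample sizes here are made up):
//
//   [0:  4] +
//   [0:  4] .sin
//   [0:  4] ..C@0x7f3a2b0
//   [0:  4] .E@0x7f3a400
//
// i.e. each line shows [rank:samplesize], one dot per level of nesting, then the node itself.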
1986    
1987    
1988 jfenwick 1865 DataAbstract*
1989     DataLazy::deepCopy()
1990     {
1991 jfenwick 1935 switch (getOpgroup(m_op))
1992 jfenwick 1865 {
1993 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1994 jfenwick 2721 case G_UNARY:
1995     case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1996     case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1997 jfenwick 1935 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1998 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1999     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2000 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2001     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002 jfenwick 1935 default:
2003     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2004 jfenwick 1865 }
2005     }
2006    
2007    
2008 jfenwick 2721
2009 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2010     // Instances of DataReady can look at the size of their vectors.
2011     // For lazy though, it could be the size the data would be if it were resolved;
2012     // or it could be some function of the lengths of the DataReady instances which
2013     // form part of the expression.
2014     // Rather than have people making assumptions, I have disabled the method.
2015 jfenwick 1865 DataTypes::ValueType::size_type
2016     DataLazy::getLength() const
2017     {
2018 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2019 jfenwick 1865 }
2020    
2021    
2022     DataAbstract*
2023     DataLazy::getSlice(const DataTypes::RegionType& region) const
2024     {
2025 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2026 jfenwick 1865 }
2027    
2028 jfenwick 1935
2029     // To compute a point offset we need to rely on our child nodes
2030 jfenwick 1865 DataTypes::ValueType::size_type
2031     DataLazy::getPointOffset(int sampleNo,
2032 jfenwick 1935 int dataPointNo)
2033     {
2034     if (m_op==IDENTITY)
2035     {
2036     return m_id->getPointOffset(sampleNo,dataPointNo);
2037     }
2038     if (m_readytype!='E')
2039     {
2040     collapse();
2041     return m_id->getPointOffset(sampleNo,dataPointNo);
2042     }
2043     // at this point we do not have an identity node and the expression will be Expanded
2044     // so we only need to know which child to ask
2045     if (m_left->m_readytype=='E')
2046     {
2047     return m_left->getPointOffset(sampleNo,dataPointNo);
2048     }
2049     else
2050     {
2051     return m_right->getPointOffset(sampleNo,dataPointNo);
2052     }
2053     }
2054    
2055     // To compute a point offset we need to rely on our child nodes
2056     DataTypes::ValueType::size_type
2057     DataLazy::getPointOffset(int sampleNo,
2058 jfenwick 1865 int dataPointNo) const
2059     {
2060 jfenwick 1935 if (m_op==IDENTITY)
2061     {
2062     return m_id->getPointOffset(sampleNo,dataPointNo);
2063     }
2064     if (m_readytype=='E')
2065     {
2066     // at this point we do not have an identity node and the expression will be Expanded
2067     // so we only need to know which child to ask
2068     if (m_left->m_readytype=='E')
2069     {
2070     return m_left->getPointOffset(sampleNo,dataPointNo);
2071     }
2072     else
2073     {
2074     return m_right->getPointOffset(sampleNo,dataPointNo);
2075     }
2076     }
2077     if (m_readytype=='C')
2078     {
2079     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2080     }
2081     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2082 jfenwick 1865 }
2083    
2084 jfenwick 2271
2085     // I have decided to let Data:: handle this issue.
2086 jfenwick 1901 void
2087     DataLazy::setToZero()
2088     {
2089 jfenwick 2271 // DataTypes::ValueType v(getNoValues(),0);
2090     // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2091     // m_op=IDENTITY;
2092     // m_right.reset();
2093     // m_left.reset();
2094     // m_readytype='C';
2095     // m_buffsRequired=1;
2096    
2097 jfenwick 2514 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
2098 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2099 jfenwick 1901 }
2100    
2101 jfenwick 2271 bool
2102     DataLazy::actsExpanded() const
2103     {
2104     return (m_readytype=='E');
2105     }
2106    
2107 jfenwick 1865 } // end namespace
