/[escript]/trunk/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /trunk/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5997 - (hide annotations)
Mon Feb 29 07:24:47 2016 UTC (3 years, 2 months ago) by caltinay
File size: 67807 byte(s)
moved esys MPI to escript.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17 jfenwick 5464 #define ESNEEDPYTHON
18     #include "esysUtils/first.h"
19 jfenwick 1865
20     #include "DataLazy.h"
21 caltinay 5972 #include "Data.h"
22     #include "DataTypes.h"
23     #include "EscriptParams.h"
24     #include "FunctionSpace.h"
25     #include "UnaryFuncs.h" // for escript::fsign
26     #include "Utils.h"
27    
28 caltinay 3317 #ifdef USE_NETCDF
29     #include <netcdfcpp.h>
30     #endif
31    
32 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
33 jfenwick 2199
34 jfenwick 5938 using namespace escript::DataTypes;
35    
36 caltinay 5972 #define NO_ARG
37    
38 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
39 jfenwick 2092 #define LAZYDEBUG(X)
40 jfenwick 2157 namespace
41     {
42     bool privdebug=false;
43 jfenwick 2092
44 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
45     #define DISABLEDEBUG privdebug=false;
46     }
47    
48 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
49 jfenwick 2177
50 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
51 jfenwick 2472
52 jfenwick 2501
53 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
54    
55 jfenwick 1899 /*
56     How does DataLazy work?
57     ~~~~~~~~~~~~~~~~~~~~~~~
58    
59     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
60     denoted left and right.
61    
62     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
63     This means that all "internal" nodes in the structure are instances of DataLazy.
64    
65     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
66 caltinay 4286 Note that IDENTITY is not considered a unary operation.
67 jfenwick 1899
68     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
69     It must however form a DAG (directed acyclic graph).
70     I will refer to individual DataLazy objects with the structure as nodes.
71    
72     Each node also stores:
73     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
74 caltinay 5972 evaluated.
75 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
76 caltinay 5972 evaluate the expression.
77 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
78    
79     When a new node is created, the above values are computed based on the values in the child nodes.
80     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
81    
82     The resolve method, which produces a DataReady from a DataLazy, does the following:
83     1) Create a DataReady to hold the new result.
84     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
85     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
86    
87     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
88    
89     resolveSample returns a Vector* and an offset within that vector where the result is stored.
90     Normally, this would be v, but for identity nodes their internal vector is returned instead.
91    
92     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
93    
94     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
95     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
96 jfenwick 2037
97 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
98 jfenwick 2037 1) Add to the ES_optype.
99     2) determine what opgroup your operation belongs to (X)
100     3) add a string for the op to the end of ES_opstrings
101     4) increase ES_opcount
102     5) add an entry (X) to opgroups
103     6) add an entry to the switch in collapseToReady
104     7) add an entry to resolveX
105 jfenwick 1899 */
106    
107    
108 jfenwick 1865 using namespace std;
109 jfenwick 1868 using namespace boost;
110 jfenwick 1865
111     namespace escript
112     {
113    
114     namespace
115     {
116    
117 jfenwick 2777
118     // enabling this will print out when ever the maximum stacksize used by resolve increases
119     // it assumes _OPENMP is also in use
120     //#define LAZY_STACK_PROF
121    
122    
123    
124     #ifndef _OPENMP
125     #ifdef LAZY_STACK_PROF
126     #undef LAZY_STACK_PROF
127     #endif
128     #endif
129    
130    
131     #ifdef LAZY_STACK_PROF
132     std::vector<void*> stackstart(getNumberOfThreads());
133     std::vector<void*> stackend(getNumberOfThreads());
134     size_t maxstackuse=0;
135     #endif
136    
137 jfenwick 1886 enum ES_opgroup
138     {
139     G_UNKNOWN,
140     G_IDENTITY,
141 caltinay 5972 G_BINARY, // pointwise operations with two arguments
142     G_UNARY, // pointwise operations with one argument
143     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
144     G_NP1OUT, // non-pointwise op with one output
145     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
146     G_TENSORPROD, // general tensor product
147     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
148     G_REDUCTION, // non-pointwise unary op with a scalar output
149 jfenwick 3035 G_CONDEVAL
150 jfenwick 1886 };
151    
152    
153    
154    
155 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
156 caltinay 5972 "sin","cos","tan",
157     "asin","acos","atan","sinh","cosh","tanh","erf",
158     "asinh","acosh","atanh",
159     "log10","log","sign","abs","neg","pos","exp","sqrt",
160     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
161     "symmetric","nonsymmetric",
162     "prod",
163     "transpose", "trace",
164     "swapaxes",
165     "minval", "maxval",
166     "condEval"};
167 jfenwick 3035 int ES_opcount=44;
168 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
169 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
170     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
171     G_UNARY,G_UNARY,G_UNARY, // 20
172     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
173     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
174     G_NP1OUT,G_NP1OUT,
175     G_TENSORPROD,
176     G_NP1OUT_P, G_NP1OUT_P,
177     G_NP1OUT_2P,
178     G_REDUCTION, G_REDUCTION,
179     G_CONDEVAL};
180 jfenwick 1886 inline
181     ES_opgroup
182     getOpgroup(ES_optype op)
183     {
184     return opgroups[op];
185     }
186    
187 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
188     FunctionSpace
189     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
190     {
191 caltinay 5972 // perhaps this should call interpolate and throw or something?
192     // maybe we need an interpolate node -
193     // that way, if interpolate is required in any other op we can just throw a
194     // programming error exception.
195 jfenwick 1879
196 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
197     FunctionSpace r=right->getFunctionSpace();
198     if (l!=r)
199     {
200 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
201     if (res==1)
202 jfenwick 1943 {
203 caltinay 5972 return l;
204 jfenwick 1943 }
205 jfenwick 4264 if (res==-1)
206 jfenwick 1943 {
207 caltinay 5972 return r;
208 jfenwick 1943 }
209     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
210     }
211     return l;
212 jfenwick 1865 }
213    
214     // return the shape of the result of "left op right"
215 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
216 jfenwick 1865 DataTypes::ShapeType
217     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
218     {
219 caltinay 5972 if (left->getShape()!=right->getShape())
220     {
221     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
222     {
223     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
224     }
225 jfenwick 2721
226 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
227     {
228     return right->getShape();
229     }
230     if (right->getRank()==0)
231     {
232     return left->getShape();
233     }
234     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
235     }
236     return left->getShape();
237 jfenwick 1865 }
238    
239 jfenwick 2084 // return the shape for "op left"
240    
241     DataTypes::ShapeType
242 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
243 jfenwick 2084 {
244 caltinay 5972 switch(op)
245     {
246     case TRANS:
247     { // for the scoping of variables
248     const DataTypes::ShapeType& s=left->getShape();
249     DataTypes::ShapeType sh;
250     int rank=left->getRank();
251     if (axis_offset<0 || axis_offset>rank)
252     {
253 caltinay 2779 stringstream e;
254     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
255     throw DataException(e.str());
256 caltinay 5972 }
257     for (int i=0; i<rank; i++)
258     {
259     int index = (axis_offset+i)%rank;
260     sh.push_back(s[index]); // Append to new shape
261     }
262     return sh;
263     }
264     break;
265     case TRACE:
266     {
267     int rank=left->getRank();
268     if (rank<2)
269     {
270     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
271     }
272     if ((axis_offset>rank-2) || (axis_offset<0))
273     {
274     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
275     }
276     if (rank==2)
277     {
278     return DataTypes::scalarShape;
279     }
280     else if (rank==3)
281     {
282     DataTypes::ShapeType sh;
283     if (axis_offset==0)
284     {
285     sh.push_back(left->getShape()[2]);
286     }
287     else // offset==1
288     {
289     sh.push_back(left->getShape()[0]);
290     }
291     return sh;
292     }
293     else if (rank==4)
294     {
295     DataTypes::ShapeType sh;
296     const DataTypes::ShapeType& s=left->getShape();
297     if (axis_offset==0)
298     {
299     sh.push_back(s[2]);
300     sh.push_back(s[3]);
301     }
302     else if (axis_offset==1)
303     {
304     sh.push_back(s[0]);
305     sh.push_back(s[3]);
306     }
307     else // offset==2
308     {
309     sh.push_back(s[0]);
310     sh.push_back(s[1]);
311     }
312     return sh;
313     }
314     else // unknown rank
315     {
316     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
317     }
318     }
319     break;
320     default:
321     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
322     }
323 jfenwick 2084 }
324    
325 jfenwick 2496 DataTypes::ShapeType
326     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
327     {
328     // This code taken from the Data.cpp swapaxes() method
329     // Some of the checks are probably redundant here
330     int axis0_tmp,axis1_tmp;
331     const DataTypes::ShapeType& s=left->getShape();
332     DataTypes::ShapeType out_shape;
333     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
334     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
335     int rank=left->getRank();
336     if (rank<2) {
337     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
338     }
339     if (axis0<0 || axis0>rank-1) {
340 caltinay 2779 stringstream e;
341     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
342     throw DataException(e.str());
343 jfenwick 2496 }
344     if (axis1<0 || axis1>rank-1) {
345 caltinay 2782 stringstream e;
346 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
347     throw DataException(e.str());
348 jfenwick 2496 }
349     if (axis0 == axis1) {
350     throw DataException("Error - Data::swapaxes: axis indices must be different.");
351     }
352     if (axis0 > axis1) {
353     axis0_tmp=axis1;
354     axis1_tmp=axis0;
355     } else {
356     axis0_tmp=axis0;
357     axis1_tmp=axis1;
358     }
359     for (int i=0; i<rank; i++) {
360     if (i == axis0_tmp) {
361     out_shape.push_back(s[axis1_tmp]);
362     } else if (i == axis1_tmp) {
363     out_shape.push_back(s[axis0_tmp]);
364     } else {
365     out_shape.push_back(s[i]);
366     }
367     }
368     return out_shape;
369     }
370    
371    
372 jfenwick 2066 // determine the output shape for the general tensor product operation
373     // the additional parameters return information required later for the product
374     // the majority of this code is copy pasted from C_General_Tensor_Product
375     DataTypes::ShapeType
376     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
377 jfenwick 1865 {
378 caltinay 5972
379 jfenwick 2066 // Get rank and shape of inputs
380     int rank0 = left->getRank();
381     int rank1 = right->getRank();
382     const DataTypes::ShapeType& shape0 = left->getShape();
383     const DataTypes::ShapeType& shape1 = right->getShape();
384    
385     // Prepare for the loops of the product and verify compatibility of shapes
386     int start0=0, start1=0;
387 caltinay 5972 if (transpose == 0) {}
388     else if (transpose == 1) { start0 = axis_offset; }
389     else if (transpose == 2) { start1 = rank1-axis_offset; }
390     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
391 jfenwick 2066
392 jfenwick 2085 if (rank0<axis_offset)
393     {
394 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
395 jfenwick 2085 }
396 jfenwick 2066
397     // Adjust the shapes for transpose
398 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
399     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
400     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
401     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
402 jfenwick 2066
403     // Prepare for the loops of the product
404     SL=1, SM=1, SR=1;
405 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
406 jfenwick 2066 SL *= tmpShape0[i];
407     }
408 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
409 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
410     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
411     }
412     SM *= tmpShape0[i];
413     }
414 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
415 jfenwick 2066 SR *= tmpShape1[i];
416     }
417    
418     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
419 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
420     { // block to limit the scope of out_index
421 jfenwick 2066 int out_index=0;
422     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
423     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
424     }
425 jfenwick 2086
426     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
427     {
428     ostringstream os;
429     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
430     throw DataException(os.str());
431     }
432    
433 jfenwick 2066 return shape2;
434 jfenwick 1865 }
435    
436 caltinay 5972 } // end anonymous namespace
437 jfenwick 1865
438    
439 jfenwick 1899
440     // Return a string representing the operation
441 jfenwick 1865 const std::string&
442     opToString(ES_optype op)
443     {
444     if (op<0 || op>=ES_opcount)
445     {
446     op=UNKNOWNOP;
447     }
448     return ES_opstrings[op];
449     }
450    
451 jfenwick 2500 void DataLazy::LazyNodeSetup()
452     {
453     #ifdef _OPENMP
454     int numthreads=omp_get_max_threads();
455     m_samples.resize(numthreads*m_samplesize);
456     m_sampleids=new int[numthreads];
457     for (int i=0;i<numthreads;++i)
458     {
459     m_sampleids[i]=-1;
460     }
461     #else
462     m_samples.resize(m_samplesize);
463     m_sampleids=new int[1];
464     m_sampleids[0]=-1;
465     #endif // _OPENMP
466     }
467 jfenwick 1865
468 jfenwick 2500
469     // Creates an identity node
470 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
471 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
472     ,m_sampleids(0),
473     m_samples(1)
474 jfenwick 1865 {
475 jfenwick 1879 if (p->isLazy())
476     {
477 caltinay 5972 // I don't want identity of Lazy.
478     // Question: Why would that be so bad?
479     // Answer: We assume that the child of ID is something we can call getVector on
480     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
481 jfenwick 1879 }
482     else
483     {
484 caltinay 5972 p->makeLazyShared();
485     DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486     makeIdentity(dr);
487 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
488 jfenwick 1879 }
489 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
490 jfenwick 1865 }
491    
492 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494     m_op(op),
495     m_axis_offset(0),
496     m_transpose(0),
497     m_SL(0), m_SM(0), m_SR(0)
498 jfenwick 1886 {
499 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500 jfenwick 1886 {
501 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502 jfenwick 1886 }
503 jfenwick 2066
504 jfenwick 1886 DataLazy_ptr lleft;
505     if (!left->isLazy())
506     {
507 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
508 jfenwick 1886 }
509     else
510     {
511 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
512 jfenwick 1886 }
513 jfenwick 1889 m_readytype=lleft->m_readytype;
514 jfenwick 1886 m_left=lleft;
515     m_samplesize=getNumDPPSample()*getNoValues();
516 jfenwick 2177 m_children=m_left->m_children+1;
517     m_height=m_left->m_height+1;
518 jfenwick 2500 LazyNodeSetup();
519 jfenwick 2177 SIZELIMIT
520 jfenwick 1886 }
521    
522    
523 jfenwick 1943 // In this constructor we need to consider interpolation
524 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
526     m_op(op),
527     m_SL(0), m_SM(0), m_SR(0)
528 jfenwick 1879 {
529 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
531 jfenwick 1886 {
532 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533 jfenwick 1886 }
534 jfenwick 1943
535 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
536 jfenwick 1943 {
537 caltinay 5972 FunctionSpace fs=getFunctionSpace();
538     Data ltemp(left);
539     Data tmp(ltemp,fs);
540     left=tmp.borrowDataPtr();
541 jfenwick 1943 }
542 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
543 jfenwick 1943 {
544 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
545     right=tmp.borrowDataPtr();
546 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547 jfenwick 1943 }
548     left->operandCheck(*right);
549    
550 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
551 jfenwick 1879 {
552 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
553 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554 jfenwick 1879 }
555     else
556     {
557 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
558 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559 jfenwick 1879 }
560     if (right->isLazy())
561     {
562 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
563 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564 jfenwick 1879 }
565     else
566     {
567 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
568 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569 jfenwick 1879 }
570 jfenwick 1889 char lt=m_left->m_readytype;
571     char rt=m_right->m_readytype;
572     if (lt=='E' || rt=='E')
573     {
574 caltinay 5972 m_readytype='E';
575 jfenwick 1889 }
576     else if (lt=='T' || rt=='T')
577     {
578 caltinay 5972 m_readytype='T';
579 jfenwick 1889 }
580     else
581     {
582 caltinay 5972 m_readytype='C';
583 jfenwick 1889 }
584 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
585 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
586     m_height=max(m_left->m_height,m_right->m_height)+1;
587 jfenwick 2500 LazyNodeSetup();
588 jfenwick 2177 SIZELIMIT
589 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590 jfenwick 1879 }
591    
592 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594     m_op(op),
595     m_axis_offset(axis_offset),
596     m_transpose(transpose)
597 jfenwick 2066 {
598     if ((getOpgroup(op)!=G_TENSORPROD))
599     {
600 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601 jfenwick 2066 }
602     if ((transpose>2) || (transpose<0))
603     {
604 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605 jfenwick 2066 }
606 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
607 jfenwick 2066 {
608 caltinay 5972 FunctionSpace fs=getFunctionSpace();
609     Data ltemp(left);
610     Data tmp(ltemp,fs);
611     left=tmp.borrowDataPtr();
612 jfenwick 2066 }
613 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
614 jfenwick 2066 {
615 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
616     right=tmp.borrowDataPtr();
617 jfenwick 2066 }
618 jfenwick 2195 // left->operandCheck(*right);
619 jfenwick 1879
620 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
621 jfenwick 2066 {
622 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
623 jfenwick 2066 }
624     else
625     {
626 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
627 jfenwick 2066 }
628     if (right->isLazy())
629     {
630 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
631 jfenwick 2066 }
632     else
633     {
634 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
635 jfenwick 2066 }
636     char lt=m_left->m_readytype;
637     char rt=m_right->m_readytype;
638     if (lt=='E' || rt=='E')
639     {
640 caltinay 5972 m_readytype='E';
641 jfenwick 2066 }
642     else if (lt=='T' || rt=='T')
643     {
644 caltinay 5972 m_readytype='T';
645 jfenwick 2066 }
646     else
647     {
648 caltinay 5972 m_readytype='C';
649 jfenwick 2066 }
650     m_samplesize=getNumDPPSample()*getNoValues();
651 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
652     m_height=max(m_left->m_height,m_right->m_height)+1;
653 jfenwick 2500 LazyNodeSetup();
654 jfenwick 2177 SIZELIMIT
655 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656 jfenwick 2066 }
657    
658    
659 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661     m_op(op),
662     m_axis_offset(axis_offset),
663     m_transpose(0),
664     m_tol(0)
665 jfenwick 2084 {
666     if ((getOpgroup(op)!=G_NP1OUT_P))
667     {
668 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669 jfenwick 2084 }
670     DataLazy_ptr lleft;
671     if (!left->isLazy())
672     {
673 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
674 jfenwick 2084 }
675     else
676     {
677 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
678 jfenwick 2084 }
679     m_readytype=lleft->m_readytype;
680     m_left=lleft;
681     m_samplesize=getNumDPPSample()*getNoValues();
682 jfenwick 2177 m_children=m_left->m_children+1;
683     m_height=m_left->m_height+1;
684 jfenwick 2500 LazyNodeSetup();
685 jfenwick 2177 SIZELIMIT
686 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687 jfenwick 2084 }
688    
689 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
691     m_op(op),
692     m_axis_offset(0),
693     m_transpose(0),
694     m_tol(tol)
695 jfenwick 2147 {
696     if ((getOpgroup(op)!=G_UNARY_P))
697     {
698 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699 jfenwick 2147 }
700     DataLazy_ptr lleft;
701     if (!left->isLazy())
702     {
703 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
704 jfenwick 2147 }
705     else
706     {
707 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
708 jfenwick 2147 }
709     m_readytype=lleft->m_readytype;
710     m_left=lleft;
711     m_samplesize=getNumDPPSample()*getNoValues();
712 jfenwick 2177 m_children=m_left->m_children+1;
713     m_height=m_left->m_height+1;
714 jfenwick 2500 LazyNodeSetup();
715 jfenwick 2177 SIZELIMIT
716 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717     }
718 jfenwick 2084
719 jfenwick 2496
720     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722     m_op(op),
723     m_axis_offset(axis0),
724     m_transpose(axis1),
725     m_tol(0)
726 jfenwick 2496 {
727     if ((getOpgroup(op)!=G_NP1OUT_2P))
728     {
729 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730 jfenwick 2496 }
731     DataLazy_ptr lleft;
732     if (!left->isLazy())
733     {
734 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
735 jfenwick 2496 }
736     else
737     {
738 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
739 jfenwick 2496 }
740     m_readytype=lleft->m_readytype;
741     m_left=lleft;
742     m_samplesize=getNumDPPSample()*getNoValues();
743     m_children=m_left->m_children+1;
744     m_height=m_left->m_height+1;
745 jfenwick 2500 LazyNodeSetup();
746 jfenwick 2496 SIZELIMIT
747     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748     }
749    
750 jfenwick 3035
751     namespace
752     {
753    
754     inline int max3(int a, int b, int c)
755     {
756 caltinay 5972 int t=(a>b?a:b);
757     return (t>c?t:c);
758 jfenwick 3035
759     }
760     }
761    
762     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
764     m_op(CONDEVAL),
765     m_axis_offset(0),
766     m_transpose(0),
767     m_tol(0)
768 jfenwick 3035 {
769    
770     DataLazy_ptr lmask;
771     DataLazy_ptr lleft;
772     DataLazy_ptr lright;
773     if (!mask->isLazy())
774     {
775 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
776 jfenwick 3035 }
777     else
778     {
779 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
780 jfenwick 3035 }
781     if (!left->isLazy())
782     {
783 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
784 jfenwick 3035 }
785     else
786     {
787 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
788 jfenwick 3035 }
789     if (!right->isLazy())
790     {
791 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
792 jfenwick 3035 }
793     else
794     {
795 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
796 jfenwick 3035 }
797     m_readytype=lmask->m_readytype;
798     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799     {
800 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
801 jfenwick 3035 }
802     m_left=lleft;
803     m_right=lright;
804     m_mask=lmask;
805     m_samplesize=getNumDPPSample()*getNoValues();
806     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808     LazyNodeSetup();
809     SIZELIMIT
810     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811     }
812    
813    
814    
815 jfenwick 1865 DataLazy::~DataLazy()
816     {
817 jfenwick 2500 delete[] m_sampleids;
818 jfenwick 1865 }
819    
820 jfenwick 1879
821 jfenwick 1899 /*
822     \brief Evaluates the expression using methods on Data.
823     This does the work for the collapse method.
824     For reasons of efficiency do not call this method on DataExpanded nodes.
825     */
826 jfenwick 1889 DataReady_ptr
827 jfenwick 4621 DataLazy::collapseToReady() const
828 jfenwick 1889 {
829     if (m_readytype=='E')
830 caltinay 5972 { // this is more an efficiency concern than anything else
831 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
832     }
833     if (m_op==IDENTITY)
834     {
835     return m_id;
836     }
837     DataReady_ptr pleft=m_left->collapseToReady();
838     Data left(pleft);
839     Data right;
840 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
841 jfenwick 1889 {
842     right=Data(m_right->collapseToReady());
843     }
844     Data result;
845     switch(m_op)
846     {
847     case ADD:
848 caltinay 5972 result=left+right;
849     break;
850     case SUB:
851     result=left-right;
852     break;
853     case MUL:
854     result=left*right;
855     break;
856     case DIV:
857     result=left/right;
858     break;
859 jfenwick 1889 case SIN:
860 caltinay 5972 result=left.sin();
861     break;
862 jfenwick 1889 case COS:
863 caltinay 5972 result=left.cos();
864     break;
865 jfenwick 1889 case TAN:
866 caltinay 5972 result=left.tan();
867     break;
868 jfenwick 1889 case ASIN:
869 caltinay 5972 result=left.asin();
870     break;
871 jfenwick 1889 case ACOS:
872 caltinay 5972 result=left.acos();
873     break;
874 jfenwick 1889 case ATAN:
875 caltinay 5972 result=left.atan();
876     break;
877 jfenwick 1889 case SINH:
878 caltinay 5972 result=left.sinh();
879     break;
880 jfenwick 1889 case COSH:
881 caltinay 5972 result=left.cosh();
882     break;
883 jfenwick 1889 case TANH:
884 caltinay 5972 result=left.tanh();
885     break;
886 jfenwick 1889 case ERF:
887 caltinay 5972 result=left.erf();
888     break;
889 jfenwick 1889 case ASINH:
890 caltinay 5972 result=left.asinh();
891     break;
892 jfenwick 1889 case ACOSH:
893 caltinay 5972 result=left.acosh();
894     break;
895 jfenwick 1889 case ATANH:
896 caltinay 5972 result=left.atanh();
897     break;
898 jfenwick 1889 case LOG10:
899 caltinay 5972 result=left.log10();
900     break;
901 jfenwick 1889 case LOG:
902 caltinay 5972 result=left.log();
903     break;
904 jfenwick 1889 case SIGN:
905 caltinay 5972 result=left.sign();
906     break;
907 jfenwick 1889 case ABS:
908 caltinay 5972 result=left.abs();
909     break;
910 jfenwick 1889 case NEG:
911 caltinay 5972 result=left.neg();
912     break;
913 jfenwick 1889 case POS:
914 caltinay 5972 // it doesn't mean anything for delayed.
915     // it will just trigger a deep copy of the lazy object
916     throw DataException("Programmer error - POS not supported for lazy data.");
917     break;
918 jfenwick 1889 case EXP:
919 caltinay 5972 result=left.exp();
920     break;
921 jfenwick 1889 case SQRT:
922 caltinay 5972 result=left.sqrt();
923     break;
924 jfenwick 1889 case RECIP:
925 caltinay 5972 result=left.oneOver();
926     break;
927 jfenwick 1889 case GZ:
928 caltinay 5972 result=left.wherePositive();
929     break;
930 jfenwick 1889 case LZ:
931 caltinay 5972 result=left.whereNegative();
932     break;
933 jfenwick 1889 case GEZ:
934 caltinay 5972 result=left.whereNonNegative();
935     break;
936 jfenwick 1889 case LEZ:
937 caltinay 5972 result=left.whereNonPositive();
938     break;
939 jfenwick 2147 case NEZ:
940 caltinay 5972 result=left.whereNonZero(m_tol);
941     break;
942 jfenwick 2147 case EZ:
943 caltinay 5972 result=left.whereZero(m_tol);
944     break;
945 jfenwick 2037 case SYM:
946 caltinay 5972 result=left.symmetric();
947     break;
948 jfenwick 2037 case NSYM:
949 caltinay 5972 result=left.nonsymmetric();
950     break;
951 jfenwick 2066 case PROD:
952 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953     break;
954 jfenwick 2084 case TRANS:
955 caltinay 5972 result=left.transpose(m_axis_offset);
956     break;
957 jfenwick 2084 case TRACE:
958 caltinay 5972 result=left.trace(m_axis_offset);
959     break;
960 jfenwick 2496 case SWAP:
961 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
962     break;
963 jfenwick 2721 case MINVAL:
964 caltinay 5972 result=left.minval();
965     break;
966 jfenwick 2721 case MAXVAL:
967 caltinay 5972 result=left.minval();
968     break;
969 jfenwick 1889 default:
970 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
971 jfenwick 1889 }
972     return result.borrowReadyPtr();
973     }
974    
975 jfenwick 1899 /*
976     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
977     This method uses the original methods on the Data class to evaluate the expressions.
978     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
979     the purpose of using DataLazy in the first place).
980     */
981 jfenwick 1889 void
982 jfenwick 4621 DataLazy::collapse() const
983 jfenwick 1889 {
984     if (m_op==IDENTITY)
985     {
986 caltinay 5972 return;
987 jfenwick 1889 }
988     if (m_readytype=='E')
989 caltinay 5972 { // this is more an efficiency concern than anything else
990 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
991     }
992     m_id=collapseToReady();
993     m_op=IDENTITY;
994     }
995    
996 jfenwick 1899
997 jfenwick 1889
998    
999    
1000 jfenwick 2147
1001 phornby 1987 #define PROC_OP(TYPE,X) \
1002 caltinay 5972 for (int j=0;j<onumsteps;++j)\
1003     {\
1004     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1005     { \
1006 jfenwick 2152 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1007 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1008 caltinay 5972 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1009 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1010 caltinay 5972 lroffset+=leftstep; \
1011     rroffset+=rightstep; \
1012     }\
1013     lroffset+=oleftstep;\
1014     rroffset+=orightstep;\
1015     }
1016 jfenwick 1889
1017 jfenwick 1899
1018 jfenwick 2500 // The result will be stored in m_samples
1019     // The return value is a pointer to the DataVector, offset is the offset within the return value
1020 jfenwick 5938 const DataTypes::RealVectorType*
1021 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1022 jfenwick 2500 {
1023     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1024 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1025 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1026     {
1027 caltinay 5972 collapse();
1028 jfenwick 2500 }
1029 caltinay 5972 if (m_op==IDENTITY)
1030 jfenwick 2500 {
1031     const ValueType& vec=m_id->getVectorRO();
1032     roffset=m_id->getPointOffset(sampleNo, 0);
1033 jfenwick 2777 #ifdef LAZY_STACK_PROF
1034     int x;
1035     if (&x<stackend[omp_get_thread_num()])
1036     {
1037     stackend[omp_get_thread_num()]=&x;
1038     }
1039     #endif
1040 jfenwick 2500 return &(vec);
1041     }
1042     if (m_readytype!='E')
1043     {
1044     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1045     }
1046 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1047     {
1048 caltinay 5972 roffset=tid*m_samplesize;
1049     return &(m_samples); // sample is already resolved
1050 jfenwick 2501 }
1051     m_sampleids[tid]=sampleNo;
1052 jfenwick 3035
1053 jfenwick 2500 switch (getOpgroup(m_op))
1054     {
1055     case G_UNARY:
1056     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1057     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1058     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1059     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1060     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1061     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1062 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1063 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1064 jfenwick 2500 default:
1065     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1066     }
1067     }
1068    
1069 jfenwick 5938 const DataTypes::RealVectorType*
1070 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1071 jfenwick 2500 {
1072 caltinay 5972 // we assume that any collapsing has been done before we get here
1073     // since we only have one argument we don't need to think about only
1074     // processing single points.
1075     // we will also know we won't get identity nodes
1076 jfenwick 2500 if (m_readytype!='E')
1077     {
1078     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1079     }
1080     if (m_op==IDENTITY)
1081     {
1082     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1083     }
1084 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1085 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1086     roffset=m_samplesize*tid;
1087     double* result=&(m_samples[roffset]);
1088     switch (m_op)
1089     {
1090 caltinay 5972 case SIN:
1091     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1092     break;
1093 jfenwick 2500 case COS:
1094 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1095     break;
1096 jfenwick 2500 case TAN:
1097 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1098     break;
1099 jfenwick 2500 case ASIN:
1100 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1101     break;
1102 jfenwick 2500 case ACOS:
1103 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1104     break;
1105 jfenwick 2500 case ATAN:
1106 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1107     break;
1108 jfenwick 2500 case SINH:
1109 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1110     break;
1111 jfenwick 2500 case COSH:
1112 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1113     break;
1114 jfenwick 2500 case TANH:
1115 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1116     break;
1117 jfenwick 2500 case ERF:
1118     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1119 caltinay 5972 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1120 jfenwick 2500 #else
1121 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::erf);
1122     break;
1123 jfenwick 2500 #endif
1124     case ASINH:
1125     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1126 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1127 jfenwick 2500 #else
1128 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1129 jfenwick 2500 #endif
1130 caltinay 5972 break;
1131 jfenwick 2500 case ACOSH:
1132     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1133 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1134 jfenwick 2500 #else
1135 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1136 jfenwick 2500 #endif
1137 caltinay 5972 break;
1138 jfenwick 2500 case ATANH:
1139     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1140 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1141 jfenwick 2500 #else
1142 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1143 jfenwick 2500 #endif
1144 caltinay 5972 break;
1145 jfenwick 2500 case LOG10:
1146 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1147     break;
1148 jfenwick 2500 case LOG:
1149 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1150     break;
1151 jfenwick 2500 case SIGN:
1152 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1153     break;
1154 jfenwick 2500 case ABS:
1155 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1156     break;
1157 jfenwick 2500 case NEG:
1158 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1159     break;
1160 jfenwick 2500 case POS:
1161 caltinay 5972 // it doesn't mean anything for delayed.
1162     // it will just trigger a deep copy of the lazy object
1163     throw DataException("Programmer error - POS not supported for lazy data.");
1164     break;
1165 jfenwick 2500 case EXP:
1166 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1167     break;
1168 jfenwick 2500 case SQRT:
1169 caltinay 5972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1170     break;
1171 jfenwick 2500 case RECIP:
1172 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1173     break;
1174 jfenwick 2500 case GZ:
1175 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1176     break;
1177 jfenwick 2500 case LZ:
1178 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1179     break;
1180 jfenwick 2500 case GEZ:
1181 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1182     break;
1183 jfenwick 2500 case LEZ:
1184 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1185     break;
1186 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1187     case NEZ:
1188 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1189     break;
1190 jfenwick 2500 case EZ:
1191 caltinay 5972 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1192     break;
1193 jfenwick 2500
1194     default:
1195 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1196 jfenwick 2500 }
1197     return &(m_samples);
1198     }
1199    
1200    
1201 jfenwick 5938 const DataTypes::RealVectorType*
1202 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1203 jfenwick 2721 {
1204 caltinay 5972 // we assume that any collapsing has been done before we get here
1205     // since we only have one argument we don't need to think about only
1206     // processing single points.
1207     // we will also know we won't get identity nodes
1208 jfenwick 2721 if (m_readytype!='E')
1209     {
1210     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1211     }
1212     if (m_op==IDENTITY)
1213     {
1214     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1215     }
1216     size_t loffset=0;
1217 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1218 jfenwick 2721
1219     roffset=m_samplesize*tid;
1220 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1221 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1222 jfenwick 2721 double* result=&(m_samples[roffset]);
1223     switch (m_op)
1224     {
1225     case MINVAL:
1226 caltinay 5972 {
1227     for (unsigned int z=0;z<ndpps;++z)
1228     {
1229     FMin op;
1230     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1231     loffset+=psize;
1232     result++;
1233     }
1234     }
1235     break;
1236 jfenwick 2721 case MAXVAL:
1237 caltinay 5972 {
1238     for (unsigned int z=0;z<ndpps;++z)
1239     {
1240     FMax op;
1241     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1242     loffset+=psize;
1243     result++;
1244     }
1245     }
1246     break;
1247 jfenwick 2721 default:
1248 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1249 jfenwick 2721 }
1250     return &(m_samples);
1251     }
1252    
1253 jfenwick 5938 const DataTypes::RealVectorType*
1254 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1255 jfenwick 2500 {
1256 caltinay 5972 // we assume that any collapsing has been done before we get here
1257     // since we only have one argument we don't need to think about only
1258     // processing single points.
1259 jfenwick 2500 if (m_readytype!='E')
1260     {
1261     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1262     }
1263     if (m_op==IDENTITY)
1264     {
1265     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1266     }
1267     size_t subroffset;
1268     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1269     roffset=m_samplesize*tid;
1270     size_t loop=0;
1271     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1272     size_t step=getNoValues();
1273     size_t offset=roffset;
1274     switch (m_op)
1275     {
1276     case SYM:
1277 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1278     {
1279     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1280     subroffset+=step;
1281     offset+=step;
1282     }
1283     break;
1284 jfenwick 2500 case NSYM:
1285 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1286     {
1287     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1288     subroffset+=step;
1289     offset+=step;
1290     }
1291     break;
1292 jfenwick 2500 default:
1293 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1294 jfenwick 2500 }
1295     return &m_samples;
1296     }
1297    
1298 jfenwick 5938 const DataTypes::RealVectorType*
1299 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1300 jfenwick 2500 {
1301 caltinay 5972 // we assume that any collapsing has been done before we get here
1302     // since we only have one argument we don't need to think about only
1303     // processing single points.
1304 jfenwick 2500 if (m_readytype!='E')
1305     {
1306     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1307     }
1308     if (m_op==IDENTITY)
1309     {
1310     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1311     }
1312     size_t subroffset;
1313     size_t offset;
1314     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1315     roffset=m_samplesize*tid;
1316     offset=roffset;
1317     size_t loop=0;
1318     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1319     size_t outstep=getNoValues();
1320     size_t instep=m_left->getNoValues();
1321     switch (m_op)
1322     {
1323     case TRACE:
1324 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1325     {
1326 jfenwick 2500 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1327 caltinay 5972 subroffset+=instep;
1328     offset+=outstep;
1329     }
1330     break;
1331 jfenwick 2500 case TRANS:
1332 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1333     {
1334 jfenwick 2500 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1335 caltinay 5972 subroffset+=instep;
1336     offset+=outstep;
1337     }
1338     break;
1339 jfenwick 2500 default:
1340 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1341 jfenwick 2500 }
1342     return &m_samples;
1343     }
1344    
1345    
1346 jfenwick 5938 const DataTypes::RealVectorType*
1347 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1348 jfenwick 2500 {
1349     if (m_readytype!='E')
1350     {
1351     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1352     }
1353     if (m_op==IDENTITY)
1354     {
1355     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1356     }
1357     size_t subroffset;
1358     size_t offset;
1359     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1360     roffset=m_samplesize*tid;
1361     offset=roffset;
1362     size_t loop=0;
1363     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1364     size_t outstep=getNoValues();
1365     size_t instep=m_left->getNoValues();
1366     switch (m_op)
1367     {
1368     case SWAP:
1369 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1370     {
1371 jfenwick 2500 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1372 caltinay 5972 subroffset+=instep;
1373     offset+=outstep;
1374     }
1375     break;
1376 jfenwick 2500 default:
1377 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1378 jfenwick 2500 }
1379     return &m_samples;
1380     }
1381    
1382 jfenwick 5938 const DataTypes::RealVectorType*
1383 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1384 jfenwick 3035 {
1385     if (m_readytype!='E')
1386     {
1387     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1388     }
1389     if (m_op!=CONDEVAL)
1390     {
1391     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1392     }
1393     size_t subroffset;
1394 jfenwick 2500
1395 jfenwick 3035 const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1396     const ValueType* srcres=0;
1397     if ((*maskres)[subroffset]>0)
1398     {
1399 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1400 jfenwick 3035 }
1401     else
1402     {
1403 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1404 jfenwick 3035 }
1405    
1406     // Now we need to copy the result
1407    
1408     roffset=m_samplesize*tid;
1409     for (int i=0;i<m_samplesize;++i)
1410     {
1411 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1412 jfenwick 3035 }
1413    
1414     return &m_samples;
1415     }
1416    
1417 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1418     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1419     // If both children are expanded, then we can process them in a single operation (we treat
1420     // the whole sample as one big datapoint.
1421     // If one of the children is not expanded, then we need to treat each point in the sample
1422     // individually.
1423     // There is an additional complication when scalar operations are considered.
1424     // For example, 2+Vector.
1425     // In this case each double within the point is treated individually
1426 jfenwick 5938 const DataTypes::RealVectorType*
1427 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1428 jfenwick 2500 {
1429     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1430    
1431 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1432     // first work out which of the children are expanded
1433 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1434     bool rightExp=(m_right->m_readytype=='E');
1435     if (!leftExp && !rightExp)
1436     {
1437 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1438 jfenwick 2500 }
1439     bool leftScalar=(m_left->getRank()==0);
1440     bool rightScalar=(m_right->getRank()==0);
1441     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1442     {
1443 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1444 jfenwick 2500 }
1445     size_t leftsize=m_left->getNoValues();
1446     size_t rightsize=m_right->getNoValues();
1447 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1448     int leftstep=0; // how far should the left offset advance after each step
1449 jfenwick 2500 int rightstep=0;
1450 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1451     int oleftstep=0; // the o variables refer to the outer loop
1452     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1453 jfenwick 2500 int onumsteps=1;
1454    
1455 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1456 jfenwick 2500 bool RES=(rightExp && rightScalar);
1457 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1458 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1459 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1460 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1461 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1462 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1463    
1464 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1465 jfenwick 2500 {
1466 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1467     leftstep=0;
1468     rightstep=0;
1469     numsteps=1;
1470 jfenwick 2500 }
1471     else if (LES || RES)
1472     {
1473 caltinay 5972 chunksize=1;
1474     if (LES) // left is an expanded scalar
1475     {
1476     if (RS)
1477     {
1478     leftstep=1;
1479     rightstep=0;
1480     numsteps=m_left->getNumDPPSample();
1481     }
1482     else // RN or REN
1483     {
1484     leftstep=0;
1485     oleftstep=1;
1486     rightstep=1;
1487     orightstep=(RN ? -(int)rightsize : 0);
1488     numsteps=rightsize;
1489     onumsteps=m_left->getNumDPPSample();
1490     }
1491     }
1492     else // right is an expanded scalar
1493     {
1494     if (LS)
1495     {
1496     rightstep=1;
1497     leftstep=0;
1498     numsteps=m_right->getNumDPPSample();
1499     }
1500     else
1501     {
1502     rightstep=0;
1503     orightstep=1;
1504     leftstep=1;
1505     oleftstep=(LN ? -(int)leftsize : 0);
1506     numsteps=leftsize;
1507     onumsteps=m_right->getNumDPPSample();
1508     }
1509     }
1510 jfenwick 2500 }
1511 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1512 jfenwick 2500 {
1513 caltinay 5972 if (LEN) // and Right will be a single value
1514     {
1515     chunksize=rightsize;
1516     leftstep=rightsize;
1517     rightstep=0;
1518     numsteps=m_left->getNumDPPSample();
1519     if (RS)
1520     {
1521     numsteps*=leftsize;
1522     }
1523     }
1524     else // REN
1525     {
1526     chunksize=leftsize;
1527     rightstep=leftsize;
1528     leftstep=0;
1529     numsteps=m_right->getNumDPPSample();
1530     if (LS)
1531     {
1532     numsteps*=rightsize;
1533     }
1534     }
1535 jfenwick 2500 }
1536    
1537 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1538     // Get the values of sub-expressions
1539     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1540 jfenwick 2500 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1541     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1542     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1543     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1544     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1545     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1546     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1547     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1548    
1549     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1550     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1551    
1552    
1553     roffset=m_samplesize*tid;
1554 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1555 jfenwick 2500 switch(m_op)
1556     {
1557     case ADD:
1558     PROC_OP(NO_ARG,plus<double>());
1559 caltinay 5972 break;
1560 jfenwick 2500 case SUB:
1561 caltinay 5972 PROC_OP(NO_ARG,minus<double>());
1562     break;
1563 jfenwick 2500 case MUL:
1564 caltinay 5972 PROC_OP(NO_ARG,multiplies<double>());
1565     break;
1566 jfenwick 2500 case DIV:
1567 caltinay 5972 PROC_OP(NO_ARG,divides<double>());
1568     break;
1569 jfenwick 2500 case POW:
1570     PROC_OP(double (double,double),::pow);
1571 caltinay 5972 break;
1572 jfenwick 2500 default:
1573 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1574 jfenwick 2500 }
1575     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1576     return &m_samples;
1577     }
1578    
1579    
1580     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1581     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1582     // unlike the other resolve helpers, we must treat these datapoints separately.
1583 jfenwick 5938 const DataTypes::RealVectorType*
1584 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1585 jfenwick 2500 {
1586     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1587    
1588 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1589     // first work out which of the children are expanded
1590 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1591     bool rightExp=(m_right->m_readytype=='E');
1592     int steps=getNumDPPSample();
1593 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1594 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1595    
1596     int resultStep=getNoValues();
1597     roffset=m_samplesize*tid;
1598     size_t offset=roffset;
1599    
1600     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1601    
1602     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1603    
1604     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1605     cout << getNoValues() << endl;)
1606    
1607    
1608     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1609     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1610     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1611     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1612     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1613     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1614     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1615    
1616 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1617 jfenwick 2500 switch(m_op)
1618     {
1619     case PROD:
1620 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1621     {
1622     const double *ptr_0 = &((*left)[lroffset]);
1623     const double *ptr_1 = &((*right)[rroffset]);
1624 jfenwick 2500
1625     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1626     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1627    
1628 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1629 jfenwick 2500
1630 caltinay 5972 lroffset+=leftStep;
1631     rroffset+=rightStep;
1632     }
1633     break;
1634 jfenwick 2500 default:
1635 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1636 jfenwick 2500 }
1637     roffset=offset;
1638     return &m_samples;
1639     }
1640    
1641 jfenwick 1898
1642 jfenwick 5938 const DataTypes::RealVectorType*
1643 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1644 jfenwick 1879 {
1645 jfenwick 2271 #ifdef _OPENMP
1646 caltinay 5972 int tid=omp_get_thread_num();
1647 jfenwick 2271 #else
1648 caltinay 5972 int tid=0;
1649 jfenwick 2271 #endif
1650 jfenwick 2777
1651     #ifdef LAZY_STACK_PROF
1652 caltinay 5972 stackstart[tid]=&tid;
1653     stackend[tid]=&tid;
1654     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1655     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1656     #pragma omp critical
1657     if (d>maxstackuse)
1658     {
1659 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1660 caltinay 5972 maxstackuse=d;
1661     }
1662     return r;
1663 jfenwick 2777 #else
1664 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1665 jfenwick 2777 #endif
1666 jfenwick 2271 }
1667    
1668    
1669 jfenwick 2497 // This needs to do the work of the identity constructor
1670 jfenwick 2177 void
1671     DataLazy::resolveToIdentity()
1672     {
1673     if (m_op==IDENTITY)
1674 caltinay 5972 return;
1675 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1676 jfenwick 2177 makeIdentity(p);
1677     }
1678 jfenwick 1889
1679 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1680     {
1681     m_op=IDENTITY;
1682     m_axis_offset=0;
1683     m_transpose=0;
1684     m_SL=m_SM=m_SR=0;
1685     m_children=m_height=0;
1686     m_id=p;
1687     if(p->isConstant()) {m_readytype='C';}
1688     else if(p->isExpanded()) {m_readytype='E';}
1689     else if (p->isTagged()) {m_readytype='T';}
1690     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1691     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1692     m_left.reset();
1693     m_right.reset();
1694     }
1695    
1696 jfenwick 2497
1697     DataReady_ptr
1698     DataLazy::resolve()
1699     {
1700     resolveToIdentity();
1701     return m_id;
1702     }
1703    
1704 jfenwick 2799
1705     /* This is really a static method but I think that caused problems in windows */
1706     void
1707     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1708     {
1709     if (dats.empty())
1710     {
1711 caltinay 5972 return;
1712 jfenwick 2799 }
1713     vector<DataLazy*> work;
1714     FunctionSpace fs=dats[0]->getFunctionSpace();
1715     bool match=true;
1716 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1717 jfenwick 2799 {
1718 caltinay 5972 if (dats[i]->m_readytype!='E')
1719     {
1720     dats[i]->collapse();
1721     }
1722     if (dats[i]->m_op!=IDENTITY)
1723     {
1724     work.push_back(dats[i]);
1725     if (fs!=dats[i]->getFunctionSpace())
1726     {
1727     match=false;
1728     }
1729     }
1730 jfenwick 2799 }
1731     if (work.empty())
1732     {
1733 caltinay 5972 return; // no work to do
1734 jfenwick 2799 }
1735 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1736     { // it is possible that dats[0] is one of the objects which we discarded and
1737     // all the other functionspaces match.
1738     vector<DataExpanded*> dep;
1739     vector<ValueType*> vecs;
1740     for (int i=0;i<work.size();++i)
1741     {
1742     dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1743     vecs.push_back(&(dep[i]->getVectorRW()));
1744     }
1745     int totalsamples=work[0]->getNumSamples();
1746     const ValueType* res=0; // Storage for answer
1747     int sample;
1748     #pragma omp parallel private(sample, res)
1749     {
1750     size_t roffset=0;
1751     #pragma omp for schedule(static)
1752     for (sample=0;sample<totalsamples;++sample)
1753     {
1754     roffset=0;
1755     int j;
1756     for (j=work.size()-1;j>=0;--j)
1757     {
1758 jfenwick 2799 #ifdef _OPENMP
1759 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1760 jfenwick 2799 #else
1761 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1762 jfenwick 2799 #endif
1763 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1764     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1765     }
1766     }
1767     }
1768     // Now we need to load the new results as identity ops into the lazy nodes
1769     for (int i=work.size()-1;i>=0;--i)
1770     {
1771 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1772 caltinay 5972 }
1773 jfenwick 2799 }
1774 caltinay 5972 else // functionspaces do not match
1775 jfenwick 2799 {
1776 caltinay 5972 for (int i=0;i<work.size();++i)
1777     {
1778     work[i]->resolveToIdentity();
1779     }
1780 jfenwick 2799 }
1781     }
1782    
1783    
1784    
1785 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1786     DataReady_ptr
1787     DataLazy::resolveNodeWorker()
1788     {
1789 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1790 jfenwick 2500 {
1791     collapse();
1792     }
1793 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1794 jfenwick 2500 {
1795     return m_id;
1796     }
1797 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1798 jfenwick 2500 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1799     ValueType& resvec=result->getVectorRW();
1800     DataReady_ptr resptr=DataReady_ptr(result);
1801    
1802     int sample;
1803     int totalsamples=getNumSamples();
1804 caltinay 5972 const ValueType* res=0; // Storage for answer
1805 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1806 jfenwick 2777 #pragma omp parallel private(sample,res)
1807 jfenwick 2500 {
1808 caltinay 5972 size_t roffset=0;
1809 jfenwick 2777 #ifdef LAZY_STACK_PROF
1810 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1811     stackend[omp_get_thread_num()]=&roffset;
1812 jfenwick 2777 #endif
1813 caltinay 5972 #pragma omp for schedule(static)
1814     for (sample=0;sample<totalsamples;++sample)
1815     {
1816     roffset=0;
1817 jfenwick 2500 #ifdef _OPENMP
1818 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1819 jfenwick 2500 #else
1820 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1821 jfenwick 2500 #endif
1822     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1823     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1824 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1825     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1826     }
1827 jfenwick 2500 }
1828 jfenwick 2777 #ifdef LAZY_STACK_PROF
1829     for (int i=0;i<getNumberOfThreads();++i)
1830     {
1831 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1832     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1833     if (r>maxstackuse)
1834     {
1835     maxstackuse=r;
1836     }
1837 jfenwick 2777 }
1838     cout << "Max resolve Stack use=" << maxstackuse << endl;
1839     #endif
1840 jfenwick 2500 return resptr;
1841     }
1842    
1843 jfenwick 1865 std::string
1844     DataLazy::toString() const
1845     {
1846 jfenwick 1886 ostringstream oss;
1847 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1848 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1849 jfenwick 2737 {
1850 caltinay 5972 case 1: // tree format
1851     oss << endl;
1852     intoTreeString(oss,"");
1853     break;
1854     case 2: // just the depth
1855     break;
1856 jfenwick 2795 default:
1857 caltinay 5972 intoString(oss);
1858     break;
1859 jfenwick 2737 }
1860 jfenwick 1886 return oss.str();
1861 jfenwick 1865 }
1862    
1863 jfenwick 1899
1864 jfenwick 1886 void
1865     DataLazy::intoString(ostringstream& oss) const
1866     {
1867 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1868 jfenwick 1886 switch (getOpgroup(m_op))
1869     {
1870 jfenwick 1889 case G_IDENTITY:
1871 caltinay 5972 if (m_id->isExpanded())
1872     {
1873     oss << "E";
1874     }
1875     else if (m_id->isTagged())
1876     {
1877     oss << "T";
1878     }
1879     else if (m_id->isConstant())
1880     {
1881     oss << "C";
1882     }
1883     else
1884     {
1885     oss << "?";
1886     }
1887     oss << '@' << m_id.get();
1888     break;
1889 jfenwick 1886 case G_BINARY:
1890 caltinay 5972 oss << '(';
1891     m_left->intoString(oss);
1892     oss << ' ' << opToString(m_op) << ' ';
1893     m_right->intoString(oss);
1894     oss << ')';
1895     break;
1896 jfenwick 1886 case G_UNARY:
1897 jfenwick 2147 case G_UNARY_P:
1898 jfenwick 2037 case G_NP1OUT:
1899 jfenwick 2084 case G_NP1OUT_P:
1900 jfenwick 2721 case G_REDUCTION:
1901 caltinay 5972 oss << opToString(m_op) << '(';
1902     m_left->intoString(oss);
1903     oss << ')';
1904     break;
1905 jfenwick 2066 case G_TENSORPROD:
1906 caltinay 5972 oss << opToString(m_op) << '(';
1907     m_left->intoString(oss);
1908     oss << ", ";
1909     m_right->intoString(oss);
1910     oss << ')';
1911     break;
1912 jfenwick 2496 case G_NP1OUT_2P:
1913 caltinay 5972 oss << opToString(m_op) << '(';
1914     m_left->intoString(oss);
1915     oss << ", " << m_axis_offset << ", " << m_transpose;
1916     oss << ')';
1917     break;
1918 jfenwick 3035 case G_CONDEVAL:
1919 caltinay 5972 oss << opToString(m_op)<< '(' ;
1920     m_mask->intoString(oss);
1921     oss << " ? ";
1922     m_left->intoString(oss);
1923     oss << " : ";
1924     m_right->intoString(oss);
1925     oss << ')';
1926     break;
1927 jfenwick 1886 default:
1928 caltinay 5972 oss << "UNKNOWN";
1929 jfenwick 1886 }
1930     }
1931    
1932 jfenwick 2737
1933     void
1934     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1935     {
1936     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1937     switch (getOpgroup(m_op))
1938     {
1939     case G_IDENTITY:
1940 caltinay 5972 if (m_id->isExpanded())
1941     {
1942     oss << "E";
1943     }
1944     else if (m_id->isTagged())
1945     {
1946     oss << "T";
1947     }
1948     else if (m_id->isConstant())
1949     {
1950     oss << "C";
1951     }
1952     else
1953     {
1954     oss << "?";
1955     }
1956     oss << '@' << m_id.get() << endl;
1957     break;
1958 jfenwick 2737 case G_BINARY:
1959 caltinay 5972 oss << opToString(m_op) << endl;
1960     indent+='.';
1961     m_left->intoTreeString(oss, indent);
1962     m_right->intoTreeString(oss, indent);
1963     break;
1964 jfenwick 2737 case G_UNARY:
1965     case G_UNARY_P:
1966     case G_NP1OUT:
1967     case G_NP1OUT_P:
1968     case G_REDUCTION:
1969 caltinay 5972 oss << opToString(m_op) << endl;
1970     indent+='.';
1971     m_left->intoTreeString(oss, indent);
1972     break;
1973 jfenwick 2737 case G_TENSORPROD:
1974 caltinay 5972 oss << opToString(m_op) << endl;
1975     indent+='.';
1976     m_left->intoTreeString(oss, indent);
1977     m_right->intoTreeString(oss, indent);
1978     break;
1979 jfenwick 2737 case G_NP1OUT_2P:
1980 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1981     indent+='.';
1982     m_left->intoTreeString(oss, indent);
1983     break;
1984 jfenwick 2737 default:
1985 caltinay 5972 oss << "UNKNOWN";
1986 jfenwick 2737 }
1987     }
1988    
1989    
1990 jfenwick 1865 DataAbstract*
1991 jfenwick 5938 DataLazy::deepCopy() const
1992 jfenwick 1865 {
1993 jfenwick 1935 switch (getOpgroup(m_op))
1994 jfenwick 1865 {
1995 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1996 caltinay 5972 case G_UNARY:
1997 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1998 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1999     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2000 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2001     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2003     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2004 jfenwick 1935 default:
2005 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2006 jfenwick 1865 }
2007     }
2008    
2009    
2010 jfenwick 2721
2011 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2012     // Instances of DataReady can look at the size of their vectors.
2013     // For lazy though, it could be the size the data would be if it were resolved;
2014     // or it could be some function of the lengths of the DataReady instances which
2015     // form part of the expression.
2016     // Rather than have people making assumptions, I have disabled the method.
2017 jfenwick 5938 DataTypes::RealVectorType::size_type
2018 jfenwick 1865 DataLazy::getLength() const
2019     {
2020 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2021 jfenwick 1865 }
2022    
2023    
2024     DataAbstract*
2025     DataLazy::getSlice(const DataTypes::RegionType& region) const
2026     {
2027 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2028 jfenwick 1865 }
2029    
2030 jfenwick 1935
2031     // To do this we need to rely on our child nodes
2032 jfenwick 5938 DataTypes::RealVectorType::size_type
2033 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2034 jfenwick 1935 int dataPointNo)
2035     {
2036     if (m_op==IDENTITY)
2037     {
2038 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2039 jfenwick 1935 }
2040     if (m_readytype!='E')
2041     {
2042 caltinay 5972 collapse();
2043     return m_id->getPointOffset(sampleNo,dataPointNo);
2044 jfenwick 1935 }
2045     // at this point we do not have an identity node and the expression will be Expanded
2046     // so we only need to know which child to ask
2047     if (m_left->m_readytype=='E')
2048     {
2049 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2050 jfenwick 1935 }
2051     else
2052     {
2053 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2054 jfenwick 1935 }
2055     }
2056    
2057     // To do this we need to rely on our child nodes
2058 jfenwick 5938 DataTypes::RealVectorType::size_type
2059 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2060 jfenwick 1865 int dataPointNo) const
2061     {
2062 jfenwick 1935 if (m_op==IDENTITY)
2063     {
2064 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2065 jfenwick 1935 }
2066     if (m_readytype=='E')
2067     {
2068     // at this point we do not have an identity node and the expression will be Expanded
2069     // so we only need to know which child to ask
2070     if (m_left->m_readytype=='E')
2071     {
2072 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2073 jfenwick 1935 }
2074     else
2075     {
2076 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2077 jfenwick 1935 }
2078     }
2079     if (m_readytype=='C')
2080     {
2081 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2082 jfenwick 1935 }
2083     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2084 jfenwick 1865 }
2085    
2086 jfenwick 2271
2087     // I have decided to let Data:: handle this issue.
2088 jfenwick 1901 void
2089     DataLazy::setToZero()
2090     {
2091 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2092 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2093     // m_op=IDENTITY;
2094     // m_right.reset();
2095     // m_left.reset();
2096     // m_readytype='C';
2097     // m_buffsRequired=1;
2098    
2099 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2100 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2101 jfenwick 1901 }
2102    
2103 jfenwick 2271 bool
2104     DataLazy::actsExpanded() const
2105     {
2106 caltinay 5972 return (m_readytype=='E');
2107 jfenwick 2271 }
2108    
2109 caltinay 5997 } // end namespace
2110 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26