/[escript]/trunk/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /trunk/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6144 - (hide annotations)
Wed Apr 6 05:25:13 2016 UTC (3 years ago) by caltinay
File size: 66467 byte(s)
last round of namespacing defines.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
26 jfenwick 2199
27 jfenwick 5938 using namespace escript::DataTypes;
28    
29 caltinay 5972 #define NO_ARG
30    
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42 jfenwick 2177
43 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44 jfenwick 2472
45 jfenwick 2501
46 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
47    
48 jfenwick 1899 /*
49     How does DataLazy work?
50     ~~~~~~~~~~~~~~~~~~~~~~~
51    
52     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
53     denoted left and right.
54    
55     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
56     This means that all "internal" nodes in the structure are instances of DataLazy.
57    
58     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59 caltinay 4286 Note that IDENTITY is not considered a unary operation.
60 jfenwick 1899
61     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62     It must however form a DAG (directed acyclic graph).
63     I will refer to individual DataLazy objects with the structure as nodes.
64    
65     Each node also stores:
66     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67 caltinay 5972 evaluated.
68 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69 caltinay 5972 evaluate the expression.
70 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
71    
72     When a new node is created, the above values are computed based on the values in the child nodes.
73     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
74    
75     The resolve method, which produces a DataReady from a DataLazy, does the following:
76     1) Create a DataReady to hold the new result.
77     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
78     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
79    
80     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
81    
82     resolveSample returns a Vector* and an offset within that vector where the result is stored.
83     Normally, this would be v, but for identity nodes their internal vector is returned instead.
84    
85     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
86    
87     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
88     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
89 jfenwick 2037
90 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
91 jfenwick 2037 1) Add to the ES_optype.
92     2) determine what opgroup your operation belongs to (X)
93     3) add a string for the op to the end of ES_opstrings
94     4) increase ES_opcount
95     5) add an entry (X) to opgroups
96     6) add an entry to the switch in collapseToReady
97     7) add an entry to resolveX
98 jfenwick 1899 */
99    
100    
101 jfenwick 1865 using namespace std;
102 jfenwick 1868 using namespace boost;
103 jfenwick 1865
104     namespace escript
105     {
106    
107     namespace
108     {
109    
110 jfenwick 2777
111     // enabling this will print out when ever the maximum stacksize used by resolve increases
112     // it assumes _OPENMP is also in use
113     //#define LAZY_STACK_PROF
114    
115    
116    
117     #ifndef _OPENMP
118     #ifdef LAZY_STACK_PROF
119     #undef LAZY_STACK_PROF
120     #endif
121     #endif
122    
123    
124     #ifdef LAZY_STACK_PROF
125     std::vector<void*> stackstart(getNumberOfThreads());
126     std::vector<void*> stackend(getNumberOfThreads());
127     size_t maxstackuse=0;
128     #endif
129    
130 jfenwick 1886 enum ES_opgroup
131     {
132     G_UNKNOWN,
133     G_IDENTITY,
134 caltinay 5972 G_BINARY, // pointwise operations with two arguments
135     G_UNARY, // pointwise operations with one argument
136     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
137     G_NP1OUT, // non-pointwise op with one output
138     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
139     G_TENSORPROD, // general tensor product
140     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
141     G_REDUCTION, // non-pointwise unary op with a scalar output
142 jfenwick 3035 G_CONDEVAL
143 jfenwick 1886 };
144    
145    
146    
147    
148 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
149 caltinay 5972 "sin","cos","tan",
150     "asin","acos","atan","sinh","cosh","tanh","erf",
151     "asinh","acosh","atanh",
152     "log10","log","sign","abs","neg","pos","exp","sqrt",
153     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
154 jfenwick 6114 "symmetric","antisymmetric",
155 caltinay 5972 "prod",
156     "transpose", "trace",
157     "swapaxes",
158     "minval", "maxval",
159 jfenwick 6125 "condEval",
160     "hermitian","antihermitian"
161     };
162     int ES_opcount=46;
163 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
164 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
165     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
166     G_UNARY,G_UNARY,G_UNARY, // 20
167     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
168     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
169     G_NP1OUT,G_NP1OUT,
170     G_TENSORPROD,
171     G_NP1OUT_P, G_NP1OUT_P,
172     G_NP1OUT_2P,
173     G_REDUCTION, G_REDUCTION,
174 jfenwick 6125 G_CONDEVAL,
175     G_UNARY,G_UNARY
176     };
177 jfenwick 1886 inline
178     ES_opgroup
179     getOpgroup(ES_optype op)
180     {
181     return opgroups[op];
182     }
183    
184 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
185     FunctionSpace
186     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 caltinay 5972 // perhaps this should call interpolate and throw or something?
189     // maybe we need an interpolate node -
190     // that way, if interpolate is required in any other op we can just throw a
191     // programming error exception.
192 jfenwick 1879
193 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
194     FunctionSpace r=right->getFunctionSpace();
195     if (l!=r)
196     {
197 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198     if (res==1)
199 jfenwick 1943 {
200 caltinay 5972 return l;
201 jfenwick 1943 }
202 jfenwick 4264 if (res==-1)
203 jfenwick 1943 {
204 caltinay 5972 return r;
205 jfenwick 1943 }
206     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207     }
208     return l;
209 jfenwick 1865 }
210    
211     // return the shape of the result of "left op right"
212 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213 jfenwick 1865 DataTypes::ShapeType
214     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215     {
216 caltinay 5972 if (left->getShape()!=right->getShape())
217     {
218     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219     {
220     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221     }
222 jfenwick 2721
223 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
224     {
225     return right->getShape();
226     }
227     if (right->getRank()==0)
228     {
229     return left->getShape();
230     }
231     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232     }
233     return left->getShape();
234 jfenwick 1865 }
235    
236 jfenwick 2084 // return the shape for "op left"
237    
238     DataTypes::ShapeType
239 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240 jfenwick 2084 {
241 caltinay 5972 switch(op)
242     {
243     case TRANS:
244     { // for the scoping of variables
245     const DataTypes::ShapeType& s=left->getShape();
246     DataTypes::ShapeType sh;
247     int rank=left->getRank();
248     if (axis_offset<0 || axis_offset>rank)
249     {
250 caltinay 2779 stringstream e;
251     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252     throw DataException(e.str());
253 caltinay 5972 }
254     for (int i=0; i<rank; i++)
255     {
256     int index = (axis_offset+i)%rank;
257     sh.push_back(s[index]); // Append to new shape
258     }
259     return sh;
260     }
261     break;
262     case TRACE:
263     {
264     int rank=left->getRank();
265     if (rank<2)
266     {
267     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268     }
269     if ((axis_offset>rank-2) || (axis_offset<0))
270     {
271     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272     }
273     if (rank==2)
274     {
275     return DataTypes::scalarShape;
276     }
277     else if (rank==3)
278     {
279     DataTypes::ShapeType sh;
280     if (axis_offset==0)
281     {
282     sh.push_back(left->getShape()[2]);
283     }
284     else // offset==1
285     {
286     sh.push_back(left->getShape()[0]);
287     }
288     return sh;
289     }
290     else if (rank==4)
291     {
292     DataTypes::ShapeType sh;
293     const DataTypes::ShapeType& s=left->getShape();
294     if (axis_offset==0)
295     {
296     sh.push_back(s[2]);
297     sh.push_back(s[3]);
298     }
299     else if (axis_offset==1)
300     {
301     sh.push_back(s[0]);
302     sh.push_back(s[3]);
303     }
304     else // offset==2
305     {
306     sh.push_back(s[0]);
307     sh.push_back(s[1]);
308     }
309     return sh;
310     }
311     else // unknown rank
312     {
313     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314     }
315     }
316     break;
317     default:
318     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319     }
320 jfenwick 2084 }
321    
322 jfenwick 2496 DataTypes::ShapeType
323     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324     {
325     // This code taken from the Data.cpp swapaxes() method
326     // Some of the checks are probably redundant here
327     int axis0_tmp,axis1_tmp;
328     const DataTypes::ShapeType& s=left->getShape();
329     DataTypes::ShapeType out_shape;
330     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     int rank=left->getRank();
333     if (rank<2) {
334     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335     }
336     if (axis0<0 || axis0>rank-1) {
337 caltinay 2779 stringstream e;
338     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339     throw DataException(e.str());
340 jfenwick 2496 }
341     if (axis1<0 || axis1>rank-1) {
342 caltinay 2782 stringstream e;
343 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344     throw DataException(e.str());
345 jfenwick 2496 }
346     if (axis0 == axis1) {
347     throw DataException("Error - Data::swapaxes: axis indices must be different.");
348     }
349     if (axis0 > axis1) {
350     axis0_tmp=axis1;
351     axis1_tmp=axis0;
352     } else {
353     axis0_tmp=axis0;
354     axis1_tmp=axis1;
355     }
356     for (int i=0; i<rank; i++) {
357     if (i == axis0_tmp) {
358     out_shape.push_back(s[axis1_tmp]);
359     } else if (i == axis1_tmp) {
360     out_shape.push_back(s[axis0_tmp]);
361     } else {
362     out_shape.push_back(s[i]);
363     }
364     }
365     return out_shape;
366     }
367    
368    
369 jfenwick 2066 // determine the output shape for the general tensor product operation
370     // the additional parameters return information required later for the product
371     // the majority of this code is copy pasted from C_General_Tensor_Product
372     DataTypes::ShapeType
373     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374 jfenwick 1865 {
375 caltinay 5972
376 jfenwick 2066 // Get rank and shape of inputs
377     int rank0 = left->getRank();
378     int rank1 = right->getRank();
379     const DataTypes::ShapeType& shape0 = left->getShape();
380     const DataTypes::ShapeType& shape1 = right->getShape();
381    
382     // Prepare for the loops of the product and verify compatibility of shapes
383     int start0=0, start1=0;
384 caltinay 5972 if (transpose == 0) {}
385     else if (transpose == 1) { start0 = axis_offset; }
386     else if (transpose == 2) { start1 = rank1-axis_offset; }
387     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388 jfenwick 2066
389 jfenwick 2085 if (rank0<axis_offset)
390     {
391 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392 jfenwick 2085 }
393 jfenwick 2066
394     // Adjust the shapes for transpose
395 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
396     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
397     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399 jfenwick 2066
400     // Prepare for the loops of the product
401     SL=1, SM=1, SR=1;
402 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
403 jfenwick 2066 SL *= tmpShape0[i];
404     }
405 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
406 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408     }
409     SM *= tmpShape0[i];
410     }
411 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
412 jfenwick 2066 SR *= tmpShape1[i];
413     }
414    
415     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
417     { // block to limit the scope of out_index
418 jfenwick 2066 int out_index=0;
419     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421     }
422 jfenwick 2086
423     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424     {
425     ostringstream os;
426     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427     throw DataException(os.str());
428     }
429    
430 jfenwick 2066 return shape2;
431 jfenwick 1865 }
432    
433 caltinay 5972 } // end anonymous namespace
434 jfenwick 1865
435    
436 jfenwick 1899
437     // Return a string representing the operation
438 jfenwick 1865 const std::string&
439     opToString(ES_optype op)
440     {
441     if (op<0 || op>=ES_opcount)
442     {
443     op=UNKNOWNOP;
444     }
445     return ES_opstrings[op];
446     }
447    
448 jfenwick 2500 void DataLazy::LazyNodeSetup()
449     {
450     #ifdef _OPENMP
451     int numthreads=omp_get_max_threads();
452     m_samples.resize(numthreads*m_samplesize);
453     m_sampleids=new int[numthreads];
454     for (int i=0;i<numthreads;++i)
455     {
456     m_sampleids[i]=-1;
457     }
458     #else
459     m_samples.resize(m_samplesize);
460     m_sampleids=new int[1];
461     m_sampleids[0]=-1;
462     #endif // _OPENMP
463     }
464 jfenwick 1865
465 jfenwick 2500
466     // Creates an identity node
467 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
468 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
469     ,m_sampleids(0),
470     m_samples(1)
471 jfenwick 1865 {
472 jfenwick 1879 if (p->isLazy())
473     {
474 caltinay 5972 // I don't want identity of Lazy.
475     // Question: Why would that be so bad?
476     // Answer: We assume that the child of ID is something we can call getVector on
477     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478 jfenwick 1879 }
479     else
480     {
481 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
482     makeIdentity(dr);
483 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
484 jfenwick 1879 }
485 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
486 jfenwick 1865 }
487    
488 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
489 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
490     m_op(op),
491     m_axis_offset(0),
492     m_transpose(0),
493     m_SL(0), m_SM(0), m_SR(0)
494 jfenwick 1886 {
495 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
496 jfenwick 1886 {
497 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
498 jfenwick 1886 }
499 jfenwick 2066
500 jfenwick 1886 DataLazy_ptr lleft;
501     if (!left->isLazy())
502     {
503 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
504 jfenwick 1886 }
505     else
506     {
507 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
508 jfenwick 1886 }
509 jfenwick 1889 m_readytype=lleft->m_readytype;
510 jfenwick 1886 m_left=lleft;
511     m_samplesize=getNumDPPSample()*getNoValues();
512 jfenwick 2177 m_children=m_left->m_children+1;
513     m_height=m_left->m_height+1;
514 jfenwick 2500 LazyNodeSetup();
515 jfenwick 2177 SIZELIMIT
516 jfenwick 1886 }
517    
518    
519 jfenwick 1943 // In this constructor we need to consider interpolation
520 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
521 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
522     m_op(op),
523     m_SL(0), m_SM(0), m_SR(0)
524 jfenwick 1879 {
525 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
526 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
527 jfenwick 1886 {
528 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
529 jfenwick 1886 }
530 jfenwick 1943
531 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
532 jfenwick 1943 {
533 caltinay 5972 FunctionSpace fs=getFunctionSpace();
534     Data ltemp(left);
535     Data tmp(ltemp,fs);
536     left=tmp.borrowDataPtr();
537 jfenwick 1943 }
538 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
539 jfenwick 1943 {
540 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
541     right=tmp.borrowDataPtr();
542 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
543 jfenwick 1943 }
544     left->operandCheck(*right);
545    
546 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
547 jfenwick 1879 {
548 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
549 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
550 jfenwick 1879 }
551     else
552     {
553 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
554 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
555 jfenwick 1879 }
556     if (right->isLazy())
557     {
558 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
559 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
560 jfenwick 1879 }
561     else
562     {
563 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
564 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
565 jfenwick 1879 }
566 jfenwick 1889 char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')
569     {
570 caltinay 5972 m_readytype='E';
571 jfenwick 1889 }
572     else if (lt=='T' || rt=='T')
573     {
574 caltinay 5972 m_readytype='T';
575 jfenwick 1889 }
576     else
577     {
578 caltinay 5972 m_readytype='C';
579 jfenwick 1889 }
580 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
581 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;
583 jfenwick 2500 LazyNodeSetup();
584 jfenwick 2177 SIZELIMIT
585 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
586 jfenwick 1879 }
587    
588 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
589 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
590     m_op(op),
591     m_axis_offset(axis_offset),
592     m_transpose(transpose)
593 jfenwick 2066 {
594     if ((getOpgroup(op)!=G_TENSORPROD))
595     {
596 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
597 jfenwick 2066 }
598     if ((transpose>2) || (transpose<0))
599     {
600 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
601 jfenwick 2066 }
602 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
603 jfenwick 2066 {
604 caltinay 5972 FunctionSpace fs=getFunctionSpace();
605     Data ltemp(left);
606     Data tmp(ltemp,fs);
607     left=tmp.borrowDataPtr();
608 jfenwick 2066 }
609 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
610 jfenwick 2066 {
611 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
612     right=tmp.borrowDataPtr();
613 jfenwick 2066 }
614 jfenwick 2195 // left->operandCheck(*right);
615 jfenwick 1879
616 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
617 jfenwick 2066 {
618 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
619 jfenwick 2066 }
620     else
621     {
622 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
623 jfenwick 2066 }
624     if (right->isLazy())
625     {
626 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
627 jfenwick 2066 }
628     else
629     {
630 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
631 jfenwick 2066 }
632     char lt=m_left->m_readytype;
633     char rt=m_right->m_readytype;
634     if (lt=='E' || rt=='E')
635     {
636 caltinay 5972 m_readytype='E';
637 jfenwick 2066 }
638     else if (lt=='T' || rt=='T')
639     {
640 caltinay 5972 m_readytype='T';
641 jfenwick 2066 }
642     else
643     {
644 caltinay 5972 m_readytype='C';
645 jfenwick 2066 }
646     m_samplesize=getNumDPPSample()*getNoValues();
647 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
648     m_height=max(m_left->m_height,m_right->m_height)+1;
649 jfenwick 2500 LazyNodeSetup();
650 jfenwick 2177 SIZELIMIT
651 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
652 jfenwick 2066 }
653    
654    
655 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
656 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
657     m_op(op),
658     m_axis_offset(axis_offset),
659     m_transpose(0),
660     m_tol(0)
661 jfenwick 2084 {
662     if ((getOpgroup(op)!=G_NP1OUT_P))
663     {
664 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
665 jfenwick 2084 }
666     DataLazy_ptr lleft;
667     if (!left->isLazy())
668     {
669 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
670 jfenwick 2084 }
671     else
672     {
673 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
674 jfenwick 2084 }
675     m_readytype=lleft->m_readytype;
676     m_left=lleft;
677     m_samplesize=getNumDPPSample()*getNoValues();
678 jfenwick 2177 m_children=m_left->m_children+1;
679     m_height=m_left->m_height+1;
680 jfenwick 2500 LazyNodeSetup();
681 jfenwick 2177 SIZELIMIT
682 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
683 jfenwick 2084 }
684    
685 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
686 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
687     m_op(op),
688     m_axis_offset(0),
689     m_transpose(0),
690     m_tol(tol)
691 jfenwick 2147 {
692     if ((getOpgroup(op)!=G_UNARY_P))
693     {
694 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
695 jfenwick 2147 }
696     DataLazy_ptr lleft;
697     if (!left->isLazy())
698     {
699 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
700 jfenwick 2147 }
701     else
702     {
703 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
704 jfenwick 2147 }
705     m_readytype=lleft->m_readytype;
706     m_left=lleft;
707     m_samplesize=getNumDPPSample()*getNoValues();
708 jfenwick 2177 m_children=m_left->m_children+1;
709     m_height=m_left->m_height+1;
710 jfenwick 2500 LazyNodeSetup();
711 jfenwick 2177 SIZELIMIT
712 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
713     }
714 jfenwick 2084
715 jfenwick 2496
716     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
717 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
718     m_op(op),
719     m_axis_offset(axis0),
720     m_transpose(axis1),
721     m_tol(0)
722 jfenwick 2496 {
723     if ((getOpgroup(op)!=G_NP1OUT_2P))
724     {
725 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
726 jfenwick 2496 }
727     DataLazy_ptr lleft;
728     if (!left->isLazy())
729     {
730 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
731 jfenwick 2496 }
732     else
733     {
734 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
735 jfenwick 2496 }
736     m_readytype=lleft->m_readytype;
737     m_left=lleft;
738     m_samplesize=getNumDPPSample()*getNoValues();
739     m_children=m_left->m_children+1;
740     m_height=m_left->m_height+1;
741 jfenwick 2500 LazyNodeSetup();
742 jfenwick 2496 SIZELIMIT
743     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
744     }
745    
746 jfenwick 3035
747     namespace
748     {
749    
750     inline int max3(int a, int b, int c)
751     {
752 caltinay 5972 int t=(a>b?a:b);
753     return (t>c?t:c);
754 jfenwick 3035
755     }
756     }
757    
758     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
759 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
760     m_op(CONDEVAL),
761     m_axis_offset(0),
762     m_transpose(0),
763     m_tol(0)
764 jfenwick 3035 {
765    
766     DataLazy_ptr lmask;
767     DataLazy_ptr lleft;
768     DataLazy_ptr lright;
769     if (!mask->isLazy())
770     {
771 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
772 jfenwick 3035 }
773     else
774     {
775 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
776 jfenwick 3035 }
777     if (!left->isLazy())
778     {
779 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
780 jfenwick 3035 }
781     else
782     {
783 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
784 jfenwick 3035 }
785     if (!right->isLazy())
786     {
787 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
788 jfenwick 3035 }
789     else
790     {
791 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
792 jfenwick 3035 }
793     m_readytype=lmask->m_readytype;
794     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
795     {
796 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
797 jfenwick 3035 }
798     m_left=lleft;
799     m_right=lright;
800     m_mask=lmask;
801     m_samplesize=getNumDPPSample()*getNoValues();
802     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
803     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
804     LazyNodeSetup();
805     SIZELIMIT
806     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
807     }
808    
809    
810    
811 jfenwick 1865 DataLazy::~DataLazy()
812     {
813 jfenwick 2500 delete[] m_sampleids;
814 jfenwick 1865 }
815    
816 jfenwick 1879
817 jfenwick 1899 /*
818     \brief Evaluates the expression using methods on Data.
819     This does the work for the collapse method.
820     For reasons of efficiency do not call this method on DataExpanded nodes.
821     */
822 jfenwick 1889 DataReady_ptr
823 jfenwick 4621 DataLazy::collapseToReady() const
824 jfenwick 1889 {
825     if (m_readytype=='E')
826 caltinay 5972 { // this is more an efficiency concern than anything else
827 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
828     }
829     if (m_op==IDENTITY)
830     {
831     return m_id;
832     }
833     DataReady_ptr pleft=m_left->collapseToReady();
834     Data left(pleft);
835     Data right;
836 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
837 jfenwick 1889 {
838     right=Data(m_right->collapseToReady());
839     }
840     Data result;
841     switch(m_op)
842     {
843     case ADD:
844 caltinay 5972 result=left+right;
845     break;
846     case SUB:
847     result=left-right;
848     break;
849     case MUL:
850     result=left*right;
851     break;
852     case DIV:
853     result=left/right;
854     break;
855 jfenwick 1889 case SIN:
856 caltinay 5972 result=left.sin();
857     break;
858 jfenwick 1889 case COS:
859 caltinay 5972 result=left.cos();
860     break;
861 jfenwick 1889 case TAN:
862 caltinay 5972 result=left.tan();
863     break;
864 jfenwick 1889 case ASIN:
865 caltinay 5972 result=left.asin();
866     break;
867 jfenwick 1889 case ACOS:
868 caltinay 5972 result=left.acos();
869     break;
870 jfenwick 1889 case ATAN:
871 caltinay 5972 result=left.atan();
872     break;
873 jfenwick 1889 case SINH:
874 caltinay 5972 result=left.sinh();
875     break;
876 jfenwick 1889 case COSH:
877 caltinay 5972 result=left.cosh();
878     break;
879 jfenwick 1889 case TANH:
880 caltinay 5972 result=left.tanh();
881     break;
882 jfenwick 1889 case ERF:
883 caltinay 5972 result=left.erf();
884     break;
885 jfenwick 1889 case ASINH:
886 caltinay 5972 result=left.asinh();
887     break;
888 jfenwick 1889 case ACOSH:
889 caltinay 5972 result=left.acosh();
890     break;
891 jfenwick 1889 case ATANH:
892 caltinay 5972 result=left.atanh();
893     break;
894 jfenwick 1889 case LOG10:
895 caltinay 5972 result=left.log10();
896     break;
897 jfenwick 1889 case LOG:
898 caltinay 5972 result=left.log();
899     break;
900 jfenwick 1889 case SIGN:
901 caltinay 5972 result=left.sign();
902     break;
903 jfenwick 1889 case ABS:
904 caltinay 5972 result=left.abs();
905     break;
906 jfenwick 1889 case NEG:
907 caltinay 5972 result=left.neg();
908     break;
909 jfenwick 1889 case POS:
910 caltinay 5972 // it doesn't mean anything for delayed.
911     // it will just trigger a deep copy of the lazy object
912     throw DataException("Programmer error - POS not supported for lazy data.");
913     break;
914 jfenwick 1889 case EXP:
915 caltinay 5972 result=left.exp();
916     break;
917 jfenwick 1889 case SQRT:
918 caltinay 5972 result=left.sqrt();
919     break;
920 jfenwick 1889 case RECIP:
921 caltinay 5972 result=left.oneOver();
922     break;
923 jfenwick 1889 case GZ:
924 caltinay 5972 result=left.wherePositive();
925     break;
926 jfenwick 1889 case LZ:
927 caltinay 5972 result=left.whereNegative();
928     break;
929 jfenwick 1889 case GEZ:
930 caltinay 5972 result=left.whereNonNegative();
931     break;
932 jfenwick 1889 case LEZ:
933 caltinay 5972 result=left.whereNonPositive();
934     break;
935 jfenwick 2147 case NEZ:
936 caltinay 5972 result=left.whereNonZero(m_tol);
937     break;
938 jfenwick 2147 case EZ:
939 caltinay 5972 result=left.whereZero(m_tol);
940     break;
941 jfenwick 2037 case SYM:
942 caltinay 5972 result=left.symmetric();
943     break;
944 jfenwick 2037 case NSYM:
945 jfenwick 6110 result=left.antisymmetric();
946 caltinay 5972 break;
947 jfenwick 2066 case PROD:
948 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
949     break;
950 jfenwick 2084 case TRANS:
951 caltinay 5972 result=left.transpose(m_axis_offset);
952     break;
953 jfenwick 2084 case TRACE:
954 caltinay 5972 result=left.trace(m_axis_offset);
955     break;
956 jfenwick 2496 case SWAP:
957 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
958     break;
959 jfenwick 2721 case MINVAL:
960 caltinay 5972 result=left.minval();
961     break;
962 jfenwick 2721 case MAXVAL:
963 caltinay 5972 result=left.minval();
964     break;
965 jfenwick 6125 case HER:
966     result=left.hermitian();
967     break;
968 jfenwick 1889 default:
969 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970 jfenwick 1889 }
971     return result.borrowReadyPtr();
972     }
973    
974 jfenwick 1899 /*
975     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
976     This method uses the original methods on the Data class to evaluate the expressions.
977     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
978     the purpose of using DataLazy in the first place).
979     */
980 jfenwick 1889 void
981 jfenwick 4621 DataLazy::collapse() const
982 jfenwick 1889 {
983     if (m_op==IDENTITY)
984     {
985 caltinay 5972 return;
986 jfenwick 1889 }
987     if (m_readytype=='E')
988 caltinay 5972 { // this is more an efficiency concern than anything else
989 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
990     }
991     m_id=collapseToReady();
992     m_op=IDENTITY;
993     }
994    
995 jfenwick 2500 // The result will be stored in m_samples
996     // The return value is a pointer to the DataVector, offset is the offset within the return value
997 jfenwick 5938 const DataTypes::RealVectorType*
998 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
999 jfenwick 2500 {
1000     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1001 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1002 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1003     {
1004 caltinay 5972 collapse();
1005 jfenwick 2500 }
1006 caltinay 5972 if (m_op==IDENTITY)
1007 jfenwick 2500 {
1008 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1009 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1010 jfenwick 2777 #ifdef LAZY_STACK_PROF
1011     int x;
1012     if (&x<stackend[omp_get_thread_num()])
1013     {
1014     stackend[omp_get_thread_num()]=&x;
1015     }
1016     #endif
1017 jfenwick 2500 return &(vec);
1018     }
1019     if (m_readytype!='E')
1020     {
1021     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1022     }
1023 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1024     {
1025 caltinay 5972 roffset=tid*m_samplesize;
1026     return &(m_samples); // sample is already resolved
1027 jfenwick 2501 }
1028     m_sampleids[tid]=sampleNo;
1029 jfenwick 3035
1030 jfenwick 2500 switch (getOpgroup(m_op))
1031     {
1032     case G_UNARY:
1033     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1034     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1035     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1036     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1037     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1038     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1039 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1040 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1041 jfenwick 2500 default:
1042     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1043     }
1044     }
1045    
1046 jfenwick 5938 const DataTypes::RealVectorType*
1047 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1048 jfenwick 2500 {
1049 caltinay 5972 // we assume that any collapsing has been done before we get here
1050     // since we only have one argument we don't need to think about only
1051     // processing single points.
1052     // we will also know we won't get identity nodes
1053 jfenwick 2500 if (m_readytype!='E')
1054     {
1055     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1056     }
1057     if (m_op==IDENTITY)
1058     {
1059     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1060     }
1061 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1062 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1063     roffset=m_samplesize*tid;
1064     double* result=&(m_samples[roffset]);
1065 jfenwick 6066 escript::ESFunction operation=SINF;
1066 jfenwick 2500 switch (m_op)
1067     {
1068 jfenwick 6066 case SIN:
1069     operation=SINF;
1070     break;
1071 jfenwick 2500 case COS:
1072 jfenwick 6066 operation=COSF;
1073     break;
1074 jfenwick 2500 case TAN:
1075 jfenwick 6066 operation=TANF;
1076     break;
1077 jfenwick 2500 case ASIN:
1078 jfenwick 6066 operation=ASINF;
1079     break;
1080 jfenwick 2500 case ACOS:
1081 jfenwick 6066 operation=ACOSF;
1082     break;
1083 jfenwick 2500 case ATAN:
1084 jfenwick 6066 operation=ATANF;
1085     break;
1086 jfenwick 2500 case SINH:
1087 jfenwick 6066 operation=SINHF;
1088     break;
1089 jfenwick 2500 case COSH:
1090 jfenwick 6066 operation=COSHF;
1091     break;
1092 jfenwick 2500 case TANH:
1093 jfenwick 6066 operation=TANHF;
1094     break;
1095 jfenwick 2500 case ERF:
1096 jfenwick 6066 operation=ERFF;
1097     break;
1098 jfenwick 2500 case ASINH:
1099 jfenwick 6066 operation=ASINHF;
1100     break;
1101 jfenwick 2500 case ACOSH:
1102 jfenwick 6066 operation=ACOSHF;
1103     break;
1104 jfenwick 2500 case ATANH:
1105 jfenwick 6066 operation=ATANHF;
1106     break;
1107 jfenwick 2500 case LOG10:
1108 jfenwick 6066 operation=LOG10F;
1109     break;
1110 jfenwick 2500 case LOG:
1111 jfenwick 6066 operation=LOGF;
1112     break;
1113 jfenwick 2500 case SIGN:
1114 jfenwick 6066 operation=SIGNF;
1115     break;
1116 jfenwick 2500 case ABS:
1117 jfenwick 6066 operation=ABSF;
1118     break;
1119 jfenwick 2500 case NEG:
1120 jfenwick 6066 operation=NEGF;
1121     break;
1122 jfenwick 2500 case POS:
1123 caltinay 5972 // it doesn't mean anything for delayed.
1124     // it will just trigger a deep copy of the lazy object
1125     throw DataException("Programmer error - POS not supported for lazy data.");
1126     break;
1127 jfenwick 2500 case EXP:
1128 jfenwick 6066 operation=EXPF;
1129     break;
1130 jfenwick 2500 case SQRT:
1131 jfenwick 6066 operation=SQRTF;
1132     break;
1133 jfenwick 2500 case RECIP:
1134 jfenwick 6066 operation=INVF;
1135     break;
1136 jfenwick 2500 case GZ:
1137 jfenwick 6066 operation=GTZEROF;
1138     break;
1139 jfenwick 2500 case LZ:
1140 jfenwick 6066 operation=LTZEROF;
1141     break;
1142 jfenwick 2500 case GEZ:
1143 jfenwick 6066 operation=GEZEROF;
1144     break;
1145 jfenwick 2500 case LEZ:
1146 jfenwick 6066 operation=LEZEROF;
1147     break;
1148 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1149     case NEZ:
1150 jfenwick 6066 operation=NEQZEROF;
1151     break;
1152 jfenwick 2500 case EZ:
1153 jfenwick 6066 operation=EQZEROF;
1154     break;
1155 jfenwick 2500 default:
1156 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1157 jfenwick 2500 }
1158 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1159     left,
1160     result,
1161     operation,
1162     m_tol);
1163 jfenwick 2500 return &(m_samples);
1164     }
1165    
1166    
1167 jfenwick 5938 const DataTypes::RealVectorType*
1168 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1169 jfenwick 2721 {
1170 caltinay 5972 // we assume that any collapsing has been done before we get here
1171     // since we only have one argument we don't need to think about only
1172     // processing single points.
1173     // we will also know we won't get identity nodes
1174 jfenwick 2721 if (m_readytype!='E')
1175     {
1176     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1177     }
1178     if (m_op==IDENTITY)
1179     {
1180     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1181     }
1182     size_t loffset=0;
1183 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1184 jfenwick 2721
1185     roffset=m_samplesize*tid;
1186 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1187 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1188 jfenwick 2721 double* result=&(m_samples[roffset]);
1189     switch (m_op)
1190     {
1191     case MINVAL:
1192 caltinay 5972 {
1193     for (unsigned int z=0;z<ndpps;++z)
1194     {
1195     FMin op;
1196 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1197 caltinay 5972 loffset+=psize;
1198     result++;
1199     }
1200     }
1201     break;
1202 jfenwick 2721 case MAXVAL:
1203 caltinay 5972 {
1204     for (unsigned int z=0;z<ndpps;++z)
1205     {
1206     FMax op;
1207 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1208 caltinay 5972 loffset+=psize;
1209     result++;
1210     }
1211     }
1212     break;
1213 jfenwick 2721 default:
1214 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1215 jfenwick 2721 }
1216     return &(m_samples);
1217     }
1218    
1219 jfenwick 5938 const DataTypes::RealVectorType*
1220 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1221 jfenwick 2500 {
1222 caltinay 5972 // we assume that any collapsing has been done before we get here
1223     // since we only have one argument we don't need to think about only
1224     // processing single points.
1225 jfenwick 2500 if (m_readytype!='E')
1226     {
1227     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1228     }
1229     if (m_op==IDENTITY)
1230     {
1231     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1232     }
1233     size_t subroffset;
1234 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1235 jfenwick 2500 roffset=m_samplesize*tid;
1236     size_t loop=0;
1237     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1238     size_t step=getNoValues();
1239     size_t offset=roffset;
1240     switch (m_op)
1241     {
1242     case SYM:
1243 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1244     {
1245 jfenwick 6083 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1246 caltinay 5972 subroffset+=step;
1247     offset+=step;
1248     }
1249     break;
1250 jfenwick 2500 case NSYM:
1251 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1252     {
1253 jfenwick 6114 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1254 caltinay 5972 subroffset+=step;
1255     offset+=step;
1256     }
1257     break;
1258 jfenwick 2500 default:
1259 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1260 jfenwick 2500 }
1261     return &m_samples;
1262     }
1263    
1264 jfenwick 5938 const DataTypes::RealVectorType*
1265 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1266 jfenwick 2500 {
1267 caltinay 5972 // we assume that any collapsing has been done before we get here
1268     // since we only have one argument we don't need to think about only
1269     // processing single points.
1270 jfenwick 2500 if (m_readytype!='E')
1271     {
1272     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1273     }
1274     if (m_op==IDENTITY)
1275     {
1276     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1277     }
1278     size_t subroffset;
1279     size_t offset;
1280 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1281 jfenwick 2500 roffset=m_samplesize*tid;
1282     offset=roffset;
1283     size_t loop=0;
1284     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1285     size_t outstep=getNoValues();
1286     size_t instep=m_left->getNoValues();
1287     switch (m_op)
1288     {
1289     case TRACE:
1290 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1291     {
1292 jfenwick 6083 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1293 caltinay 5972 subroffset+=instep;
1294     offset+=outstep;
1295     }
1296     break;
1297 jfenwick 2500 case TRANS:
1298 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1299     {
1300 jfenwick 6083 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1301 caltinay 5972 subroffset+=instep;
1302     offset+=outstep;
1303     }
1304     break;
1305 jfenwick 2500 default:
1306 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1307 jfenwick 2500 }
1308     return &m_samples;
1309     }
1310    
1311    
1312 jfenwick 5938 const DataTypes::RealVectorType*
1313 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1314 jfenwick 2500 {
1315     if (m_readytype!='E')
1316     {
1317     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1318     }
1319     if (m_op==IDENTITY)
1320     {
1321     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1322     }
1323     size_t subroffset;
1324     size_t offset;
1325 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1326 jfenwick 2500 roffset=m_samplesize*tid;
1327     offset=roffset;
1328     size_t loop=0;
1329     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1330     size_t outstep=getNoValues();
1331     size_t instep=m_left->getNoValues();
1332     switch (m_op)
1333     {
1334     case SWAP:
1335 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1336     {
1337 jfenwick 6083 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1338 caltinay 5972 subroffset+=instep;
1339     offset+=outstep;
1340     }
1341     break;
1342 jfenwick 2500 default:
1343 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1344 jfenwick 2500 }
1345     return &m_samples;
1346     }
1347    
1348 jfenwick 5938 const DataTypes::RealVectorType*
1349 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1350 jfenwick 3035 {
1351     if (m_readytype!='E')
1352     {
1353     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1354     }
1355     if (m_op!=CONDEVAL)
1356     {
1357     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1358     }
1359     size_t subroffset;
1360 jfenwick 2500
1361 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1362     const RealVectorType* srcres=0;
1363 jfenwick 3035 if ((*maskres)[subroffset]>0)
1364     {
1365 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1366 jfenwick 3035 }
1367     else
1368     {
1369 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1370 jfenwick 3035 }
1371    
1372     // Now we need to copy the result
1373    
1374     roffset=m_samplesize*tid;
1375     for (int i=0;i<m_samplesize;++i)
1376     {
1377 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1378 jfenwick 3035 }
1379    
1380     return &m_samples;
1381     }
1382    
1383 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1384     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1385     // If both children are expanded, then we can process them in a single operation (we treat
1386     // the whole sample as one big datapoint.
1387     // If one of the children is not expanded, then we need to treat each point in the sample
1388     // individually.
1389     // There is an additional complication when scalar operations are considered.
1390     // For example, 2+Vector.
1391     // In this case each double within the point is treated individually
1392 jfenwick 5938 const DataTypes::RealVectorType*
1393 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1394 jfenwick 2500 {
1395     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1396    
1397 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1398     // first work out which of the children are expanded
1399 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1400     bool rightExp=(m_right->m_readytype=='E');
1401     if (!leftExp && !rightExp)
1402     {
1403 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1404 jfenwick 2500 }
1405     bool leftScalar=(m_left->getRank()==0);
1406     bool rightScalar=(m_right->getRank()==0);
1407     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1408     {
1409 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1410 jfenwick 2500 }
1411     size_t leftsize=m_left->getNoValues();
1412     size_t rightsize=m_right->getNoValues();
1413 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1414     int leftstep=0; // how far should the left offset advance after each step
1415 jfenwick 2500 int rightstep=0;
1416 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1417     int oleftstep=0; // the o variables refer to the outer loop
1418     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1419 jfenwick 2500 int onumsteps=1;
1420    
1421 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1422 jfenwick 2500 bool RES=(rightExp && rightScalar);
1423 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1424 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1425 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1426 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1427 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1428 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1429    
1430 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1431 jfenwick 2500 {
1432 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1433     leftstep=0;
1434     rightstep=0;
1435     numsteps=1;
1436 jfenwick 2500 }
1437     else if (LES || RES)
1438     {
1439 caltinay 5972 chunksize=1;
1440     if (LES) // left is an expanded scalar
1441     {
1442     if (RS)
1443     {
1444     leftstep=1;
1445     rightstep=0;
1446     numsteps=m_left->getNumDPPSample();
1447     }
1448     else // RN or REN
1449     {
1450     leftstep=0;
1451     oleftstep=1;
1452     rightstep=1;
1453     orightstep=(RN ? -(int)rightsize : 0);
1454     numsteps=rightsize;
1455     onumsteps=m_left->getNumDPPSample();
1456     }
1457     }
1458     else // right is an expanded scalar
1459     {
1460     if (LS)
1461     {
1462     rightstep=1;
1463     leftstep=0;
1464     numsteps=m_right->getNumDPPSample();
1465     }
1466     else
1467     {
1468     rightstep=0;
1469     orightstep=1;
1470     leftstep=1;
1471     oleftstep=(LN ? -(int)leftsize : 0);
1472     numsteps=leftsize;
1473     onumsteps=m_right->getNumDPPSample();
1474     }
1475     }
1476 jfenwick 2500 }
1477 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1478 jfenwick 2500 {
1479 caltinay 5972 if (LEN) // and Right will be a single value
1480     {
1481     chunksize=rightsize;
1482     leftstep=rightsize;
1483     rightstep=0;
1484     numsteps=m_left->getNumDPPSample();
1485     if (RS)
1486     {
1487     numsteps*=leftsize;
1488     }
1489     }
1490     else // REN
1491     {
1492     chunksize=leftsize;
1493     rightstep=leftsize;
1494     leftstep=0;
1495     numsteps=m_right->getNumDPPSample();
1496     if (LS)
1497     {
1498     numsteps*=rightsize;
1499     }
1500     }
1501 jfenwick 2500 }
1502    
1503 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1504     // Get the values of sub-expressions
1505 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1506     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1507 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1508     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1509     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1510     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1511     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1512     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1513     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1514    
1515     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1516     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1517    
1518    
1519     roffset=m_samplesize*tid;
1520 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1521 jfenwick 2500 switch(m_op)
1522     {
1523     case ADD:
1524 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1525 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1526 jfenwick 6042 &(*left)[0],
1527     &(*right)[0],
1528     chunksize,
1529     onumsteps,
1530     numsteps,
1531     resultStep,
1532     leftstep,
1533     rightstep,
1534     oleftstep,
1535     orightstep,
1536 jfenwick 6056 lroffset,
1537     rroffset,
1538 jfenwick 6042 escript::ESFunction::PLUSF);
1539 caltinay 5972 break;
1540 jfenwick 2500 case SUB:
1541 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1542 jfenwick 6057 &(*left)[0],
1543     &(*right)[0],
1544     chunksize,
1545     onumsteps,
1546     numsteps,
1547     resultStep,
1548     leftstep,
1549     rightstep,
1550     oleftstep,
1551     orightstep,
1552     lroffset,
1553     rroffset,
1554     escript::ESFunction::MINUSF);
1555     //PROC_OP(NO_ARG,minus<double>());
1556 caltinay 5972 break;
1557 jfenwick 2500 case MUL:
1558 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1559 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1560 jfenwick 6057 &(*left)[0],
1561     &(*right)[0],
1562     chunksize,
1563     onumsteps,
1564     numsteps,
1565     resultStep,
1566     leftstep,
1567     rightstep,
1568     oleftstep,
1569     orightstep,
1570     lroffset,
1571     rroffset,
1572     escript::ESFunction::MULTIPLIESF);
1573 caltinay 5972 break;
1574 jfenwick 2500 case DIV:
1575 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1576 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1577 jfenwick 6057 &(*left)[0],
1578     &(*right)[0],
1579     chunksize,
1580     onumsteps,
1581     numsteps,
1582     resultStep,
1583     leftstep,
1584     rightstep,
1585     oleftstep,
1586     orightstep,
1587     lroffset,
1588     rroffset,
1589     escript::ESFunction::DIVIDESF);
1590 caltinay 5972 break;
1591 jfenwick 2500 case POW:
1592 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1593 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1594 jfenwick 6057 &(*left)[0],
1595     &(*right)[0],
1596     chunksize,
1597     onumsteps,
1598     numsteps,
1599     resultStep,
1600     leftstep,
1601     rightstep,
1602     oleftstep,
1603     orightstep,
1604     lroffset,
1605     rroffset,
1606     escript::ESFunction::POWF);
1607 caltinay 5972 break;
1608 jfenwick 2500 default:
1609 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1610 jfenwick 2500 }
1611     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1612     return &m_samples;
1613     }
1614    
1615    
1616     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1617     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1618     // unlike the other resolve helpers, we must treat these datapoints separately.
1619 jfenwick 5938 const DataTypes::RealVectorType*
1620 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1621 jfenwick 2500 {
1622     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1623    
1624 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1625     // first work out which of the children are expanded
1626 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1627     bool rightExp=(m_right->m_readytype=='E');
1628     int steps=getNumDPPSample();
1629 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1630 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1631    
1632     int resultStep=getNoValues();
1633     roffset=m_samplesize*tid;
1634     size_t offset=roffset;
1635    
1636 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1637 jfenwick 2500
1638 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1639 jfenwick 2500
1640     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1641     cout << getNoValues() << endl;)
1642    
1643    
1644     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1645     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1646     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1647     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1648     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1649     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1650     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1651    
1652 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1653 jfenwick 2500 switch(m_op)
1654     {
1655     case PROD:
1656 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1657     {
1658     const double *ptr_0 = &((*left)[lroffset]);
1659     const double *ptr_1 = &((*right)[rroffset]);
1660 jfenwick 2500
1661     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1662     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1663    
1664 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1665 jfenwick 2500
1666 caltinay 5972 lroffset+=leftStep;
1667     rroffset+=rightStep;
1668     }
1669     break;
1670 jfenwick 2500 default:
1671 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1672 jfenwick 2500 }
1673     roffset=offset;
1674     return &m_samples;
1675     }
1676    
1677 jfenwick 1898
1678 jfenwick 5938 const DataTypes::RealVectorType*
1679 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1680 jfenwick 1879 {
1681 jfenwick 2271 #ifdef _OPENMP
1682 caltinay 5972 int tid=omp_get_thread_num();
1683 jfenwick 2271 #else
1684 caltinay 5972 int tid=0;
1685 jfenwick 2271 #endif
1686 jfenwick 2777
1687     #ifdef LAZY_STACK_PROF
1688 caltinay 5972 stackstart[tid]=&tid;
1689     stackend[tid]=&tid;
1690     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1691     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1692     #pragma omp critical
1693     if (d>maxstackuse)
1694     {
1695 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1696 caltinay 5972 maxstackuse=d;
1697     }
1698     return r;
1699 jfenwick 2777 #else
1700 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1701 jfenwick 2777 #endif
1702 jfenwick 2271 }
1703    
1704    
1705 jfenwick 2497 // This needs to do the work of the identity constructor
1706 jfenwick 2177 void
1707     DataLazy::resolveToIdentity()
1708     {
1709     if (m_op==IDENTITY)
1710 caltinay 5972 return;
1711 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1712 jfenwick 2177 makeIdentity(p);
1713     }
1714 jfenwick 1889
1715 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1716     {
1717     m_op=IDENTITY;
1718     m_axis_offset=0;
1719     m_transpose=0;
1720     m_SL=m_SM=m_SR=0;
1721     m_children=m_height=0;
1722     m_id=p;
1723     if(p->isConstant()) {m_readytype='C';}
1724     else if(p->isExpanded()) {m_readytype='E';}
1725     else if (p->isTagged()) {m_readytype='T';}
1726     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1727     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1728     m_left.reset();
1729     m_right.reset();
1730     }
1731    
1732 jfenwick 2497
1733     DataReady_ptr
1734     DataLazy::resolve()
1735     {
1736     resolveToIdentity();
1737     return m_id;
1738     }
1739    
1740 jfenwick 2799
1741     /* This is really a static method but I think that caused problems in windows */
1742     void
1743     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1744     {
1745     if (dats.empty())
1746     {
1747 caltinay 5972 return;
1748 jfenwick 2799 }
1749     vector<DataLazy*> work;
1750     FunctionSpace fs=dats[0]->getFunctionSpace();
1751     bool match=true;
1752 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1753 jfenwick 2799 {
1754 caltinay 5972 if (dats[i]->m_readytype!='E')
1755     {
1756     dats[i]->collapse();
1757     }
1758     if (dats[i]->m_op!=IDENTITY)
1759     {
1760     work.push_back(dats[i]);
1761     if (fs!=dats[i]->getFunctionSpace())
1762     {
1763     match=false;
1764     }
1765     }
1766 jfenwick 2799 }
1767     if (work.empty())
1768     {
1769 caltinay 5972 return; // no work to do
1770 jfenwick 2799 }
1771 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1772     { // it is possible that dats[0] is one of the objects which we discarded and
1773     // all the other functionspaces match.
1774     vector<DataExpanded*> dep;
1775 jfenwick 6042 vector<RealVectorType*> vecs;
1776 caltinay 5972 for (int i=0;i<work.size();++i)
1777     {
1778 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1779 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1780     }
1781     int totalsamples=work[0]->getNumSamples();
1782 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1783 caltinay 5972 int sample;
1784     #pragma omp parallel private(sample, res)
1785     {
1786     size_t roffset=0;
1787     #pragma omp for schedule(static)
1788     for (sample=0;sample<totalsamples;++sample)
1789     {
1790     roffset=0;
1791     int j;
1792     for (j=work.size()-1;j>=0;--j)
1793     {
1794 jfenwick 2799 #ifdef _OPENMP
1795 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1796 jfenwick 2799 #else
1797 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1798 jfenwick 2799 #endif
1799 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1800     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1801     }
1802     }
1803     }
1804     // Now we need to load the new results as identity ops into the lazy nodes
1805     for (int i=work.size()-1;i>=0;--i)
1806     {
1807 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1808 caltinay 5972 }
1809 jfenwick 2799 }
1810 caltinay 5972 else // functionspaces do not match
1811 jfenwick 2799 {
1812 caltinay 5972 for (int i=0;i<work.size();++i)
1813     {
1814     work[i]->resolveToIdentity();
1815     }
1816 jfenwick 2799 }
1817     }
1818    
1819    
1820    
1821 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1822     DataReady_ptr
1823     DataLazy::resolveNodeWorker()
1824     {
1825 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1826 jfenwick 2500 {
1827     collapse();
1828     }
1829 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1830 jfenwick 2500 {
1831     return m_id;
1832     }
1833 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1834 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1835     RealVectorType& resvec=result->getVectorRW();
1836 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1837    
1838     int sample;
1839     int totalsamples=getNumSamples();
1840 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1841 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1842 jfenwick 2777 #pragma omp parallel private(sample,res)
1843 jfenwick 2500 {
1844 caltinay 5972 size_t roffset=0;
1845 jfenwick 2777 #ifdef LAZY_STACK_PROF
1846 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1847     stackend[omp_get_thread_num()]=&roffset;
1848 jfenwick 2777 #endif
1849 caltinay 5972 #pragma omp for schedule(static)
1850     for (sample=0;sample<totalsamples;++sample)
1851     {
1852     roffset=0;
1853 jfenwick 2500 #ifdef _OPENMP
1854 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1855 jfenwick 2500 #else
1856 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1857 jfenwick 2500 #endif
1858     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1859     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1860 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1861     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1862     }
1863 jfenwick 2500 }
1864 jfenwick 2777 #ifdef LAZY_STACK_PROF
1865     for (int i=0;i<getNumberOfThreads();++i)
1866     {
1867 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1868     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1869     if (r>maxstackuse)
1870     {
1871     maxstackuse=r;
1872     }
1873 jfenwick 2777 }
1874     cout << "Max resolve Stack use=" << maxstackuse << endl;
1875     #endif
1876 jfenwick 2500 return resptr;
1877     }
1878    
1879 jfenwick 1865 std::string
1880     DataLazy::toString() const
1881     {
1882 jfenwick 1886 ostringstream oss;
1883 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1884 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1885 jfenwick 2737 {
1886 caltinay 5972 case 1: // tree format
1887     oss << endl;
1888     intoTreeString(oss,"");
1889     break;
1890     case 2: // just the depth
1891     break;
1892 jfenwick 2795 default:
1893 caltinay 5972 intoString(oss);
1894     break;
1895 jfenwick 2737 }
1896 jfenwick 1886 return oss.str();
1897 jfenwick 1865 }
1898    
1899 jfenwick 1899
1900 jfenwick 1886 void
1901     DataLazy::intoString(ostringstream& oss) const
1902     {
1903 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1904 jfenwick 1886 switch (getOpgroup(m_op))
1905     {
1906 jfenwick 1889 case G_IDENTITY:
1907 caltinay 5972 if (m_id->isExpanded())
1908     {
1909     oss << "E";
1910     }
1911     else if (m_id->isTagged())
1912     {
1913     oss << "T";
1914     }
1915     else if (m_id->isConstant())
1916     {
1917     oss << "C";
1918     }
1919     else
1920     {
1921     oss << "?";
1922     }
1923     oss << '@' << m_id.get();
1924     break;
1925 jfenwick 1886 case G_BINARY:
1926 caltinay 5972 oss << '(';
1927     m_left->intoString(oss);
1928     oss << ' ' << opToString(m_op) << ' ';
1929     m_right->intoString(oss);
1930     oss << ')';
1931     break;
1932 jfenwick 1886 case G_UNARY:
1933 jfenwick 2147 case G_UNARY_P:
1934 jfenwick 2037 case G_NP1OUT:
1935 jfenwick 2084 case G_NP1OUT_P:
1936 jfenwick 2721 case G_REDUCTION:
1937 caltinay 5972 oss << opToString(m_op) << '(';
1938     m_left->intoString(oss);
1939     oss << ')';
1940     break;
1941 jfenwick 2066 case G_TENSORPROD:
1942 caltinay 5972 oss << opToString(m_op) << '(';
1943     m_left->intoString(oss);
1944     oss << ", ";
1945     m_right->intoString(oss);
1946     oss << ')';
1947     break;
1948 jfenwick 2496 case G_NP1OUT_2P:
1949 caltinay 5972 oss << opToString(m_op) << '(';
1950     m_left->intoString(oss);
1951     oss << ", " << m_axis_offset << ", " << m_transpose;
1952     oss << ')';
1953     break;
1954 jfenwick 3035 case G_CONDEVAL:
1955 caltinay 5972 oss << opToString(m_op)<< '(' ;
1956     m_mask->intoString(oss);
1957     oss << " ? ";
1958     m_left->intoString(oss);
1959     oss << " : ";
1960     m_right->intoString(oss);
1961     oss << ')';
1962     break;
1963 jfenwick 1886 default:
1964 caltinay 5972 oss << "UNKNOWN";
1965 jfenwick 1886 }
1966     }
1967    
1968 jfenwick 2737
1969     void
1970     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1971     {
1972     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1973     switch (getOpgroup(m_op))
1974     {
1975     case G_IDENTITY:
1976 caltinay 5972 if (m_id->isExpanded())
1977     {
1978     oss << "E";
1979     }
1980     else if (m_id->isTagged())
1981     {
1982     oss << "T";
1983     }
1984     else if (m_id->isConstant())
1985     {
1986     oss << "C";
1987     }
1988     else
1989     {
1990     oss << "?";
1991     }
1992     oss << '@' << m_id.get() << endl;
1993     break;
1994 jfenwick 2737 case G_BINARY:
1995 caltinay 5972 oss << opToString(m_op) << endl;
1996     indent+='.';
1997     m_left->intoTreeString(oss, indent);
1998     m_right->intoTreeString(oss, indent);
1999     break;
2000 jfenwick 2737 case G_UNARY:
2001     case G_UNARY_P:
2002     case G_NP1OUT:
2003     case G_NP1OUT_P:
2004     case G_REDUCTION:
2005 caltinay 5972 oss << opToString(m_op) << endl;
2006     indent+='.';
2007     m_left->intoTreeString(oss, indent);
2008     break;
2009 jfenwick 2737 case G_TENSORPROD:
2010 caltinay 5972 oss << opToString(m_op) << endl;
2011     indent+='.';
2012     m_left->intoTreeString(oss, indent);
2013     m_right->intoTreeString(oss, indent);
2014     break;
2015 jfenwick 2737 case G_NP1OUT_2P:
2016 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2017     indent+='.';
2018     m_left->intoTreeString(oss, indent);
2019     break;
2020 jfenwick 2737 default:
2021 caltinay 5972 oss << "UNKNOWN";
2022 jfenwick 2737 }
2023     }
2024    
2025    
2026 jfenwick 1865 DataAbstract*
2027 jfenwick 5938 DataLazy::deepCopy() const
2028 jfenwick 1865 {
2029 jfenwick 1935 switch (getOpgroup(m_op))
2030 jfenwick 1865 {
2031 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
2032 caltinay 5972 case G_UNARY:
2033 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2034 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2035     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2036 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2037     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2038 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2039     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2040 jfenwick 1935 default:
2041 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2042 jfenwick 1865 }
2043     }
2044    
2045    
2046 jfenwick 2721
2047 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2048     // Instances of DataReady can look at the size of their vectors.
2049     // For lazy though, it could be the size the data would be if it were resolved;
2050     // or it could be some function of the lengths of the DataReady instances which
2051     // form part of the expression.
2052     // Rather than have people making assumptions, I have disabled the method.
2053 jfenwick 5938 DataTypes::RealVectorType::size_type
2054 jfenwick 1865 DataLazy::getLength() const
2055     {
2056 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2057 jfenwick 1865 }
2058    
2059    
2060     DataAbstract*
2061     DataLazy::getSlice(const DataTypes::RegionType& region) const
2062     {
2063 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2064 jfenwick 1865 }
2065    
2066 jfenwick 1935
2067     // To do this we need to rely on our child nodes
2068 jfenwick 5938 DataTypes::RealVectorType::size_type
2069 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2070 jfenwick 1935 int dataPointNo)
2071     {
2072     if (m_op==IDENTITY)
2073     {
2074 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2075 jfenwick 1935 }
2076     if (m_readytype!='E')
2077     {
2078 caltinay 5972 collapse();
2079     return m_id->getPointOffset(sampleNo,dataPointNo);
2080 jfenwick 1935 }
2081     // at this point we do not have an identity node and the expression will be Expanded
2082     // so we only need to know which child to ask
2083     if (m_left->m_readytype=='E')
2084     {
2085 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2086 jfenwick 1935 }
2087     else
2088     {
2089 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2090 jfenwick 1935 }
2091     }
2092    
2093     // To do this we need to rely on our child nodes
2094 jfenwick 5938 DataTypes::RealVectorType::size_type
2095 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2096 jfenwick 1865 int dataPointNo) const
2097     {
2098 jfenwick 1935 if (m_op==IDENTITY)
2099     {
2100 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2101 jfenwick 1935 }
2102     if (m_readytype=='E')
2103     {
2104     // at this point we do not have an identity node and the expression will be Expanded
2105     // so we only need to know which child to ask
2106     if (m_left->m_readytype=='E')
2107     {
2108 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2109 jfenwick 1935 }
2110     else
2111     {
2112 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2113 jfenwick 1935 }
2114     }
2115     if (m_readytype=='C')
2116     {
2117 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2118 jfenwick 1935 }
2119     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2120 jfenwick 1865 }
2121    
2122 jfenwick 2271
2123     // I have decided to let Data:: handle this issue.
2124 jfenwick 1901 void
2125     DataLazy::setToZero()
2126     {
2127 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2128 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2129     // m_op=IDENTITY;
2130     // m_right.reset();
2131     // m_left.reset();
2132     // m_readytype='C';
2133     // m_buffsRequired=1;
2134    
2135 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2136 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2137 jfenwick 1901 }
2138    
2139 jfenwick 2271 bool
2140     DataLazy::actsExpanded() const
2141     {
2142 caltinay 5972 return (m_readytype=='E');
2143 jfenwick 2271 }
2144    
2145 caltinay 5997 } // end namespace
2146 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26