/[escript]/trunk/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /trunk/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6125 - (hide annotations)
Tue Apr 5 04:12:13 2016 UTC (3 years, 2 months ago) by jfenwick
File size: 66516 byte(s)
Fix incorrect behaviour of anti-symmetric for scalars.

Also add hermitian and antihermitian.
Very quick check looks ok but really not tested yet.

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 3317 #ifdef USE_NETCDF
26     #include <netcdfcpp.h>
27     #endif
28    
29 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
30 jfenwick 2199
31 jfenwick 5938 using namespace escript::DataTypes;
32    
33 caltinay 5972 #define NO_ARG
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48 jfenwick 2472
49 jfenwick 2501
50 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52 jfenwick 1899 /*
53     How does DataLazy work?
54     ~~~~~~~~~~~~~~~~~~~~~~~
55    
56     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57     denoted left and right.
58    
59     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60     This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63 caltinay 4286 Note that IDENTITY is not considered a unary operation.
64 jfenwick 1899
65     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66     It must however form a DAG (directed acyclic graph).
67     I will refer to individual DataLazy objects with the structure as nodes.
68    
69     Each node also stores:
70     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71 caltinay 5972 evaluated.
72 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73 caltinay 5972 evaluate the expression.
74 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
75    
76     When a new node is created, the above values are computed based on the values in the child nodes.
77     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79     The resolve method, which produces a DataReady from a DataLazy, does the following:
80     1) Create a DataReady to hold the new result.
81     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86     resolveSample returns a Vector* and an offset within that vector where the result is stored.
87     Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93 jfenwick 2037
94 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 jfenwick 2037 1) Add to the ES_optype.
96     2) determine what opgroup your operation belongs to (X)
97     3) add a string for the op to the end of ES_opstrings
98     4) increase ES_opcount
99     5) add an entry (X) to opgroups
100     6) add an entry to the switch in collapseToReady
101     7) add an entry to resolveX
102 jfenwick 1899 */
103    
104    
105 jfenwick 1865 using namespace std;
106 jfenwick 1868 using namespace boost;
107 jfenwick 1865
108     namespace escript
109     {
110    
111     namespace
112     {
113    
114 jfenwick 2777
115     // enabling this will print out when ever the maximum stacksize used by resolve increases
116     // it assumes _OPENMP is also in use
117     //#define LAZY_STACK_PROF
118    
119    
120    
121     #ifndef _OPENMP
122     #ifdef LAZY_STACK_PROF
123     #undef LAZY_STACK_PROF
124     #endif
125     #endif
126    
127    
128     #ifdef LAZY_STACK_PROF
129     std::vector<void*> stackstart(getNumberOfThreads());
130     std::vector<void*> stackend(getNumberOfThreads());
131     size_t maxstackuse=0;
132     #endif
133    
134 jfenwick 1886 enum ES_opgroup
135     {
136     G_UNKNOWN,
137     G_IDENTITY,
138 caltinay 5972 G_BINARY, // pointwise operations with two arguments
139     G_UNARY, // pointwise operations with one argument
140     G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141     G_NP1OUT, // non-pointwise op with one output
142     G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD, // general tensor product
144     G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145     G_REDUCTION, // non-pointwise unary op with a scalar output
146 jfenwick 3035 G_CONDEVAL
147 jfenwick 1886 };
148    
149    
150    
151    
152 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153 caltinay 5972 "sin","cos","tan",
154     "asin","acos","atan","sinh","cosh","tanh","erf",
155     "asinh","acosh","atanh",
156     "log10","log","sign","abs","neg","pos","exp","sqrt",
157     "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158 jfenwick 6114 "symmetric","antisymmetric",
159 caltinay 5972 "prod",
160     "transpose", "trace",
161     "swapaxes",
162     "minval", "maxval",
163 jfenwick 6125 "condEval",
164     "hermitian","antihermitian"
165     };
166     int ES_opcount=46;
167 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168 caltinay 5972 G_UNARY,G_UNARY,G_UNARY, //10
169     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
170     G_UNARY,G_UNARY,G_UNARY, // 20
171     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
172     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
173     G_NP1OUT,G_NP1OUT,
174     G_TENSORPROD,
175     G_NP1OUT_P, G_NP1OUT_P,
176     G_NP1OUT_2P,
177     G_REDUCTION, G_REDUCTION,
178 jfenwick 6125 G_CONDEVAL,
179     G_UNARY,G_UNARY
180     };
181 jfenwick 1886 inline
182     ES_opgroup
183     getOpgroup(ES_optype op)
184     {
185     return opgroups[op];
186     }
187    
188 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
189     FunctionSpace
190     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
191     {
192 caltinay 5972 // perhaps this should call interpolate and throw or something?
193     // maybe we need an interpolate node -
194     // that way, if interpolate is required in any other op we can just throw a
195     // programming error exception.
196 jfenwick 1879
197 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
198     FunctionSpace r=right->getFunctionSpace();
199     if (l!=r)
200     {
201 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202     if (res==1)
203 jfenwick 1943 {
204 caltinay 5972 return l;
205 jfenwick 1943 }
206 jfenwick 4264 if (res==-1)
207 jfenwick 1943 {
208 caltinay 5972 return r;
209 jfenwick 1943 }
210     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
211     }
212     return l;
213 jfenwick 1865 }
214    
215     // return the shape of the result of "left op right"
216 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
217 jfenwick 1865 DataTypes::ShapeType
218     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
219     {
220 caltinay 5972 if (left->getShape()!=right->getShape())
221     {
222     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
223     {
224     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
225     }
226 jfenwick 2721
227 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
228     {
229     return right->getShape();
230     }
231     if (right->getRank()==0)
232     {
233     return left->getShape();
234     }
235     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
236     }
237     return left->getShape();
238 jfenwick 1865 }
239    
240 jfenwick 2084 // return the shape for "op left"
241    
242     DataTypes::ShapeType
243 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244 jfenwick 2084 {
245 caltinay 5972 switch(op)
246     {
247     case TRANS:
248     { // for the scoping of variables
249     const DataTypes::ShapeType& s=left->getShape();
250     DataTypes::ShapeType sh;
251     int rank=left->getRank();
252     if (axis_offset<0 || axis_offset>rank)
253     {
254 caltinay 2779 stringstream e;
255     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256     throw DataException(e.str());
257 caltinay 5972 }
258     for (int i=0; i<rank; i++)
259     {
260     int index = (axis_offset+i)%rank;
261     sh.push_back(s[index]); // Append to new shape
262     }
263     return sh;
264     }
265     break;
266     case TRACE:
267     {
268     int rank=left->getRank();
269     if (rank<2)
270     {
271     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272     }
273     if ((axis_offset>rank-2) || (axis_offset<0))
274     {
275     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276     }
277     if (rank==2)
278     {
279     return DataTypes::scalarShape;
280     }
281     else if (rank==3)
282     {
283     DataTypes::ShapeType sh;
284     if (axis_offset==0)
285     {
286     sh.push_back(left->getShape()[2]);
287     }
288     else // offset==1
289     {
290     sh.push_back(left->getShape()[0]);
291     }
292     return sh;
293     }
294     else if (rank==4)
295     {
296     DataTypes::ShapeType sh;
297     const DataTypes::ShapeType& s=left->getShape();
298     if (axis_offset==0)
299     {
300     sh.push_back(s[2]);
301     sh.push_back(s[3]);
302     }
303     else if (axis_offset==1)
304     {
305     sh.push_back(s[0]);
306     sh.push_back(s[3]);
307     }
308     else // offset==2
309     {
310     sh.push_back(s[0]);
311     sh.push_back(s[1]);
312     }
313     return sh;
314     }
315     else // unknown rank
316     {
317     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318     }
319     }
320     break;
321     default:
322     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323     }
324 jfenwick 2084 }
325    
326 jfenwick 2496 DataTypes::ShapeType
327     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
328     {
329     // This code taken from the Data.cpp swapaxes() method
330     // Some of the checks are probably redundant here
331     int axis0_tmp,axis1_tmp;
332     const DataTypes::ShapeType& s=left->getShape();
333     DataTypes::ShapeType out_shape;
334     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
335     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
336     int rank=left->getRank();
337     if (rank<2) {
338     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339     }
340     if (axis0<0 || axis0>rank-1) {
341 caltinay 2779 stringstream e;
342     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343     throw DataException(e.str());
344 jfenwick 2496 }
345     if (axis1<0 || axis1>rank-1) {
346 caltinay 2782 stringstream e;
347 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348     throw DataException(e.str());
349 jfenwick 2496 }
350     if (axis0 == axis1) {
351     throw DataException("Error - Data::swapaxes: axis indices must be different.");
352     }
353     if (axis0 > axis1) {
354     axis0_tmp=axis1;
355     axis1_tmp=axis0;
356     } else {
357     axis0_tmp=axis0;
358     axis1_tmp=axis1;
359     }
360     for (int i=0; i<rank; i++) {
361     if (i == axis0_tmp) {
362     out_shape.push_back(s[axis1_tmp]);
363     } else if (i == axis1_tmp) {
364     out_shape.push_back(s[axis0_tmp]);
365     } else {
366     out_shape.push_back(s[i]);
367     }
368     }
369     return out_shape;
370     }
371    
372    
373 jfenwick 2066 // determine the output shape for the general tensor product operation
374     // the additional parameters return information required later for the product
375     // the majority of this code is copy pasted from C_General_Tensor_Product
376     DataTypes::ShapeType
377     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
378 jfenwick 1865 {
379 caltinay 5972
380 jfenwick 2066 // Get rank and shape of inputs
381     int rank0 = left->getRank();
382     int rank1 = right->getRank();
383     const DataTypes::ShapeType& shape0 = left->getShape();
384     const DataTypes::ShapeType& shape1 = right->getShape();
385    
386     // Prepare for the loops of the product and verify compatibility of shapes
387     int start0=0, start1=0;
388 caltinay 5972 if (transpose == 0) {}
389     else if (transpose == 1) { start0 = axis_offset; }
390     else if (transpose == 2) { start1 = rank1-axis_offset; }
391     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392 jfenwick 2066
393 jfenwick 2085 if (rank0<axis_offset)
394     {
395 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396 jfenwick 2085 }
397 jfenwick 2066
398     // Adjust the shapes for transpose
399 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
400     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
401     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
402     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
403 jfenwick 2066
404     // Prepare for the loops of the product
405     SL=1, SM=1, SR=1;
406 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
407 jfenwick 2066 SL *= tmpShape0[i];
408     }
409 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
410 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
411     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
412     }
413     SM *= tmpShape0[i];
414     }
415 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
416 jfenwick 2066 SR *= tmpShape1[i];
417     }
418    
419     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
420 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
421     { // block to limit the scope of out_index
422 jfenwick 2066 int out_index=0;
423     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
425     }
426 jfenwick 2086
427     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
428     {
429     ostringstream os;
430     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
431     throw DataException(os.str());
432     }
433    
434 jfenwick 2066 return shape2;
435 jfenwick 1865 }
436    
437 caltinay 5972 } // end anonymous namespace
438 jfenwick 1865
439    
440 jfenwick 1899
441     // Return a string representing the operation
442 jfenwick 1865 const std::string&
443     opToString(ES_optype op)
444     {
445     if (op<0 || op>=ES_opcount)
446     {
447     op=UNKNOWNOP;
448     }
449     return ES_opstrings[op];
450     }
451    
452 jfenwick 2500 void DataLazy::LazyNodeSetup()
453     {
454     #ifdef _OPENMP
455     int numthreads=omp_get_max_threads();
456     m_samples.resize(numthreads*m_samplesize);
457     m_sampleids=new int[numthreads];
458     for (int i=0;i<numthreads;++i)
459     {
460     m_sampleids[i]=-1;
461     }
462     #else
463     m_samples.resize(m_samplesize);
464     m_sampleids=new int[1];
465     m_sampleids[0]=-1;
466     #endif // _OPENMP
467     }
468 jfenwick 1865
469 jfenwick 2500
470     // Creates an identity node
471 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
472 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
473     ,m_sampleids(0),
474     m_samples(1)
475 jfenwick 1865 {
476 jfenwick 1879 if (p->isLazy())
477     {
478 caltinay 5972 // I don't want identity of Lazy.
479     // Question: Why would that be so bad?
480     // Answer: We assume that the child of ID is something we can call getVector on
481     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
482 jfenwick 1879 }
483     else
484     {
485 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486     makeIdentity(dr);
487 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
488 jfenwick 1879 }
489 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
490 jfenwick 1865 }
491    
492 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494     m_op(op),
495     m_axis_offset(0),
496     m_transpose(0),
497     m_SL(0), m_SM(0), m_SR(0)
498 jfenwick 1886 {
499 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500 jfenwick 1886 {
501 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502 jfenwick 1886 }
503 jfenwick 2066
504 jfenwick 1886 DataLazy_ptr lleft;
505     if (!left->isLazy())
506     {
507 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
508 jfenwick 1886 }
509     else
510     {
511 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
512 jfenwick 1886 }
513 jfenwick 1889 m_readytype=lleft->m_readytype;
514 jfenwick 1886 m_left=lleft;
515     m_samplesize=getNumDPPSample()*getNoValues();
516 jfenwick 2177 m_children=m_left->m_children+1;
517     m_height=m_left->m_height+1;
518 jfenwick 2500 LazyNodeSetup();
519 jfenwick 2177 SIZELIMIT
520 jfenwick 1886 }
521    
522    
523 jfenwick 1943 // In this constructor we need to consider interpolation
524 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
526     m_op(op),
527     m_SL(0), m_SM(0), m_SR(0)
528 jfenwick 1879 {
529 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
531 jfenwick 1886 {
532 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533 jfenwick 1886 }
534 jfenwick 1943
535 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
536 jfenwick 1943 {
537 caltinay 5972 FunctionSpace fs=getFunctionSpace();
538     Data ltemp(left);
539     Data tmp(ltemp,fs);
540     left=tmp.borrowDataPtr();
541 jfenwick 1943 }
542 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
543 jfenwick 1943 {
544 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
545     right=tmp.borrowDataPtr();
546 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547 jfenwick 1943 }
548     left->operandCheck(*right);
549    
550 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
551 jfenwick 1879 {
552 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
553 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554 jfenwick 1879 }
555     else
556     {
557 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
558 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559 jfenwick 1879 }
560     if (right->isLazy())
561     {
562 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
563 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564 jfenwick 1879 }
565     else
566     {
567 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
568 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569 jfenwick 1879 }
570 jfenwick 1889 char lt=m_left->m_readytype;
571     char rt=m_right->m_readytype;
572     if (lt=='E' || rt=='E')
573     {
574 caltinay 5972 m_readytype='E';
575 jfenwick 1889 }
576     else if (lt=='T' || rt=='T')
577     {
578 caltinay 5972 m_readytype='T';
579 jfenwick 1889 }
580     else
581     {
582 caltinay 5972 m_readytype='C';
583 jfenwick 1889 }
584 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
585 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
586     m_height=max(m_left->m_height,m_right->m_height)+1;
587 jfenwick 2500 LazyNodeSetup();
588 jfenwick 2177 SIZELIMIT
589 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590 jfenwick 1879 }
591    
592 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594     m_op(op),
595     m_axis_offset(axis_offset),
596     m_transpose(transpose)
597 jfenwick 2066 {
598     if ((getOpgroup(op)!=G_TENSORPROD))
599     {
600 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601 jfenwick 2066 }
602     if ((transpose>2) || (transpose<0))
603     {
604 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605 jfenwick 2066 }
606 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
607 jfenwick 2066 {
608 caltinay 5972 FunctionSpace fs=getFunctionSpace();
609     Data ltemp(left);
610     Data tmp(ltemp,fs);
611     left=tmp.borrowDataPtr();
612 jfenwick 2066 }
613 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
614 jfenwick 2066 {
615 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
616     right=tmp.borrowDataPtr();
617 jfenwick 2066 }
618 jfenwick 2195 // left->operandCheck(*right);
619 jfenwick 1879
620 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
621 jfenwick 2066 {
622 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
623 jfenwick 2066 }
624     else
625     {
626 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
627 jfenwick 2066 }
628     if (right->isLazy())
629     {
630 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
631 jfenwick 2066 }
632     else
633     {
634 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
635 jfenwick 2066 }
636     char lt=m_left->m_readytype;
637     char rt=m_right->m_readytype;
638     if (lt=='E' || rt=='E')
639     {
640 caltinay 5972 m_readytype='E';
641 jfenwick 2066 }
642     else if (lt=='T' || rt=='T')
643     {
644 caltinay 5972 m_readytype='T';
645 jfenwick 2066 }
646     else
647     {
648 caltinay 5972 m_readytype='C';
649 jfenwick 2066 }
650     m_samplesize=getNumDPPSample()*getNoValues();
651 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
652     m_height=max(m_left->m_height,m_right->m_height)+1;
653 jfenwick 2500 LazyNodeSetup();
654 jfenwick 2177 SIZELIMIT
655 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656 jfenwick 2066 }
657    
658    
659 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661     m_op(op),
662     m_axis_offset(axis_offset),
663     m_transpose(0),
664     m_tol(0)
665 jfenwick 2084 {
666     if ((getOpgroup(op)!=G_NP1OUT_P))
667     {
668 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669 jfenwick 2084 }
670     DataLazy_ptr lleft;
671     if (!left->isLazy())
672     {
673 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
674 jfenwick 2084 }
675     else
676     {
677 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
678 jfenwick 2084 }
679     m_readytype=lleft->m_readytype;
680     m_left=lleft;
681     m_samplesize=getNumDPPSample()*getNoValues();
682 jfenwick 2177 m_children=m_left->m_children+1;
683     m_height=m_left->m_height+1;
684 jfenwick 2500 LazyNodeSetup();
685 jfenwick 2177 SIZELIMIT
686 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687 jfenwick 2084 }
688    
689 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
691     m_op(op),
692     m_axis_offset(0),
693     m_transpose(0),
694     m_tol(tol)
695 jfenwick 2147 {
696     if ((getOpgroup(op)!=G_UNARY_P))
697     {
698 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699 jfenwick 2147 }
700     DataLazy_ptr lleft;
701     if (!left->isLazy())
702     {
703 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
704 jfenwick 2147 }
705     else
706     {
707 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
708 jfenwick 2147 }
709     m_readytype=lleft->m_readytype;
710     m_left=lleft;
711     m_samplesize=getNumDPPSample()*getNoValues();
712 jfenwick 2177 m_children=m_left->m_children+1;
713     m_height=m_left->m_height+1;
714 jfenwick 2500 LazyNodeSetup();
715 jfenwick 2177 SIZELIMIT
716 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717     }
718 jfenwick 2084
719 jfenwick 2496
720     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722     m_op(op),
723     m_axis_offset(axis0),
724     m_transpose(axis1),
725     m_tol(0)
726 jfenwick 2496 {
727     if ((getOpgroup(op)!=G_NP1OUT_2P))
728     {
729 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730 jfenwick 2496 }
731     DataLazy_ptr lleft;
732     if (!left->isLazy())
733     {
734 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
735 jfenwick 2496 }
736     else
737     {
738 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
739 jfenwick 2496 }
740     m_readytype=lleft->m_readytype;
741     m_left=lleft;
742     m_samplesize=getNumDPPSample()*getNoValues();
743     m_children=m_left->m_children+1;
744     m_height=m_left->m_height+1;
745 jfenwick 2500 LazyNodeSetup();
746 jfenwick 2496 SIZELIMIT
747     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748     }
749    
750 jfenwick 3035
751     namespace
752     {
753    
754     inline int max3(int a, int b, int c)
755     {
756 caltinay 5972 int t=(a>b?a:b);
757     return (t>c?t:c);
758 jfenwick 3035
759     }
760     }
761    
762     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
764     m_op(CONDEVAL),
765     m_axis_offset(0),
766     m_transpose(0),
767     m_tol(0)
768 jfenwick 3035 {
769    
770     DataLazy_ptr lmask;
771     DataLazy_ptr lleft;
772     DataLazy_ptr lright;
773     if (!mask->isLazy())
774     {
775 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
776 jfenwick 3035 }
777     else
778     {
779 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
780 jfenwick 3035 }
781     if (!left->isLazy())
782     {
783 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
784 jfenwick 3035 }
785     else
786     {
787 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
788 jfenwick 3035 }
789     if (!right->isLazy())
790     {
791 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
792 jfenwick 3035 }
793     else
794     {
795 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
796 jfenwick 3035 }
797     m_readytype=lmask->m_readytype;
798     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799     {
800 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
801 jfenwick 3035 }
802     m_left=lleft;
803     m_right=lright;
804     m_mask=lmask;
805     m_samplesize=getNumDPPSample()*getNoValues();
806     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808     LazyNodeSetup();
809     SIZELIMIT
810     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811     }
812    
813    
814    
815 jfenwick 1865 DataLazy::~DataLazy()
816     {
817 jfenwick 2500 delete[] m_sampleids;
818 jfenwick 1865 }
819    
820 jfenwick 1879
821 jfenwick 1899 /*
822     \brief Evaluates the expression using methods on Data.
823     This does the work for the collapse method.
824     For reasons of efficiency do not call this method on DataExpanded nodes.
825     */
826 jfenwick 1889 DataReady_ptr
827 jfenwick 4621 DataLazy::collapseToReady() const
828 jfenwick 1889 {
829     if (m_readytype=='E')
830 caltinay 5972 { // this is more an efficiency concern than anything else
831 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
832     }
833     if (m_op==IDENTITY)
834     {
835     return m_id;
836     }
837     DataReady_ptr pleft=m_left->collapseToReady();
838     Data left(pleft);
839     Data right;
840 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
841 jfenwick 1889 {
842     right=Data(m_right->collapseToReady());
843     }
844     Data result;
845     switch(m_op)
846     {
847     case ADD:
848 caltinay 5972 result=left+right;
849     break;
850     case SUB:
851     result=left-right;
852     break;
853     case MUL:
854     result=left*right;
855     break;
856     case DIV:
857     result=left/right;
858     break;
859 jfenwick 1889 case SIN:
860 caltinay 5972 result=left.sin();
861     break;
862 jfenwick 1889 case COS:
863 caltinay 5972 result=left.cos();
864     break;
865 jfenwick 1889 case TAN:
866 caltinay 5972 result=left.tan();
867     break;
868 jfenwick 1889 case ASIN:
869 caltinay 5972 result=left.asin();
870     break;
871 jfenwick 1889 case ACOS:
872 caltinay 5972 result=left.acos();
873     break;
874 jfenwick 1889 case ATAN:
875 caltinay 5972 result=left.atan();
876     break;
877 jfenwick 1889 case SINH:
878 caltinay 5972 result=left.sinh();
879     break;
880 jfenwick 1889 case COSH:
881 caltinay 5972 result=left.cosh();
882     break;
883 jfenwick 1889 case TANH:
884 caltinay 5972 result=left.tanh();
885     break;
886 jfenwick 1889 case ERF:
887 caltinay 5972 result=left.erf();
888     break;
889 jfenwick 1889 case ASINH:
890 caltinay 5972 result=left.asinh();
891     break;
892 jfenwick 1889 case ACOSH:
893 caltinay 5972 result=left.acosh();
894     break;
895 jfenwick 1889 case ATANH:
896 caltinay 5972 result=left.atanh();
897     break;
898 jfenwick 1889 case LOG10:
899 caltinay 5972 result=left.log10();
900     break;
901 jfenwick 1889 case LOG:
902 caltinay 5972 result=left.log();
903     break;
904 jfenwick 1889 case SIGN:
905 caltinay 5972 result=left.sign();
906     break;
907 jfenwick 1889 case ABS:
908 caltinay 5972 result=left.abs();
909     break;
910 jfenwick 1889 case NEG:
911 caltinay 5972 result=left.neg();
912     break;
913 jfenwick 1889 case POS:
914 caltinay 5972 // it doesn't mean anything for delayed.
915     // it will just trigger a deep copy of the lazy object
916     throw DataException("Programmer error - POS not supported for lazy data.");
917     break;
918 jfenwick 1889 case EXP:
919 caltinay 5972 result=left.exp();
920     break;
921 jfenwick 1889 case SQRT:
922 caltinay 5972 result=left.sqrt();
923     break;
924 jfenwick 1889 case RECIP:
925 caltinay 5972 result=left.oneOver();
926     break;
927 jfenwick 1889 case GZ:
928 caltinay 5972 result=left.wherePositive();
929     break;
930 jfenwick 1889 case LZ:
931 caltinay 5972 result=left.whereNegative();
932     break;
933 jfenwick 1889 case GEZ:
934 caltinay 5972 result=left.whereNonNegative();
935     break;
936 jfenwick 1889 case LEZ:
937 caltinay 5972 result=left.whereNonPositive();
938     break;
939 jfenwick 2147 case NEZ:
940 caltinay 5972 result=left.whereNonZero(m_tol);
941     break;
942 jfenwick 2147 case EZ:
943 caltinay 5972 result=left.whereZero(m_tol);
944     break;
945 jfenwick 2037 case SYM:
946 caltinay 5972 result=left.symmetric();
947     break;
948 jfenwick 2037 case NSYM:
949 jfenwick 6110 result=left.antisymmetric();
950 caltinay 5972 break;
951 jfenwick 2066 case PROD:
952 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953     break;
954 jfenwick 2084 case TRANS:
955 caltinay 5972 result=left.transpose(m_axis_offset);
956     break;
957 jfenwick 2084 case TRACE:
958 caltinay 5972 result=left.trace(m_axis_offset);
959     break;
960 jfenwick 2496 case SWAP:
961 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
962     break;
963 jfenwick 2721 case MINVAL:
964 caltinay 5972 result=left.minval();
965     break;
966 jfenwick 2721 case MAXVAL:
967 caltinay 5972 result=left.minval();
968     break;
969 jfenwick 6125 case HER:
970     result=left.hermitian();
971     break;
972 jfenwick 1889 default:
973 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974 jfenwick 1889 }
975     return result.borrowReadyPtr();
976     }
977    
978 jfenwick 1899 /*
979     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
980     This method uses the original methods on the Data class to evaluate the expressions.
981     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
982     the purpose of using DataLazy in the first place).
983     */
984 jfenwick 1889 void
985 jfenwick 4621 DataLazy::collapse() const
986 jfenwick 1889 {
987     if (m_op==IDENTITY)
988     {
989 caltinay 5972 return;
990 jfenwick 1889 }
991     if (m_readytype=='E')
992 caltinay 5972 { // this is more an efficiency concern than anything else
993 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
994     }
995     m_id=collapseToReady();
996     m_op=IDENTITY;
997     }
998    
999 jfenwick 2500 // The result will be stored in m_samples
1000     // The return value is a pointer to the DataVector, offset is the offset within the return value
1001 jfenwick 5938 const DataTypes::RealVectorType*
1002 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1003 jfenwick 2500 {
1004     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1005 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1006 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1007     {
1008 caltinay 5972 collapse();
1009 jfenwick 2500 }
1010 caltinay 5972 if (m_op==IDENTITY)
1011 jfenwick 2500 {
1012 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1013 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1014 jfenwick 2777 #ifdef LAZY_STACK_PROF
1015     int x;
1016     if (&x<stackend[omp_get_thread_num()])
1017     {
1018     stackend[omp_get_thread_num()]=&x;
1019     }
1020     #endif
1021 jfenwick 2500 return &(vec);
1022     }
1023     if (m_readytype!='E')
1024     {
1025     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1026     }
1027 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1028     {
1029 caltinay 5972 roffset=tid*m_samplesize;
1030     return &(m_samples); // sample is already resolved
1031 jfenwick 2501 }
1032     m_sampleids[tid]=sampleNo;
1033 jfenwick 3035
1034 jfenwick 2500 switch (getOpgroup(m_op))
1035     {
1036     case G_UNARY:
1037     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1038     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1039     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1040     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1041     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1042     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1043 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1044 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1045 jfenwick 2500 default:
1046     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1047     }
1048     }
1049    
1050 jfenwick 5938 const DataTypes::RealVectorType*
1051 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1052 jfenwick 2500 {
1053 caltinay 5972 // we assume that any collapsing has been done before we get here
1054     // since we only have one argument we don't need to think about only
1055     // processing single points.
1056     // we will also know we won't get identity nodes
1057 jfenwick 2500 if (m_readytype!='E')
1058     {
1059     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1060     }
1061     if (m_op==IDENTITY)
1062     {
1063     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1064     }
1065 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1066 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1067     roffset=m_samplesize*tid;
1068     double* result=&(m_samples[roffset]);
1069 jfenwick 6066 escript::ESFunction operation=SINF;
1070 jfenwick 2500 switch (m_op)
1071     {
1072 jfenwick 6066 case SIN:
1073     operation=SINF;
1074     break;
1075 jfenwick 2500 case COS:
1076 jfenwick 6066 operation=COSF;
1077     break;
1078 jfenwick 2500 case TAN:
1079 jfenwick 6066 operation=TANF;
1080     break;
1081 jfenwick 2500 case ASIN:
1082 jfenwick 6066 operation=ASINF;
1083     break;
1084 jfenwick 2500 case ACOS:
1085 jfenwick 6066 operation=ACOSF;
1086     break;
1087 jfenwick 2500 case ATAN:
1088 jfenwick 6066 operation=ATANF;
1089     break;
1090 jfenwick 2500 case SINH:
1091 jfenwick 6066 operation=SINHF;
1092     break;
1093 jfenwick 2500 case COSH:
1094 jfenwick 6066 operation=COSHF;
1095     break;
1096 jfenwick 2500 case TANH:
1097 jfenwick 6066 operation=TANHF;
1098     break;
1099 jfenwick 2500 case ERF:
1100 jfenwick 6066 operation=ERFF;
1101     break;
1102 jfenwick 2500 case ASINH:
1103 jfenwick 6066 operation=ASINHF;
1104     break;
1105 jfenwick 2500 case ACOSH:
1106 jfenwick 6066 operation=ACOSHF;
1107     break;
1108 jfenwick 2500 case ATANH:
1109 jfenwick 6066 operation=ATANHF;
1110     break;
1111 jfenwick 2500 case LOG10:
1112 jfenwick 6066 operation=LOG10F;
1113     break;
1114 jfenwick 2500 case LOG:
1115 jfenwick 6066 operation=LOGF;
1116     break;
1117 jfenwick 2500 case SIGN:
1118 jfenwick 6066 operation=SIGNF;
1119     break;
1120 jfenwick 2500 case ABS:
1121 jfenwick 6066 operation=ABSF;
1122     break;
1123 jfenwick 2500 case NEG:
1124 jfenwick 6066 operation=NEGF;
1125     break;
1126 jfenwick 2500 case POS:
1127 caltinay 5972 // it doesn't mean anything for delayed.
1128     // it will just trigger a deep copy of the lazy object
1129     throw DataException("Programmer error - POS not supported for lazy data.");
1130     break;
1131 jfenwick 2500 case EXP:
1132 jfenwick 6066 operation=EXPF;
1133     break;
1134 jfenwick 2500 case SQRT:
1135 jfenwick 6066 operation=SQRTF;
1136     break;
1137 jfenwick 2500 case RECIP:
1138 jfenwick 6066 operation=INVF;
1139     break;
1140 jfenwick 2500 case GZ:
1141 jfenwick 6066 operation=GTZEROF;
1142     break;
1143 jfenwick 2500 case LZ:
1144 jfenwick 6066 operation=LTZEROF;
1145     break;
1146 jfenwick 2500 case GEZ:
1147 jfenwick 6066 operation=GEZEROF;
1148     break;
1149 jfenwick 2500 case LEZ:
1150 jfenwick 6066 operation=LEZEROF;
1151     break;
1152 jfenwick 2500 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1153     case NEZ:
1154 jfenwick 6066 operation=NEQZEROF;
1155     break;
1156 jfenwick 2500 case EZ:
1157 jfenwick 6066 operation=EQZEROF;
1158     break;
1159 jfenwick 2500 default:
1160 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1161 jfenwick 2500 }
1162 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1163     left,
1164     result,
1165     operation,
1166     m_tol);
1167 jfenwick 2500 return &(m_samples);
1168     }
1169    
1170    
1171 jfenwick 5938 const DataTypes::RealVectorType*
1172 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1173 jfenwick 2721 {
1174 caltinay 5972 // we assume that any collapsing has been done before we get here
1175     // since we only have one argument we don't need to think about only
1176     // processing single points.
1177     // we will also know we won't get identity nodes
1178 jfenwick 2721 if (m_readytype!='E')
1179     {
1180     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1181     }
1182     if (m_op==IDENTITY)
1183     {
1184     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1185     }
1186     size_t loffset=0;
1187 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1188 jfenwick 2721
1189     roffset=m_samplesize*tid;
1190 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1191 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1192 jfenwick 2721 double* result=&(m_samples[roffset]);
1193     switch (m_op)
1194     {
1195     case MINVAL:
1196 caltinay 5972 {
1197     for (unsigned int z=0;z<ndpps;++z)
1198     {
1199     FMin op;
1200 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1201 caltinay 5972 loffset+=psize;
1202     result++;
1203     }
1204     }
1205     break;
1206 jfenwick 2721 case MAXVAL:
1207 caltinay 5972 {
1208     for (unsigned int z=0;z<ndpps;++z)
1209     {
1210     FMax op;
1211 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1212 caltinay 5972 loffset+=psize;
1213     result++;
1214     }
1215     }
1216     break;
1217 jfenwick 2721 default:
1218 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1219 jfenwick 2721 }
1220     return &(m_samples);
1221     }
1222    
1223 jfenwick 5938 const DataTypes::RealVectorType*
1224 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1225 jfenwick 2500 {
1226 caltinay 5972 // we assume that any collapsing has been done before we get here
1227     // since we only have one argument we don't need to think about only
1228     // processing single points.
1229 jfenwick 2500 if (m_readytype!='E')
1230     {
1231     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1232     }
1233     if (m_op==IDENTITY)
1234     {
1235     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1236     }
1237     size_t subroffset;
1238 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1239 jfenwick 2500 roffset=m_samplesize*tid;
1240     size_t loop=0;
1241     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1242     size_t step=getNoValues();
1243     size_t offset=roffset;
1244     switch (m_op)
1245     {
1246     case SYM:
1247 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1248     {
1249 jfenwick 6083 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1250 caltinay 5972 subroffset+=step;
1251     offset+=step;
1252     }
1253     break;
1254 jfenwick 2500 case NSYM:
1255 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1256     {
1257 jfenwick 6114 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1258 caltinay 5972 subroffset+=step;
1259     offset+=step;
1260     }
1261     break;
1262 jfenwick 2500 default:
1263 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1264 jfenwick 2500 }
1265     return &m_samples;
1266     }
1267    
1268 jfenwick 5938 const DataTypes::RealVectorType*
1269 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1270 jfenwick 2500 {
1271 caltinay 5972 // we assume that any collapsing has been done before we get here
1272     // since we only have one argument we don't need to think about only
1273     // processing single points.
1274 jfenwick 2500 if (m_readytype!='E')
1275     {
1276     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1277     }
1278     if (m_op==IDENTITY)
1279     {
1280     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1281     }
1282     size_t subroffset;
1283     size_t offset;
1284 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1285 jfenwick 2500 roffset=m_samplesize*tid;
1286     offset=roffset;
1287     size_t loop=0;
1288     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1289     size_t outstep=getNoValues();
1290     size_t instep=m_left->getNoValues();
1291     switch (m_op)
1292     {
1293     case TRACE:
1294 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1295     {
1296 jfenwick 6083 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1297 caltinay 5972 subroffset+=instep;
1298     offset+=outstep;
1299     }
1300     break;
1301 jfenwick 2500 case TRANS:
1302 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1303     {
1304 jfenwick 6083 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1305 caltinay 5972 subroffset+=instep;
1306     offset+=outstep;
1307     }
1308     break;
1309 jfenwick 2500 default:
1310 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1311 jfenwick 2500 }
1312     return &m_samples;
1313     }
1314    
1315    
1316 jfenwick 5938 const DataTypes::RealVectorType*
1317 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1318 jfenwick 2500 {
1319     if (m_readytype!='E')
1320     {
1321     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1322     }
1323     if (m_op==IDENTITY)
1324     {
1325     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1326     }
1327     size_t subroffset;
1328     size_t offset;
1329 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1330 jfenwick 2500 roffset=m_samplesize*tid;
1331     offset=roffset;
1332     size_t loop=0;
1333     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1334     size_t outstep=getNoValues();
1335     size_t instep=m_left->getNoValues();
1336     switch (m_op)
1337     {
1338     case SWAP:
1339 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1340     {
1341 jfenwick 6083 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1342 caltinay 5972 subroffset+=instep;
1343     offset+=outstep;
1344     }
1345     break;
1346 jfenwick 2500 default:
1347 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1348 jfenwick 2500 }
1349     return &m_samples;
1350     }
1351    
1352 jfenwick 5938 const DataTypes::RealVectorType*
1353 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1354 jfenwick 3035 {
1355     if (m_readytype!='E')
1356     {
1357     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1358     }
1359     if (m_op!=CONDEVAL)
1360     {
1361     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1362     }
1363     size_t subroffset;
1364 jfenwick 2500
1365 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1366     const RealVectorType* srcres=0;
1367 jfenwick 3035 if ((*maskres)[subroffset]>0)
1368     {
1369 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1370 jfenwick 3035 }
1371     else
1372     {
1373 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1374 jfenwick 3035 }
1375    
1376     // Now we need to copy the result
1377    
1378     roffset=m_samplesize*tid;
1379     for (int i=0;i<m_samplesize;++i)
1380     {
1381 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1382 jfenwick 3035 }
1383    
1384     return &m_samples;
1385     }
1386    
1387 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1388     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1389     // If both children are expanded, then we can process them in a single operation (we treat
1390     // the whole sample as one big datapoint.
1391     // If one of the children is not expanded, then we need to treat each point in the sample
1392     // individually.
1393     // There is an additional complication when scalar operations are considered.
1394     // For example, 2+Vector.
1395     // In this case each double within the point is treated individually
1396 jfenwick 5938 const DataTypes::RealVectorType*
1397 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1398 jfenwick 2500 {
1399     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1400    
1401 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1402     // first work out which of the children are expanded
1403 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1404     bool rightExp=(m_right->m_readytype=='E');
1405     if (!leftExp && !rightExp)
1406     {
1407 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1408 jfenwick 2500 }
1409     bool leftScalar=(m_left->getRank()==0);
1410     bool rightScalar=(m_right->getRank()==0);
1411     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1412     {
1413 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1414 jfenwick 2500 }
1415     size_t leftsize=m_left->getNoValues();
1416     size_t rightsize=m_right->getNoValues();
1417 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1418     int leftstep=0; // how far should the left offset advance after each step
1419 jfenwick 2500 int rightstep=0;
1420 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1421     int oleftstep=0; // the o variables refer to the outer loop
1422     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1423 jfenwick 2500 int onumsteps=1;
1424    
1425 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1426 jfenwick 2500 bool RES=(rightExp && rightScalar);
1427 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1428 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1429 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1430 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1431 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1432 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1433    
1434 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1435 jfenwick 2500 {
1436 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1437     leftstep=0;
1438     rightstep=0;
1439     numsteps=1;
1440 jfenwick 2500 }
1441     else if (LES || RES)
1442     {
1443 caltinay 5972 chunksize=1;
1444     if (LES) // left is an expanded scalar
1445     {
1446     if (RS)
1447     {
1448     leftstep=1;
1449     rightstep=0;
1450     numsteps=m_left->getNumDPPSample();
1451     }
1452     else // RN or REN
1453     {
1454     leftstep=0;
1455     oleftstep=1;
1456     rightstep=1;
1457     orightstep=(RN ? -(int)rightsize : 0);
1458     numsteps=rightsize;
1459     onumsteps=m_left->getNumDPPSample();
1460     }
1461     }
1462     else // right is an expanded scalar
1463     {
1464     if (LS)
1465     {
1466     rightstep=1;
1467     leftstep=0;
1468     numsteps=m_right->getNumDPPSample();
1469     }
1470     else
1471     {
1472     rightstep=0;
1473     orightstep=1;
1474     leftstep=1;
1475     oleftstep=(LN ? -(int)leftsize : 0);
1476     numsteps=leftsize;
1477     onumsteps=m_right->getNumDPPSample();
1478     }
1479     }
1480 jfenwick 2500 }
1481 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1482 jfenwick 2500 {
1483 caltinay 5972 if (LEN) // and Right will be a single value
1484     {
1485     chunksize=rightsize;
1486     leftstep=rightsize;
1487     rightstep=0;
1488     numsteps=m_left->getNumDPPSample();
1489     if (RS)
1490     {
1491     numsteps*=leftsize;
1492     }
1493     }
1494     else // REN
1495     {
1496     chunksize=leftsize;
1497     rightstep=leftsize;
1498     leftstep=0;
1499     numsteps=m_right->getNumDPPSample();
1500     if (LS)
1501     {
1502     numsteps*=rightsize;
1503     }
1504     }
1505 jfenwick 2500 }
1506    
1507 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1508     // Get the values of sub-expressions
1509 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1510     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1511 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1512     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1513     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1514     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1515     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1516     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1517     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1518    
1519     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1520     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1521    
1522    
1523     roffset=m_samplesize*tid;
1524 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1525 jfenwick 2500 switch(m_op)
1526     {
1527     case ADD:
1528 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1529 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1530 jfenwick 6042 &(*left)[0],
1531     &(*right)[0],
1532     chunksize,
1533     onumsteps,
1534     numsteps,
1535     resultStep,
1536     leftstep,
1537     rightstep,
1538     oleftstep,
1539     orightstep,
1540 jfenwick 6056 lroffset,
1541     rroffset,
1542 jfenwick 6042 escript::ESFunction::PLUSF);
1543 caltinay 5972 break;
1544 jfenwick 2500 case SUB:
1545 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1546 jfenwick 6057 &(*left)[0],
1547     &(*right)[0],
1548     chunksize,
1549     onumsteps,
1550     numsteps,
1551     resultStep,
1552     leftstep,
1553     rightstep,
1554     oleftstep,
1555     orightstep,
1556     lroffset,
1557     rroffset,
1558     escript::ESFunction::MINUSF);
1559     //PROC_OP(NO_ARG,minus<double>());
1560 caltinay 5972 break;
1561 jfenwick 2500 case MUL:
1562 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1563 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1564 jfenwick 6057 &(*left)[0],
1565     &(*right)[0],
1566     chunksize,
1567     onumsteps,
1568     numsteps,
1569     resultStep,
1570     leftstep,
1571     rightstep,
1572     oleftstep,
1573     orightstep,
1574     lroffset,
1575     rroffset,
1576     escript::ESFunction::MULTIPLIESF);
1577 caltinay 5972 break;
1578 jfenwick 2500 case DIV:
1579 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1580 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1581 jfenwick 6057 &(*left)[0],
1582     &(*right)[0],
1583     chunksize,
1584     onumsteps,
1585     numsteps,
1586     resultStep,
1587     leftstep,
1588     rightstep,
1589     oleftstep,
1590     orightstep,
1591     lroffset,
1592     rroffset,
1593     escript::ESFunction::DIVIDESF);
1594 caltinay 5972 break;
1595 jfenwick 2500 case POW:
1596 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1597 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1598 jfenwick 6057 &(*left)[0],
1599     &(*right)[0],
1600     chunksize,
1601     onumsteps,
1602     numsteps,
1603     resultStep,
1604     leftstep,
1605     rightstep,
1606     oleftstep,
1607     orightstep,
1608     lroffset,
1609     rroffset,
1610     escript::ESFunction::POWF);
1611 caltinay 5972 break;
1612 jfenwick 2500 default:
1613 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1614 jfenwick 2500 }
1615     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1616     return &m_samples;
1617     }
1618    
1619    
1620     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1621     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1622     // unlike the other resolve helpers, we must treat these datapoints separately.
1623 jfenwick 5938 const DataTypes::RealVectorType*
1624 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1625 jfenwick 2500 {
1626     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1627    
1628 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1629     // first work out which of the children are expanded
1630 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1631     bool rightExp=(m_right->m_readytype=='E');
1632     int steps=getNumDPPSample();
1633 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1634 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1635    
1636     int resultStep=getNoValues();
1637     roffset=m_samplesize*tid;
1638     size_t offset=roffset;
1639    
1640 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1641 jfenwick 2500
1642 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1643 jfenwick 2500
1644     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1645     cout << getNoValues() << endl;)
1646    
1647    
1648     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1649     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1650     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1651     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1652     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1653     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1654     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1655    
1656 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1657 jfenwick 2500 switch(m_op)
1658     {
1659     case PROD:
1660 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1661     {
1662     const double *ptr_0 = &((*left)[lroffset]);
1663     const double *ptr_1 = &((*right)[rroffset]);
1664 jfenwick 2500
1665     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1666     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1667    
1668 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1669 jfenwick 2500
1670 caltinay 5972 lroffset+=leftStep;
1671     rroffset+=rightStep;
1672     }
1673     break;
1674 jfenwick 2500 default:
1675 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1676 jfenwick 2500 }
1677     roffset=offset;
1678     return &m_samples;
1679     }
1680    
1681 jfenwick 1898
1682 jfenwick 5938 const DataTypes::RealVectorType*
1683 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1684 jfenwick 1879 {
1685 jfenwick 2271 #ifdef _OPENMP
1686 caltinay 5972 int tid=omp_get_thread_num();
1687 jfenwick 2271 #else
1688 caltinay 5972 int tid=0;
1689 jfenwick 2271 #endif
1690 jfenwick 2777
1691     #ifdef LAZY_STACK_PROF
1692 caltinay 5972 stackstart[tid]=&tid;
1693     stackend[tid]=&tid;
1694     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1695     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1696     #pragma omp critical
1697     if (d>maxstackuse)
1698     {
1699 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1700 caltinay 5972 maxstackuse=d;
1701     }
1702     return r;
1703 jfenwick 2777 #else
1704 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1705 jfenwick 2777 #endif
1706 jfenwick 2271 }
1707    
1708    
1709 jfenwick 2497 // This needs to do the work of the identity constructor
1710 jfenwick 2177 void
1711     DataLazy::resolveToIdentity()
1712     {
1713     if (m_op==IDENTITY)
1714 caltinay 5972 return;
1715 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1716 jfenwick 2177 makeIdentity(p);
1717     }
1718 jfenwick 1889
1719 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1720     {
1721     m_op=IDENTITY;
1722     m_axis_offset=0;
1723     m_transpose=0;
1724     m_SL=m_SM=m_SR=0;
1725     m_children=m_height=0;
1726     m_id=p;
1727     if(p->isConstant()) {m_readytype='C';}
1728     else if(p->isExpanded()) {m_readytype='E';}
1729     else if (p->isTagged()) {m_readytype='T';}
1730     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1731     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1732     m_left.reset();
1733     m_right.reset();
1734     }
1735    
1736 jfenwick 2497
1737     DataReady_ptr
1738     DataLazy::resolve()
1739     {
1740     resolveToIdentity();
1741     return m_id;
1742     }
1743    
1744 jfenwick 2799
1745     /* This is really a static method but I think that caused problems in windows */
1746     void
1747     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1748     {
1749     if (dats.empty())
1750     {
1751 caltinay 5972 return;
1752 jfenwick 2799 }
1753     vector<DataLazy*> work;
1754     FunctionSpace fs=dats[0]->getFunctionSpace();
1755     bool match=true;
1756 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1757 jfenwick 2799 {
1758 caltinay 5972 if (dats[i]->m_readytype!='E')
1759     {
1760     dats[i]->collapse();
1761     }
1762     if (dats[i]->m_op!=IDENTITY)
1763     {
1764     work.push_back(dats[i]);
1765     if (fs!=dats[i]->getFunctionSpace())
1766     {
1767     match=false;
1768     }
1769     }
1770 jfenwick 2799 }
1771     if (work.empty())
1772     {
1773 caltinay 5972 return; // no work to do
1774 jfenwick 2799 }
1775 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1776     { // it is possible that dats[0] is one of the objects which we discarded and
1777     // all the other functionspaces match.
1778     vector<DataExpanded*> dep;
1779 jfenwick 6042 vector<RealVectorType*> vecs;
1780 caltinay 5972 for (int i=0;i<work.size();++i)
1781     {
1782 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1783 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1784     }
1785     int totalsamples=work[0]->getNumSamples();
1786 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1787 caltinay 5972 int sample;
1788     #pragma omp parallel private(sample, res)
1789     {
1790     size_t roffset=0;
1791     #pragma omp for schedule(static)
1792     for (sample=0;sample<totalsamples;++sample)
1793     {
1794     roffset=0;
1795     int j;
1796     for (j=work.size()-1;j>=0;--j)
1797     {
1798 jfenwick 2799 #ifdef _OPENMP
1799 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1800 jfenwick 2799 #else
1801 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1802 jfenwick 2799 #endif
1803 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1804     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1805     }
1806     }
1807     }
1808     // Now we need to load the new results as identity ops into the lazy nodes
1809     for (int i=work.size()-1;i>=0;--i)
1810     {
1811 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1812 caltinay 5972 }
1813 jfenwick 2799 }
1814 caltinay 5972 else // functionspaces do not match
1815 jfenwick 2799 {
1816 caltinay 5972 for (int i=0;i<work.size();++i)
1817     {
1818     work[i]->resolveToIdentity();
1819     }
1820 jfenwick 2799 }
1821     }
1822    
1823    
1824    
1825 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1826     DataReady_ptr
1827     DataLazy::resolveNodeWorker()
1828     {
1829 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1830 jfenwick 2500 {
1831     collapse();
1832     }
1833 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1834 jfenwick 2500 {
1835     return m_id;
1836     }
1837 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1838 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1839     RealVectorType& resvec=result->getVectorRW();
1840 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1841    
1842     int sample;
1843     int totalsamples=getNumSamples();
1844 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1845 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1846 jfenwick 2777 #pragma omp parallel private(sample,res)
1847 jfenwick 2500 {
1848 caltinay 5972 size_t roffset=0;
1849 jfenwick 2777 #ifdef LAZY_STACK_PROF
1850 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1851     stackend[omp_get_thread_num()]=&roffset;
1852 jfenwick 2777 #endif
1853 caltinay 5972 #pragma omp for schedule(static)
1854     for (sample=0;sample<totalsamples;++sample)
1855     {
1856     roffset=0;
1857 jfenwick 2500 #ifdef _OPENMP
1858 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1859 jfenwick 2500 #else
1860 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1861 jfenwick 2500 #endif
1862     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1863     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1864 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1865     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1866     }
1867 jfenwick 2500 }
1868 jfenwick 2777 #ifdef LAZY_STACK_PROF
1869     for (int i=0;i<getNumberOfThreads();++i)
1870     {
1871 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1872     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1873     if (r>maxstackuse)
1874     {
1875     maxstackuse=r;
1876     }
1877 jfenwick 2777 }
1878     cout << "Max resolve Stack use=" << maxstackuse << endl;
1879     #endif
1880 jfenwick 2500 return resptr;
1881     }
1882    
1883 jfenwick 1865 std::string
1884     DataLazy::toString() const
1885     {
1886 jfenwick 1886 ostringstream oss;
1887 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1888 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1889 jfenwick 2737 {
1890 caltinay 5972 case 1: // tree format
1891     oss << endl;
1892     intoTreeString(oss,"");
1893     break;
1894     case 2: // just the depth
1895     break;
1896 jfenwick 2795 default:
1897 caltinay 5972 intoString(oss);
1898     break;
1899 jfenwick 2737 }
1900 jfenwick 1886 return oss.str();
1901 jfenwick 1865 }
1902    
1903 jfenwick 1899
1904 jfenwick 1886 void
1905     DataLazy::intoString(ostringstream& oss) const
1906     {
1907 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1908 jfenwick 1886 switch (getOpgroup(m_op))
1909     {
1910 jfenwick 1889 case G_IDENTITY:
1911 caltinay 5972 if (m_id->isExpanded())
1912     {
1913     oss << "E";
1914     }
1915     else if (m_id->isTagged())
1916     {
1917     oss << "T";
1918     }
1919     else if (m_id->isConstant())
1920     {
1921     oss << "C";
1922     }
1923     else
1924     {
1925     oss << "?";
1926     }
1927     oss << '@' << m_id.get();
1928     break;
1929 jfenwick 1886 case G_BINARY:
1930 caltinay 5972 oss << '(';
1931     m_left->intoString(oss);
1932     oss << ' ' << opToString(m_op) << ' ';
1933     m_right->intoString(oss);
1934     oss << ')';
1935     break;
1936 jfenwick 1886 case G_UNARY:
1937 jfenwick 2147 case G_UNARY_P:
1938 jfenwick 2037 case G_NP1OUT:
1939 jfenwick 2084 case G_NP1OUT_P:
1940 jfenwick 2721 case G_REDUCTION:
1941 caltinay 5972 oss << opToString(m_op) << '(';
1942     m_left->intoString(oss);
1943     oss << ')';
1944     break;
1945 jfenwick 2066 case G_TENSORPROD:
1946 caltinay 5972 oss << opToString(m_op) << '(';
1947     m_left->intoString(oss);
1948     oss << ", ";
1949     m_right->intoString(oss);
1950     oss << ')';
1951     break;
1952 jfenwick 2496 case G_NP1OUT_2P:
1953 caltinay 5972 oss << opToString(m_op) << '(';
1954     m_left->intoString(oss);
1955     oss << ", " << m_axis_offset << ", " << m_transpose;
1956     oss << ')';
1957     break;
1958 jfenwick 3035 case G_CONDEVAL:
1959 caltinay 5972 oss << opToString(m_op)<< '(' ;
1960     m_mask->intoString(oss);
1961     oss << " ? ";
1962     m_left->intoString(oss);
1963     oss << " : ";
1964     m_right->intoString(oss);
1965     oss << ')';
1966     break;
1967 jfenwick 1886 default:
1968 caltinay 5972 oss << "UNKNOWN";
1969 jfenwick 1886 }
1970     }
1971    
1972 jfenwick 2737
1973     void
1974     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1975     {
1976     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1977     switch (getOpgroup(m_op))
1978     {
1979     case G_IDENTITY:
1980 caltinay 5972 if (m_id->isExpanded())
1981     {
1982     oss << "E";
1983     }
1984     else if (m_id->isTagged())
1985     {
1986     oss << "T";
1987     }
1988     else if (m_id->isConstant())
1989     {
1990     oss << "C";
1991     }
1992     else
1993     {
1994     oss << "?";
1995     }
1996     oss << '@' << m_id.get() << endl;
1997     break;
1998 jfenwick 2737 case G_BINARY:
1999 caltinay 5972 oss << opToString(m_op) << endl;
2000     indent+='.';
2001     m_left->intoTreeString(oss, indent);
2002     m_right->intoTreeString(oss, indent);
2003     break;
2004 jfenwick 2737 case G_UNARY:
2005     case G_UNARY_P:
2006     case G_NP1OUT:
2007     case G_NP1OUT_P:
2008     case G_REDUCTION:
2009 caltinay 5972 oss << opToString(m_op) << endl;
2010     indent+='.';
2011     m_left->intoTreeString(oss, indent);
2012     break;
2013 jfenwick 2737 case G_TENSORPROD:
2014 caltinay 5972 oss << opToString(m_op) << endl;
2015     indent+='.';
2016     m_left->intoTreeString(oss, indent);
2017     m_right->intoTreeString(oss, indent);
2018     break;
2019 jfenwick 2737 case G_NP1OUT_2P:
2020 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2021     indent+='.';
2022     m_left->intoTreeString(oss, indent);
2023     break;
2024 jfenwick 2737 default:
2025 caltinay 5972 oss << "UNKNOWN";
2026 jfenwick 2737 }
2027     }
2028    
2029    
2030 jfenwick 1865 DataAbstract*
2031 jfenwick 5938 DataLazy::deepCopy() const
2032 jfenwick 1865 {
2033 jfenwick 1935 switch (getOpgroup(m_op))
2034 jfenwick 1865 {
2035 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
2036 caltinay 5972 case G_UNARY:
2037 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2038 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2039     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2040 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2041     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2042 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
2043     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2044 jfenwick 1935 default:
2045 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2046 jfenwick 1865 }
2047     }
2048    
2049    
2050 jfenwick 2721
2051 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
2052     // Instances of DataReady can look at the size of their vectors.
2053     // For lazy though, it could be the size the data would be if it were resolved;
2054     // or it could be some function of the lengths of the DataReady instances which
2055     // form part of the expression.
2056     // Rather than have people making assumptions, I have disabled the method.
2057 jfenwick 5938 DataTypes::RealVectorType::size_type
2058 jfenwick 1865 DataLazy::getLength() const
2059     {
2060 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
2061 jfenwick 1865 }
2062    
2063    
2064     DataAbstract*
2065     DataLazy::getSlice(const DataTypes::RegionType& region) const
2066     {
2067 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
2068 jfenwick 1865 }
2069    
2070 jfenwick 1935
2071     // To do this we need to rely on our child nodes
2072 jfenwick 5938 DataTypes::RealVectorType::size_type
2073 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
2074 jfenwick 1935 int dataPointNo)
2075     {
2076     if (m_op==IDENTITY)
2077     {
2078 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2079 jfenwick 1935 }
2080     if (m_readytype!='E')
2081     {
2082 caltinay 5972 collapse();
2083     return m_id->getPointOffset(sampleNo,dataPointNo);
2084 jfenwick 1935 }
2085     // at this point we do not have an identity node and the expression will be Expanded
2086     // so we only need to know which child to ask
2087     if (m_left->m_readytype=='E')
2088     {
2089 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2090 jfenwick 1935 }
2091     else
2092     {
2093 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2094 jfenwick 1935 }
2095     }
2096    
2097     // To do this we need to rely on our child nodes
2098 jfenwick 5938 DataTypes::RealVectorType::size_type
2099 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
2100 jfenwick 1865 int dataPointNo) const
2101     {
2102 jfenwick 1935 if (m_op==IDENTITY)
2103     {
2104 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
2105 jfenwick 1935 }
2106     if (m_readytype=='E')
2107     {
2108     // at this point we do not have an identity node and the expression will be Expanded
2109     // so we only need to know which child to ask
2110     if (m_left->m_readytype=='E')
2111     {
2112 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
2113 jfenwick 1935 }
2114     else
2115     {
2116 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
2117 jfenwick 1935 }
2118     }
2119     if (m_readytype=='C')
2120     {
2121 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2122 jfenwick 1935 }
2123     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2124 jfenwick 1865 }
2125    
2126 jfenwick 2271
2127     // I have decided to let Data:: handle this issue.
2128 jfenwick 1901 void
2129     DataLazy::setToZero()
2130     {
2131 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
2132 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2133     // m_op=IDENTITY;
2134     // m_right.reset();
2135     // m_left.reset();
2136     // m_readytype='C';
2137     // m_buffsRequired=1;
2138    
2139 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2140 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2141 jfenwick 1901 }
2142    
2143 jfenwick 2271 bool
2144     DataLazy::actsExpanded() const
2145     {
2146 caltinay 5972 return (m_readytype=='E');
2147 jfenwick 2271 }
2148    
2149 caltinay 5997 } // end namespace
2150 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26