
Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp



Revision 2721
Fri Oct 16 05:40:12 2009 UTC (9 years, 5 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 85637 byte(s)
minval and maxval are now lazy operations (they weren't before).
Whether or not Lsup, sup and inf resolve their arguments before computing answers is controlled by the escriptParam 'RESOLVE_COLLECTIVE'.
Note: integrate() still forces a resolve.

Added some unit tests for operations which weren't tested before.
Added deepcopy implementations for lazy operations which got missed somehow.
1 jfenwick 1865
2     /*******************************************************
3     *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 jfenwick 1865 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2472 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49 jfenwick 2501
50 jfenwick 1899 /*
51     How does DataLazy work?
52     ~~~~~~~~~~~~~~~~~~~~~~~
53    
54     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55     denoted left and right.
56    
57     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58     This means that all "internal" nodes in the structure are instances of DataLazy.
59    
60     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61     Note that IDENTITY is not considered a unary operation.
62    
63     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64     It must however form a DAG (directed acyclic graph).
65     I will refer to individual DataLazy objects with the structure as nodes.
66    
67     Each node also stores:
68     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69     evaluated.
70     - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
71     evaluate the expression.
72     - m_samplesize ~ the number of doubles stored in a sample.
73    
74     When a new node is created, the above values are computed based on the values in the child nodes.
75     Eg: under calcBuffs below, if left requires 4 samples and right requires 6 then left+right requires 1+max(4,6+1)=8 samples.
76    
77     The resolve method, which produces a DataReady from a DataLazy, does the following:
78     1) Create a DataReady to hold the new result.
79     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81    
82     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83    
84     resolveSample returns a Vector* and an offset within that vector where the result is stored.
85     Normally, this would be v, but for identity nodes their internal vector is returned instead.
86    
87     The convention that I use is that the resolve methods should store their results starting at the offset they are passed.
88    
89     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91 jfenwick 2037
92 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93 jfenwick 2037 1) Add to the ES_optype.
94     2) determine what opgroup your operation belongs to (X)
95     3) add a string for the op to the end of ES_opstrings
96     4) increase ES_opcount
97     5) add an entry (X) to opgroups
98     6) add an entry to the switch in collapseToReady
99     7) add an entry to resolveX
100 jfenwick 1899 */
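/*
   A minimal usage sketch (illustration only, not compiled as part of this file):
   building a small DAG with the constructors defined below and then evaluating it.
   Here 'p' and 'q' are assumed to be DataReady objects held as DataAbstract_ptr;
   the operator names come from ES_optype.

     DataLazy_ptr a(new DataLazy(p));           // IDENTITY node wrapping p
     DataLazy_ptr b(new DataLazy(q));           // IDENTITY node wrapping q
     DataLazy_ptr s(new DataLazy(a, b, ADD));   // binary node representing a+b
     DataLazy_ptr e(new DataLazy(s, SIN));      // unary node representing sin(a+b)
     DataReady_ptr r=e->resolve();              // evaluates the DAG into a DataReady
*/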
101    
102    
103 jfenwick 1865 using namespace std;
104 jfenwick 1868 using namespace boost;
105 jfenwick 1865
106     namespace escript
107     {
108    
109     namespace
110     {
111    
112 jfenwick 1886 enum ES_opgroup
113     {
114     G_UNKNOWN,
115     G_IDENTITY,
116 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
117 jfenwick 2037 G_UNARY, // pointwise operations with one argument
118 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
119 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
120 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
121 jfenwick 2496 G_TENSORPROD, // general tensor product
122 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
123     G_REDUCTION // non-pointwise unary op with a scalar output
124 jfenwick 1886 };
125    
126    
127    
128    
129 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130     "sin","cos","tan",
131 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
132     "asinh","acosh","atanh",
133 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
134 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135 jfenwick 2066 "symmetric","nonsymmetric",
136 jfenwick 2084 "prod",
137 jfenwick 2496 "transpose", "trace",
138 jfenwick 2721 "swapaxes",
139     "minval", "maxval"};
140     int ES_opcount=43;
141 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142     G_UNARY,G_UNARY,G_UNARY, //10
143     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
144     G_UNARY,G_UNARY,G_UNARY, // 20
145 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
146     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
147 jfenwick 2066 G_NP1OUT,G_NP1OUT,
148 jfenwick 2084 G_TENSORPROD,
149 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
150 jfenwick 2721 G_NP1OUT_2P,
151     G_REDUCTION, G_REDUCTION};
152 jfenwick 1886 inline
153     ES_opgroup
154     getOpgroup(ES_optype op)
155     {
156     return opgroups[op];
157     }
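// For example, getOpgroup(SIN) simply returns opgroups[SIN], which is G_UNARY, and
// opToString(SIN) (defined further down) returns ES_opstrings[SIN], i.e. "sin".
// The two arrays and ES_opcount must stay in step with the ES_optype enum declared
// alongside this class.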
158    
159 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
160     FunctionSpace
161     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162     {
163     // perhaps this should call interpolate and throw or something?
164     // maybe we need an interpolate node -
165     // that way, if interpolate is required in any other op we can just throw a
166     // programming error exception.
167 jfenwick 1879
168 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
169     FunctionSpace r=right->getFunctionSpace();
170     if (l!=r)
171     {
172     if (r.probeInterpolation(l))
173     {
174     return l;
175     }
176     if (l.probeInterpolation(r))
177     {
178     return r;
179     }
180     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
181     }
182     return l;
183 jfenwick 1865 }
184    
185     // return the shape of the result of "left op right"
186 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
187 jfenwick 1865 DataTypes::ShapeType
188     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189     {
190 jfenwick 1879 if (left->getShape()!=right->getShape())
191     {
192 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
193 jfenwick 1908 {
194     throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
195     }
196 jfenwick 2721
197 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
198     {
199     return right->getShape();
200     }
201     if (right->getRank()==0)
202     {
203     return left->getShape();
204     }
205     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
206 jfenwick 1879 }
207     return left->getShape();
208 jfenwick 1865 }
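// For example, under these rules a scalar argument combined with a (3,3) argument
// gives a (3,3) result, while combining (2,3) with (3,2) throws a DataException.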
209    
210 jfenwick 2084 // return the shape for "op left"
211    
212     DataTypes::ShapeType
213 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
214 jfenwick 2084 {
215     switch(op)
216     {
217     case TRANS:
218 jfenwick 2199 { // for the scoping of variables
219     const DataTypes::ShapeType& s=left->getShape();
220     DataTypes::ShapeType sh;
221     int rank=left->getRank();
222     if (axis_offset<0 || axis_offset>rank)
223     {
224     throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank.");
225     }
226     for (int i=0; i<rank; i++)
227     {
228     int index = (axis_offset+i)%rank;
229     sh.push_back(s[index]); // Append to new shape
230     }
231     return sh;
232     }
233 jfenwick 2084 break;
234     case TRACE:
235 jfenwick 2199 {
236     int rank=left->getRank();
237     if (rank<2)
238     {
239     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240     }
241     if ((axis_offset>rank-2) || (axis_offset<0))
242     {
243     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244     }
245     if (rank==2)
246     {
247     return DataTypes::scalarShape;
248     }
249     else if (rank==3)
250     {
251     DataTypes::ShapeType sh;
252     if (axis_offset==0)
253     {
254     sh.push_back(left->getShape()[2]);
255     }
256     else // offset==1
257     {
258     sh.push_back(left->getShape()[0]);
259     }
260     return sh;
261     }
262     else if (rank==4)
263     {
264     DataTypes::ShapeType sh;
265     const DataTypes::ShapeType& s=left->getShape();
266     if (axis_offset==0)
267     {
268     sh.push_back(s[2]);
269     sh.push_back(s[3]);
270     }
271     else if (axis_offset==1)
272     {
273     sh.push_back(s[0]);
274     sh.push_back(s[3]);
275     }
276     else // offset==2
277     {
278     sh.push_back(s[0]);
279     sh.push_back(s[1]);
280     }
281     return sh;
282     }
283     else // unknown rank
284     {
285     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 objects.");
286     }
287     }
288 jfenwick 2084 break;
289     default:
290     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291     }
292     }
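// Worked examples for the rules above: transposing a (2,3,4) object with
// axis_offset=1 picks indices (1+i)%3 and so gives shape (3,4,2); taking the
// trace of the same (2,3,4) object with axis_offset=0 gives shape (4), and with
// axis_offset=1 gives shape (2).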
293    
294 jfenwick 2496 DataTypes::ShapeType
295     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296     {
297     // This code is taken from the Data.cpp swapaxes() method
298     // Some of the checks are probably redundant here
299     int axis0_tmp,axis1_tmp;
300     const DataTypes::ShapeType& s=left->getShape();
301     DataTypes::ShapeType out_shape;
302     // The loop below builds the output shape by copying the input shape
303     // with the entries at positions axis0 and axis1 exchanged
304     int rank=left->getRank();
305     if (rank<2) {
306     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307     }
308     if (axis0<0 || axis0>rank-1) {
309     throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1.");
310     }
311     if (axis1<0 || axis1>rank-1) {
312     throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1.");
313     }
314     if (axis0 == axis1) {
315     throw DataException("Error - Data::swapaxes: axis indices must be different.");
316     }
317     if (axis0 > axis1) {
318     axis0_tmp=axis1;
319     axis1_tmp=axis0;
320     } else {
321     axis0_tmp=axis0;
322     axis1_tmp=axis1;
323     }
324     for (int i=0; i<rank; i++) {
325     if (i == axis0_tmp) {
326     out_shape.push_back(s[axis1_tmp]);
327     } else if (i == axis1_tmp) {
328     out_shape.push_back(s[axis0_tmp]);
329     } else {
330     out_shape.push_back(s[i]);
331     }
332     }
333     return out_shape;
334     }
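// For example, SwapShape on a (2,3,4) object with axis0=0 and axis1=2 returns (4,3,2).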
335    
336    
337 jfenwick 2066 // determine the output shape for the general tensor product operation
338     // the additional parameters return information required later for the product
339     // the majority of this code is copy pasted from C_General_Tensor_Product
340     DataTypes::ShapeType
341     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
342 jfenwick 1865 {
343 jfenwick 2066
344     // Get rank and shape of inputs
345     int rank0 = left->getRank();
346     int rank1 = right->getRank();
347     const DataTypes::ShapeType& shape0 = left->getShape();
348     const DataTypes::ShapeType& shape1 = right->getShape();
349    
350     // Prepare for the loops of the product and verify compatibility of shapes
351     int start0=0, start1=0;
352     if (transpose == 0) {}
353     else if (transpose == 1) { start0 = axis_offset; }
354     else if (transpose == 2) { start1 = rank1-axis_offset; }
355     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
356    
357 jfenwick 2085 if (rank0<axis_offset)
358     {
359     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
360     }
361 jfenwick 2066
362     // Adjust the shapes for transpose
363     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
364     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
365     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
366     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
367    
368     // Prepare for the loops of the product
369     SL=1, SM=1, SR=1;
370     for (int i=0; i<rank0-axis_offset; i++) {
371     SL *= tmpShape0[i];
372     }
373     for (int i=rank0-axis_offset; i<rank0; i++) {
374     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
375     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
376     }
377     SM *= tmpShape0[i];
378     }
379     for (int i=axis_offset; i<rank1; i++) {
380     SR *= tmpShape1[i];
381     }
382    
383     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
384     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
385     { // block to limit the scope of out_index
386     int out_index=0;
387     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
388     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
389     }
390 jfenwick 2086
391     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
392     {
393     ostringstream os;
394     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
395     throw DataException(os.str());
396     }
397    
398 jfenwick 2066 return shape2;
399 jfenwick 1865 }
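// Worked example of the shape arithmetic above: for a matrix-matrix product, left
// shape (2,3), right shape (3,4), axis_offset=1 and transpose=0 give SL=2, SM=3,
// SR=4 and an output shape of (2,4); the shared extent 3 is the one checked for
// compatibility.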
400    
401 jfenwick 1899 // determine the number of samples required to evaluate an expression combining left and right
402 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
403 jfenwick 2066 // The same goes for G_TENSORPROD
404 jfenwick 2271 // It might seem that pointwise binary ops (G_BINARY) could write their results over the top of the left operand.
405 jfenwick 2157 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
406     // multiple times.
407 jfenwick 1879 int
408     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
409     {
410 jfenwick 1886 switch(getOpgroup(op))
411 jfenwick 1879 {
412 jfenwick 1886 case G_IDENTITY: return 1;
413 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414 jfenwick 2721 case G_REDUCTION:
415 jfenwick 2147 case G_UNARY:
416     case G_UNARY_P: return max(left->getBuffsRequired(),1);
417 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420 jfenwick 2496 case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421 jfenwick 1879 default:
422     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }
424     }
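// Worked example of the formulas above: for sin(a+b) where a and b are IDENTITY
// wrappers (1 buffer each), the ADD node needs 1+max(1,1+1)=3 buffers and the SIN
// node on top of it needs max(3,1)=3.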
425    
426 jfenwick 1899
427 jfenwick 1865 } // end anonymous namespace
428    
429    
430 jfenwick 1899
431     // Return a string representing the operation
432 jfenwick 1865 const std::string&
433     opToString(ES_optype op)
434     {
435     if (op<0 || op>=ES_opcount)
436     {
437     op=UNKNOWNOP;
438     }
439     return ES_opstrings[op];
440     }
441    
442 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
443     void DataLazy::LazyNodeSetup()
444     {
445     #ifdef _OPENMP
446     int numthreads=omp_get_max_threads();
447     m_samples.resize(numthreads*m_samplesize);
448     m_sampleids=new int[numthreads];
449     for (int i=0;i<numthreads;++i)
450     {
451     m_sampleids[i]=-1;
452     }
453     #else
454     m_samples.resize(m_samplesize);
455     m_sampleids=new int[1];
456     m_sampleids[0]=-1;
457     #endif // _OPENMP
458     }
459     #endif // LAZY_NODE_STORAGE
460 jfenwick 1865
461 jfenwick 2500
462     // Creates an identity node
463 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
464 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
465 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
466     ,m_sampleids(0),
467     m_samples(1)
468     #endif
469 jfenwick 1865 {
470 jfenwick 1879 if (p->isLazy())
471     {
472     // I don't want identity of Lazy.
473     // Question: Why would that be so bad?
474     // Answer: We assume that the child of ID is something we can call getVector on
475     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
476     }
477     else
478     {
479 jfenwick 2271 p->makeLazyShared();
480 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481     makeIdentity(dr);
482 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483 jfenwick 1879 }
484 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
485 jfenwick 1865 }
486    
487 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489 jfenwick 2066 m_op(op),
490     m_axis_offset(0),
491     m_transpose(0),
492     m_SL(0), m_SM(0), m_SR(0)
493 jfenwick 1886 {
494 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495 jfenwick 1886 {
496     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }
498 jfenwick 2066
499 jfenwick 1886 DataLazy_ptr lleft;
500     if (!left->isLazy())
501     {
502     lleft=DataLazy_ptr(new DataLazy(left));
503     }
504     else
505     {
506     lleft=dynamic_pointer_cast<DataLazy>(left);
507     }
508 jfenwick 1889 m_readytype=lleft->m_readytype;
509 jfenwick 1886 m_left=lleft;
510 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
511 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
512 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513 jfenwick 2177 m_children=m_left->m_children+1;
514     m_height=m_left->m_height+1;
515 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
516     LazyNodeSetup();
517     #endif
518 jfenwick 2177 SIZELIMIT
519 jfenwick 1886 }
520    
521    
522 jfenwick 1943 // In this constructor we need to consider interpolation
523 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524     : parent(resultFS(left,right,op), resultShape(left,right,op)),
525 jfenwick 2066 m_op(op),
526     m_SL(0), m_SM(0), m_SR(0)
527 jfenwick 1879 {
528 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
530 jfenwick 1886 {
531 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532 jfenwick 1886 }
533 jfenwick 1943
534     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
535     {
536     FunctionSpace fs=getFunctionSpace();
537     Data ltemp(left);
538     Data tmp(ltemp,fs);
539     left=tmp.borrowDataPtr();
540     }
541 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
542 jfenwick 1943 {
543     Data tmp(Data(right),getFunctionSpace());
544     right=tmp.borrowDataPtr();
545 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546 jfenwick 1943 }
547     left->operandCheck(*right);
548    
549 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
550 jfenwick 1879 {
551     m_left=dynamic_pointer_cast<DataLazy>(left);
552 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553 jfenwick 1879 }
554     else
555     {
556     m_left=DataLazy_ptr(new DataLazy(left));
557 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558 jfenwick 1879 }
559     if (right->isLazy())
560     {
561     m_right=dynamic_pointer_cast<DataLazy>(right);
562 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563 jfenwick 1879 }
564     else
565     {
566     m_right=DataLazy_ptr(new DataLazy(right));
567 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568 jfenwick 1879 }
569 jfenwick 1889 char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;
571     if (lt=='E' || rt=='E')
572     {
573     m_readytype='E';
574     }
575     else if (lt=='T' || rt=='T')
576     {
577     m_readytype='T';
578     }
579     else
580     {
581     m_readytype='C';
582     }
583 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
584     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
585 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;
588 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
589     LazyNodeSetup();
590     #endif
591 jfenwick 2177 SIZELIMIT
592 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593 jfenwick 1879 }
594    
595 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597     m_op(op),
598     m_axis_offset(axis_offset),
599     m_transpose(transpose)
600     {
601     if ((getOpgroup(op)!=G_TENSORPROD))
602     {
603     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604     }
605     if ((transpose>2) || (transpose<0))
606     {
607     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608     }
609     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
610     {
611     FunctionSpace fs=getFunctionSpace();
612     Data ltemp(left);
613     Data tmp(ltemp,fs);
614     left=tmp.borrowDataPtr();
615     }
616     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
617     {
618     Data tmp(Data(right),getFunctionSpace());
619     right=tmp.borrowDataPtr();
620     }
621 jfenwick 2195 // left->operandCheck(*right);
622 jfenwick 1879
623 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {
625     m_left=dynamic_pointer_cast<DataLazy>(left);
626     }
627     else
628     {
629     m_left=DataLazy_ptr(new DataLazy(left));
630     }
631     if (right->isLazy())
632     {
633     m_right=dynamic_pointer_cast<DataLazy>(right);
634     }
635     else
636     {
637     m_right=DataLazy_ptr(new DataLazy(right));
638     }
639     char lt=m_left->m_readytype;
640     char rt=m_right->m_readytype;
641     if (lt=='E' || rt=='E')
642     {
643     m_readytype='E';
644     }
645     else if (lt=='T' || rt=='T')
646     {
647     m_readytype='T';
648     }
649     else
650     {
651     m_readytype='C';
652     }
653     m_samplesize=getNumDPPSample()*getNoValues();
654     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
657     m_height=max(m_left->m_height,m_right->m_height)+1;
658 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
659     LazyNodeSetup();
660     #endif
661 jfenwick 2177 SIZELIMIT
662 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663 jfenwick 2066 }
664    
665    
666 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
667 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
668 jfenwick 2084 m_op(op),
669     m_axis_offset(axis_offset),
670 jfenwick 2147 m_transpose(0),
671     m_tol(0)
672 jfenwick 2084 {
673     if ((getOpgroup(op)!=G_NP1OUT_P))
674     {
675     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
676     }
677     DataLazy_ptr lleft;
678     if (!left->isLazy())
679     {
680     lleft=DataLazy_ptr(new DataLazy(left));
681     }
682     else
683     {
684     lleft=dynamic_pointer_cast<DataLazy>(left);
685     }
686     m_readytype=lleft->m_readytype;
687     m_left=lleft;
688     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
689     m_samplesize=getNumDPPSample()*getNoValues();
690     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691 jfenwick 2177 m_children=m_left->m_children+1;
692     m_height=m_left->m_height+1;
693 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
694     LazyNodeSetup();
695     #endif
696 jfenwick 2177 SIZELIMIT
697 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698 jfenwick 2084 }
699    
700 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
701     : parent(left->getFunctionSpace(), left->getShape()),
702     m_op(op),
703     m_axis_offset(0),
704     m_transpose(0),
705     m_tol(tol)
706     {
707     if ((getOpgroup(op)!=G_UNARY_P))
708     {
709     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
710     }
711     DataLazy_ptr lleft;
712     if (!left->isLazy())
713     {
714     lleft=DataLazy_ptr(new DataLazy(left));
715     }
716     else
717     {
718     lleft=dynamic_pointer_cast<DataLazy>(left);
719     }
720     m_readytype=lleft->m_readytype;
721     m_left=lleft;
722     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
723     m_samplesize=getNumDPPSample()*getNoValues();
724     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725 jfenwick 2177 m_children=m_left->m_children+1;
726     m_height=m_left->m_height+1;
727 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
728     LazyNodeSetup();
729     #endif
730 jfenwick 2177 SIZELIMIT
731 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732     }
733 jfenwick 2084
734 jfenwick 2496
735     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737     m_op(op),
738     m_axis_offset(axis0),
739     m_transpose(axis1),
740     m_tol(0)
741     {
742     if ((getOpgroup(op)!=G_NP1OUT_2P))
743     {
744     throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
745     }
746     DataLazy_ptr lleft;
747     if (!left->isLazy())
748     {
749     lleft=DataLazy_ptr(new DataLazy(left));
750     }
751     else
752     {
753     lleft=dynamic_pointer_cast<DataLazy>(left);
754     }
755     m_readytype=lleft->m_readytype;
756     m_left=lleft;
757     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
758     m_samplesize=getNumDPPSample()*getNoValues();
759     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760     m_children=m_left->m_children+1;
761     m_height=m_left->m_height+1;
762 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
763     LazyNodeSetup();
764     #endif
765 jfenwick 2496 SIZELIMIT
766     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767     }
768    
769 jfenwick 1865 DataLazy::~DataLazy()
770     {
771 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
772     delete[] m_sampleids;
773     // m_samples is a std::vector member so it releases its own storage
774     #endif
775 jfenwick 1865 }
776    
777 jfenwick 1879
778     int
779     DataLazy::getBuffsRequired() const
780 jfenwick 1865 {
781 jfenwick 1879 return m_buffsRequired;
782     }
783    
784 jfenwick 1884
785 jfenwick 2066 size_t
786     DataLazy::getMaxSampleSize() const
787     {
788     return m_maxsamplesize;
789     }
790    
791 jfenwick 2271
792    
793     size_t
794     DataLazy::getSampleBufferSize() const
795     {
796     return m_maxsamplesize*(max(1,m_buffsRequired));
797     }
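// For example, a node whose largest sample holds 64 doubles and whose m_buffsRequired
// is 3 reports a 192-double scratch buffer to callers resolving its samples.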
798    
799 jfenwick 1899 /*
800     \brief Evaluates the expression using methods on Data.
801     This does the work for the collapse method.
802     For reasons of efficiency do not call this method on DataExpanded nodes.
803     */
804 jfenwick 1889 DataReady_ptr
805     DataLazy::collapseToReady()
806     {
807     if (m_readytype=='E')
808     { // this is more an efficiency concern than anything else
809     throw DataException("Programmer Error - do not use collapse on Expanded data.");
810     }
811     if (m_op==IDENTITY)
812     {
813     return m_id;
814     }
815     DataReady_ptr pleft=m_left->collapseToReady();
816     Data left(pleft);
817     Data right;
818 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
819 jfenwick 1889 {
820     right=Data(m_right->collapseToReady());
821     }
822     Data result;
823     switch(m_op)
824     {
825     case ADD:
826     result=left+right;
827     break;
828     case SUB:
829     result=left-right;
830     break;
831     case MUL:
832     result=left*right;
833     break;
834     case DIV:
835     result=left/right;
836     break;
837     case SIN:
838     result=left.sin();
839     break;
840     case COS:
841     result=left.cos();
842     break;
843     case TAN:
844     result=left.tan();
845     break;
846     case ASIN:
847     result=left.asin();
848     break;
849     case ACOS:
850     result=left.acos();
851     break;
852     case ATAN:
853     result=left.atan();
854     break;
855     case SINH:
856     result=left.sinh();
857     break;
858     case COSH:
859     result=left.cosh();
860     break;
861     case TANH:
862     result=left.tanh();
863     break;
864     case ERF:
865     result=left.erf();
866     break;
867     case ASINH:
868     result=left.asinh();
869     break;
870     case ACOSH:
871     result=left.acosh();
872     break;
873     case ATANH:
874     result=left.atanh();
875     break;
876     case LOG10:
877     result=left.log10();
878     break;
879     case LOG:
880     result=left.log();
881     break;
882     case SIGN:
883     result=left.sign();
884     break;
885     case ABS:
886     result=left.abs();
887     break;
888     case NEG:
889     result=left.neg();
890     break;
891     case POS:
892     // it doesn't mean anything for delayed.
893     // it will just trigger a deep copy of the lazy object
894     throw DataException("Programmer error - POS not supported for lazy data.");
895     break;
896     case EXP:
897     result=left.exp();
898     break;
899     case SQRT:
900     result=left.sqrt();
901     break;
902     case RECIP:
903     result=left.oneOver();
904     break;
905     case GZ:
906     result=left.wherePositive();
907     break;
908     case LZ:
909     result=left.whereNegative();
910     break;
911     case GEZ:
912     result=left.whereNonNegative();
913     break;
914     case LEZ:
915     result=left.whereNonPositive();
916     break;
917 jfenwick 2147 case NEZ:
918     result=left.whereNonZero(m_tol);
919     break;
920     case EZ:
921     result=left.whereZero(m_tol);
922     break;
923 jfenwick 2037 case SYM:
924     result=left.symmetric();
925     break;
926     case NSYM:
927     result=left.nonsymmetric();
928     break;
929 jfenwick 2066 case PROD:
930     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
931     break;
932 jfenwick 2084 case TRANS:
933     result=left.transpose(m_axis_offset);
934     break;
935     case TRACE:
936     result=left.trace(m_axis_offset);
937     break;
938 jfenwick 2496 case SWAP:
939     result=left.swapaxes(m_axis_offset, m_transpose);
940     break;
941 jfenwick 2721 case MINVAL:
942     result=left.minval();
943     break;
944     case MAXVAL:
945     result=left.maxval();
946     break;
947 jfenwick 1889 default:
948 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949 jfenwick 1889 }
950     return result.borrowReadyPtr();
951     }
952    
953 jfenwick 1899 /*
954     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
955     This method uses the original methods on the Data class to evaluate the expressions.
956     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
957     the purpose of using DataLazy in the first place).
958     */
959 jfenwick 1889 void
960     DataLazy::collapse()
961     {
962     if (m_op==IDENTITY)
963     {
964     return;
965     }
966     if (m_readytype=='E')
967     { // this is more an efficiency concern than anything else
968     throw DataException("Programmer Error - do not use collapse on Expanded data.");
969     }
970     m_id=collapseToReady();
971     m_op=IDENTITY;
972     }
973    
974 jfenwick 1899 /*
975 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
976 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
977     \param v A vector to store intermediate results.
978     \param offset Index in v to begin storing results.
979     \param sampleNo Sample number to evaluate.
980     \param roffset (output parameter) the offset in the return vector where the result begins.
981    
982     The return value will be an existing vector so do not deallocate it.
983     If the result is stored in v it should be stored at the offset given.
984     Everything from offset to the end of v should be considered available for this method to use.
985     */
986 jfenwick 1898 DataTypes::ValueType*
987     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
988 jfenwick 1889 {
989     // we assume that any collapsing has been done before we get here
990     // since we only have one argument we don't need to think about only
991     // processing single points.
992     if (m_readytype!='E')
993     {
994     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995     }
996 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997 jfenwick 1898 const double* left=&((*vleft)[roffset]);
998 jfenwick 1889 double* result=&(v[offset]);
999 jfenwick 1898 roffset=offset;
1000 jfenwick 1889 switch (m_op)
1001     {
1002     case SIN:
1003 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1004 jfenwick 1889 break;
1005     case COS:
1006 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1007 jfenwick 1889 break;
1008     case TAN:
1009 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1010 jfenwick 1889 break;
1011     case ASIN:
1012 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1013 jfenwick 1889 break;
1014     case ACOS:
1015 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1016 jfenwick 1889 break;
1017     case ATAN:
1018 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1019 jfenwick 1889 break;
1020     case SINH:
1021 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1022 jfenwick 1889 break;
1023     case COSH:
1024 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1025 jfenwick 1889 break;
1026     case TANH:
1027 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1028 jfenwick 1889 break;
1029     case ERF:
1030 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1031 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1032     #else
1033     tensor_unary_operation(m_samplesize, left, result, ::erf);
1034     break;
1035     #endif
1036     case ASINH:
1037 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1038 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1039     #else
1040     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1041     #endif
1042     break;
1043     case ACOSH:
1044 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1046     #else
1047     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1048     #endif
1049     break;
1050     case ATANH:
1051 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1053     #else
1054     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1055     #endif
1056     break;
1057     case LOG10:
1058 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1059 jfenwick 1889 break;
1060     case LOG:
1061 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1062 jfenwick 1889 break;
1063     case SIGN:
1064     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1065     break;
1066     case ABS:
1067 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1068 jfenwick 1889 break;
1069     case NEG:
1070     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1071     break;
1072     case POS:
1073     // it doesn't mean anything for delayed.
1074     // it will just trigger a deep copy of the lazy object
1075     throw DataException("Programmer error - POS not supported for lazy data.");
1076     break;
1077     case EXP:
1078 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1079 jfenwick 1889 break;
1080     case SQRT:
1081 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1082 jfenwick 1889 break;
1083     case RECIP:
1084     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1085     break;
1086     case GZ:
1087     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1088     break;
1089     case LZ:
1090     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1091     break;
1092     case GEZ:
1093     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1094     break;
1095     case LEZ:
1096     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1097     break;
1098 jfenwick 2147 // These are actually G_UNARY_P ops but I don't see a compelling reason to treat them differently
1099     case NEZ:
1100     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1101     break;
1102     case EZ:
1103     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1104     break;
1105 jfenwick 1889
1106     default:
1107     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1108     }
1109 jfenwick 1898 return &v;
1110 jfenwick 1889 }
1111    
1112    
1113 jfenwick 2721 /*
1114     \brief Compute the value of the expression (reduction operation) for the given sample.
1115     \return Vector which stores the value of the subexpression for the given sample.
1116     \param v A vector to store intermediate results.
1117     \param offset Index in v to begin storing results.
1118     \param sampleNo Sample number to evaluate.
1119     \param roffset (output parameter) the offset in the return vector where the result begins.
1120 jfenwick 2147
1121 jfenwick 2721 The return value will be an existing vector so do not deallocate it.
1122     If the result is stored in v it should be stored at the offset given.
1123     Everything from offset to the end of v should be considered available for this method to use.
1124     */
1125     DataTypes::ValueType*
1126     DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127     {
1128     // we assume that any collapsing has been done before we get here
1129     // since we only have one argument we don't need to think about only
1130     // processing single points.
1131     if (m_readytype!='E')
1132     {
1133     throw DataException("Programmer error - resolveReduction should only be called on expanded Data.");
1134     }
1135     const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136     double* result=&(v[offset]);
1137     roffset=offset;
1138     switch (m_op)
1139     {
1140     case MINVAL:
1141     {
1142     FMin op;
1143     *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1144     }
1145     break;
1146     case MAXVAL:
1147     {
1148     FMax op;
1149     *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1150     }
1151     break;
1152     default:
1153     throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1154     }
1155     return &v;
1156     }
1157 jfenwick 2147
1158    
1159    
1160 jfenwick 2037 /*
1161     \brief Compute the value of the expression (unary operation) for the given sample.
1162     \return Vector which stores the value of the subexpression for the given sample.
1163     \param v A vector to store intermediate results.
1164     \param offset Index in v to begin storing results.
1165     \param sampleNo Sample number to evaluate.
1166     \param roffset (output parameter) the offset in the return vector where the result begins.
1167 jfenwick 1898
1168 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
1169     If the result is stored in v it should be stored at the offset given.
1170     Everything from offset to the end of v should be considered available for this method to use.
1171     */
1172     DataTypes::ValueType*
1173     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1174     {
1175     // we assume that any collapsing has been done before we get here
1176     // since we only have one argument we don't need to think about only
1177     // processing single points.
1178     if (m_readytype!='E')
1179     {
1180     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1181     }
1182     // since we can't write the result over the input, we need a result offset further along
1183     size_t subroffset=roffset+m_samplesize;
1184 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1185 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1186 jfenwick 2037 roffset=offset;
1187 jfenwick 2157 size_t loop=0;
1188     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1189     size_t step=getNoValues();
1190 jfenwick 2037 switch (m_op)
1191     {
1192     case SYM:
1193 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1194     {
1195     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1196     subroffset+=step;
1197     offset+=step;
1198     }
1199 jfenwick 2037 break;
1200     case NSYM:
1201 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1202     {
1203     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1204     subroffset+=step;
1205     offset+=step;
1206     }
1207 jfenwick 2037 break;
1208     default:
1209     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1210     }
1211     return &v;
1212     }
1213 jfenwick 1898
1214 jfenwick 2084 /*
1215     \brief Compute the value of the expression (unary operation) for the given sample.
1216     \return Vector which stores the value of the subexpression for the given sample.
1217     \param v A vector to store intermediate results.
1218     \param offset Index in v to begin storing results.
1219     \param sampleNo Sample number to evaluate.
1220     \param roffset (output parameter) the offset in the return vector where the result begins.
1221 jfenwick 1899
1222 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
1223     If the result is stored in v it should be stored at the offset given.
1224     Everything from offset to the end of v should be considered available for this method to use.
1225     */
1226     DataTypes::ValueType*
1227     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1228     {
1229     // we assume that any collapsing has been done before we get here
1230     // since we only have one argument we don't need to think about only
1231     // processing single points.
1232     if (m_readytype!='E')
1233     {
1234     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1235     }
1236     // since we can't write the result over the input, we need a result offset further along
1237 jfenwick 2157 size_t subroffset;
1238 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1239 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1240     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1241 jfenwick 2084 roffset=offset;
1242 jfenwick 2157 size_t loop=0;
1243     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1244 jfenwick 2166 size_t outstep=getNoValues();
1245     size_t instep=m_left->getNoValues();
1246     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1247 jfenwick 2084 switch (m_op)
1248     {
1249     case TRACE:
1250 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1251     {
1252 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
1253     if (zz==5800)
1254     {
1255     LAZYDEBUG(cerr << "point=" << zz<< endl;)
1256     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1257     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1258     LAZYDEBUG(cerr << subroffset << endl;)
1259     LAZYDEBUG(cerr << "output=" << offset << endl;)
1260     }
1261     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1262     if (zz==5800)
1263     {
1264     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1265     }
1266     subroffset+=instep;
1267     offset+=outstep;
1268 jfenwick 2157 }
1269 jfenwick 2084 break;
1270     case TRANS:
1271 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1272     {
1273     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1274 jfenwick 2166 subroffset+=instep;
1275     offset+=outstep;
1276 jfenwick 2157 }
1277 jfenwick 2084 break;
1278     default:
1279     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1280     }
1281     return &v;
1282     }
1283 jfenwick 2037
1284    
1285 jfenwick 2496 /*
1286     \brief Compute the value of the expression (unary operation with int params) for the given sample.
1287     \return Vector which stores the value of the subexpression for the given sample.
1288     \param v A vector to store intermediate results.
1289     \param offset Index in v to begin storing results.
1290     \param sampleNo Sample number to evaluate.
1291     \param roffset (output parameter) the offset in the return vector where the result begins.
1292    
1293     The return value will be an existing vector so do not deallocate it.
1294     If the result is stored in v it should be stored at the offset given.
1295     Everything from offset to the end of v should be considered available for this method to use.
1296     */
1297     DataTypes::ValueType*
1298     DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1299     {
1300     // we assume that any collapsing has been done before we get here
1301     // since we only have one argument we don't need to think about only
1302     // processing single points.
1303     if (m_readytype!='E')
1304     {
1305     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1306     }
1307     // since we can't write the result over the input, we need a result offset further along
1308     size_t subroffset;
1309 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1310 jfenwick 2496 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1311     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1312     roffset=offset;
1313     size_t loop=0;
1314     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1315     size_t outstep=getNoValues();
1316     size_t instep=m_left->getNoValues();
1317     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1318     switch (m_op)
1319     {
1320     case SWAP:
1321     for (loop=0;loop<numsteps;++loop)
1322     {
1323     DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1324     subroffset+=instep;
1325     offset+=outstep;
1326     }
1327     break;
1328     default:
1329     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1330     }
1331     return &v;
1332     }
1333    
1334    
1335    
1336 phornby 1987 #define PROC_OP(TYPE,X) \
1337 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1338     {\
1339     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1340     { \
1341     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1342 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1343 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1344 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1345 jfenwick 2152 lroffset+=leftstep; \
1346     rroffset+=rightstep; \
1347     }\
1348     lroffset+=oleftstep;\
1349     rroffset+=orightstep;\
1350 jfenwick 1889 }
1351    
1352 jfenwick 1899 /*
1353     \brief Compute the value of the expression (binary operation) for the given sample.
1354     \return Vector which stores the value of the subexpression for the given sample.
1355     \param v A vector to store intermediate results.
1356     \param offset Index in v to begin storing results.
1357     \param sampleNo Sample number to evaluate.
1358     \param roffset (output parameter) the offset in the return vector where the result begins.
1359    
1360     The return value will be an existing vector so do not deallocate it.
1361     If the result is stored in v it should be stored at the offset given.
1362     Everything from offset to the end of v should be considered available for this method to use.
1363     */
1364     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1365     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1366     // If both children are expanded, then we can process them in a single operation (we treat
1367     // the whole sample as one big datapoint.
1368     // If one of the children is not expanded, then we need to treat each point in the sample
1369     // individually.
1370 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1371     // For example, 2+Vector.
1372     // In this case each double within the point is treated individually
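// As a concrete sketch of how the step variables below handle that case: for
// scalar+expandedVector (LS with REN), chunksize becomes leftsize (1), leftstep
// stays 0, rightstep becomes 1 and numsteps becomes
// m_right->getNumDPPSample()*rightsize, so the single scalar value is re-read
// for every double in the sample.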
1373 jfenwick 1898 DataTypes::ValueType*
1374 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1375 jfenwick 1889 {
1376 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1377 jfenwick 1889
1378 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1379     // first work out which of the children are expanded
1380 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1381     bool rightExp=(m_right->m_readytype=='E');
1382 jfenwick 2084 if (!leftExp && !rightExp)
1383     {
1384     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1385     }
1386     bool leftScalar=(m_left->getRank()==0);
1387     bool rightScalar=(m_right->getRank()==0);
1388 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1389 jfenwick 1908 {
1390 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1391     }
1392     size_t leftsize=m_left->getNoValues();
1393     size_t rightsize=m_right->getNoValues();
1394     size_t chunksize=1; // how many doubles will be processed in one go
1395     int leftstep=0; // how far should the left offset advance after each step
1396     int rightstep=0;
1397     int numsteps=0; // total number of steps for the inner loop
1398     int oleftstep=0; // the o variables refer to the outer loop
1399     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1400     int onumsteps=1;
1401    
1402     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1403     bool RES=(rightExp && rightScalar);
1404     bool LS=(!leftExp && leftScalar); // left is a single scalar
1405     bool RS=(!rightExp && rightScalar);
1406     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1407     bool RN=(!rightExp && !rightScalar);
1408     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1409     bool REN=(rightExp && !rightScalar);
1410    
1411     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1412     {
1413     chunksize=m_left->getNumDPPSample()*leftsize;
1414     leftstep=0;
1415     rightstep=0;
1416     numsteps=1;
1417     }
1418     else if (LES || RES)
1419     {
1420     chunksize=1;
1421     if (LES) // left is an expanded scalar
1422 jfenwick 2084 {
1423 jfenwick 2152 if (RS)
1424     {
1425     leftstep=1;
1426     rightstep=0;
1427     numsteps=m_left->getNumDPPSample();
1428     }
1429     else // RN or REN
1430     {
1431     leftstep=0;
1432     oleftstep=1;
1433     rightstep=1;
1434 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1435 jfenwick 2152 numsteps=rightsize;
1436     onumsteps=m_left->getNumDPPSample();
1437     }
1438 jfenwick 2084 }
1439 jfenwick 2152 else // right is an expanded scalar
1440     {
1441     if (LS)
1442     {
1443     rightstep=1;
1444     leftstep=0;
1445     numsteps=m_right->getNumDPPSample();
1446     }
1447     else
1448     {
1449     rightstep=0;
1450     orightstep=1;
1451     leftstep=1;
1452 jfenwick 2177 oleftstep=(LN ? -(int)leftsize : 0);
1453 jfenwick 2152 numsteps=leftsize;
1454     onumsteps=m_right->getNumDPPSample();
1455     }
1456     }
1457     }
1458     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1459     {
1460     if (LEN) // and Right will be a single value
1461     {
1462     chunksize=rightsize;
1463     leftstep=rightsize;
1464     rightstep=0;
1465     numsteps=m_left->getNumDPPSample();
1466     if (RS)
1467     {
1468     numsteps*=leftsize;
1469     }
1470     }
1471     else // REN
1472     {
1473     chunksize=leftsize;
1474     rightstep=leftsize;
1475     leftstep=0;
1476     numsteps=m_right->getNumDPPSample();
1477     if (LS)
1478     {
1479     numsteps*=rightsize;
1480     }
1481     }
1482     }
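// Summary of the stepping choices made above (descriptive comment only):
//   both expanded, same kind         : one chunk covering the whole sample (numsteps==1)
//   expanded scalar vs lone scalar   : chunksize 1, step the expanded side through its datapoints
//   expanded scalar vs non-scalar    : inner loop over the non-scalar's components, outer loop
//                                      over datapoints; a lone (unexpanded) operand is rewound
//                                      via the o*step corrections
//   expanded non-scalar vs lone value: chunksize of one datapoint (or 1 against a lone scalar),
//                                      stepping through the expanded side's datapoints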
1483    
1484     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1485 jfenwick 1899 // Get the values of sub-expressions
1486 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1487 jfenwick 2157     // calcBuffs for why we can't put offset as the 2nd param above
1488 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1489 jfenwick 1899 // the right child starts further along.
1490 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1491     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1492     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1493     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1494     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1495     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1496     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1497 jfenwick 2195
1498 jfenwick 2199
1499 jfenwick 1899     double* resultp=&(v[offset]); // results are stored at the vector offset we received
1500 jfenwick 1889 switch(m_op)
1501     {
1502     case ADD:
1503 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1504 jfenwick 1889 break;
1505 jfenwick 1899 case SUB:
1506 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1507 jfenwick 1899 break;
1508     case MUL:
1509 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1510 jfenwick 1899 break;
1511     case DIV:
1512 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1513 jfenwick 1899 break;
1514 jfenwick 1910 case POW:
1515 phornby 1994 PROC_OP(double (double,double),::pow);
1516 jfenwick 1910 break;
1517 jfenwick 1889 default:
1518 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1519 jfenwick 1889 }
1520 jfenwick 2195 roffset=offset;
1521 jfenwick 1898 return &v;
1522 jfenwick 1889 }
1523    
1524 jfenwick 1898
1525 jfenwick 2152
1526 jfenwick 2066 /*
1527     \brief Compute the value of the expression (tensor product) for the given sample.
1528     \return Vector which stores the value of the subexpression for the given sample.
1529     \param v A vector to store intermediate results.
1530     \param offset Index in v to begin storing results.
1531     \param sampleNo Sample number to evaluate.
1532     \param roffset (output parameter) the offset in the return vector where the result begins.
1533 jfenwick 1898
1534 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1535     If the result is stored in v it should be stored at the offset given.
1536     Everything from offset to the end of v should be considered available for this method to use.
1537     */
1538     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1539     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1540     // unlike the other resolve helpers, we must treat these datapoints separately.
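// Conceptual sketch of the PROD case handled below (comment only; the real index order
// and any transposition are handled inside matrix_matrix_product via m_transpose):
// for each of the getNumDPPSample() datapoints an m_SL x m_SM block from the left child
// is contracted with an m_SM x m_SR block from the right child into an m_SL x m_SR block
// of the result, roughly
//
//   for (int i = 0; i < m_SL; ++i)
//     for (int j = 0; j < m_SR; ++j)
//     {
//       double sum = 0;
//       for (int k = 0; k < m_SM; ++k)
//         sum += left[i,k] * right[k,j];   // conceptual indexing, not actual memory layout
//       result[i,j] = sum;
//     }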
1541     DataTypes::ValueType*
1542     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1543     {
1544 jfenwick 2195 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)
1545 jfenwick 2066
1546     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1547     // first work out which of the children are expanded
1548     bool leftExp=(m_left->m_readytype=='E');
1549     bool rightExp=(m_right->m_readytype=='E');
1550     int steps=getNumDPPSample();
1551 jfenwick 2195 /* int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1552     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1553     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1554     int rightStep=(rightExp?m_right->getNoValues() : 0);
1555    
1556     int resultStep=getNoValues();
1557 jfenwick 2066 // Get the values of sub-expressions (leave a gap of one sample for the result).
1558 jfenwick 2199 int gap=offset+m_samplesize;
1559 jfenwick 2195
1560     LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1561    
1562 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
1563 jfenwick 2199 gap+=m_left->getMaxSampleSize();
1564 jfenwick 2195
1565    
1566     LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1567    
1568    
1569 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);
1570 jfenwick 2195
1571     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1572     cout << getNoValues() << endl;)
1573     LAZYDEBUG(cerr << "Result of left=";)
1574     LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1575 jfenwick 2199
1576     for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1577 jfenwick 2195 {
1578 jfenwick 2199 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1579     if (i%4==0) cout << endl;
1580 jfenwick 2195 })
1581     LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1582 jfenwick 2199 LAZYDEBUG(
1583     for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1584 jfenwick 2195 {
1585 jfenwick 2199 cerr << "[" << setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1586     if (i%4==0) cout << endl;
1587 jfenwick 2195 }
1588     cerr << endl;
1589     )
1590     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1591 jfenwick 2153 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1592     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightR=" << m_right->getRank() << endl;)
1593     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1594     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1595     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1596 jfenwick 2195 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1597    
1598 jfenwick 2066     double* resultp=&(v[offset]); // results are stored at the vector offset we received
1599     switch(m_op)
1600     {
1601     case PROD:
1602     for (int i=0;i<steps;++i,resultp+=resultStep)
1603     {
1604 jfenwick 2199
1605 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1606     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1607     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1608 jfenwick 2199
1609 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1610     const double *ptr_1 = &((*right)[rroffset]);
1611 jfenwick 2199
1612 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1613     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1614 jfenwick 2199
1615 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1616 jfenwick 2199
1617 jfenwick 2195 LAZYDEBUG(cout << "Results--\n";
1618 jfenwick 2199 {
1619     DataVector dv(getNoValues());
1620 jfenwick 2195 for (int z=0;z<getNoValues();++z)
1621     {
1622 jfenwick 2199 cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1623     if (z%4==0) cout << endl;
1624     dv[z]=resultp[z];
1625 jfenwick 2195 }
1626 jfenwick 2199 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1627 jfenwick 2195 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1628 jfenwick 2199 }
1629 jfenwick 2195 )
1630 jfenwick 2066 lroffset+=leftStep;
1631     rroffset+=rightStep;
1632     }
1633     break;
1634     default:
1635     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1636     }
1637     roffset=offset;
1638     return &v;
1639     }
1640    
1641    
1642 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
1643 jfenwick 2066
1644 jfenwick 2500 // The result will be stored in m_samples
1645     // The return value is a pointer to the DataVector, offset is the offset within the return value
1646     const DataTypes::ValueType*
1647     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1648     {
1649     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1650     // collapse so we have a 'E' node or an IDENTITY for some other type
1651     if (m_readytype!='E' && m_op!=IDENTITY)
1652     {
1653     collapse();
1654     }
1655     if (m_op==IDENTITY)
1656     {
1657     const ValueType& vec=m_id->getVectorRO();
1658     // if (m_readytype=='C')
1659     // {
1660     // roffset=0; // all samples read from the same position
1661     // return &(m_samples);
1662     // }
1663     roffset=m_id->getPointOffset(sampleNo, 0);
1664     return &(vec);
1665     }
1666     if (m_readytype!='E')
1667     {
1668     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1669     }
1670 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1671     {
1672     roffset=tid*m_samplesize;
1673     return &(m_samples); // sample is already resolved
1674     }
1675     m_sampleids[tid]=sampleNo;
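// Per-thread memoisation: m_samples holds one sample-sized block of workspace per thread
// and m_sampleids records which sample each thread last resolved, so a repeated request
// for the same sample by the same thread is answered from the cached block above without
// re-evaluating the subtree.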
1676 jfenwick 2500 switch (getOpgroup(m_op))
1677     {
1678     case G_UNARY:
1679     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1680     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1681     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1682     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1683     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1684     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1685 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1686 jfenwick 2500 default:
1687     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1688     }
1689     }
1690    
1691     const DataTypes::ValueType*
1692     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1693     {
1694     // we assume that any collapsing has been done before we get here
1695     // since we only have one argument we don't need to think about only
1696     // processing single points.
1697     // we will also know we won't get identity nodes
1698     if (m_readytype!='E')
1699     {
1700     throw DataException("Programmer error - resolveNodeUnary should only be called on expanded Data.");
1701     }
1702     if (m_op==IDENTITY)
1703     {
1704     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1705     }
1706     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1707     const double* left=&((*leftres)[roffset]);
1708     roffset=m_samplesize*tid;
1709     double* result=&(m_samples[roffset]);
1710     switch (m_op)
1711     {
1712     case SIN:
1713     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1714     break;
1715     case COS:
1716     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1717     break;
1718     case TAN:
1719     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1720     break;
1721     case ASIN:
1722     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1723     break;
1724     case ACOS:
1725     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1726     break;
1727     case ATAN:
1728     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1729     break;
1730     case SINH:
1731     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1732     break;
1733     case COSH:
1734     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1735     break;
1736     case TANH:
1737     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1738     break;
1739     case ERF:
1740     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1741     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1742     #else
1743     tensor_unary_operation(m_samplesize, left, result, ::erf);
1744     break;
1745     #endif
1746     case ASINH:
1747     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1748     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1749     #else
1750     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1751     #endif
1752     break;
1753     case ACOSH:
1754     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1755     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1756     #else
1757     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1758     #endif
1759     break;
1760     case ATANH:
1761     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1762     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1763     #else
1764     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1765     #endif
1766     break;
1767     case LOG10:
1768     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1769     break;
1770     case LOG:
1771     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1772     break;
1773     case SIGN:
1774     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1775     break;
1776     case ABS:
1777     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1778     break;
1779     case NEG:
1780     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1781     break;
1782     case POS:
1783     // POS doesn't mean anything for delayed (lazy) data;
1784     // it will just trigger a deep copy of the lazy object
1785     throw DataException("Programmer error - POS not supported for lazy data.");
1786     break;
1787     case EXP:
1788     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1789     break;
1790     case SQRT:
1791     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1792     break;
1793     case RECIP:
1794     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1795     break;
1796     case GZ:
1797     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1798     break;
1799     case LZ:
1800     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1801     break;
1802     case GEZ:
1803     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1804     break;
1805     case LEZ:
1806     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1807     break;
1808     // NEZ and EZ are actually G_UNARY_P, but there is no compelling reason to treat them differently here
1809     case NEZ:
1810     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1811     break;
1812     case EZ:
1813     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1814     break;
1815    
1816     default:
1817     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1818     }
1819     return &(m_samples);
1820     }
1821    
1822    
1823     const DataTypes::ValueType*
1824 jfenwick 2721 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1825     {
1826     // we assume that any collapsing has been done before we get here
1827     // since we only have one argument we don't need to think about only
1828     // processing single points.
1829     // we will also know we won't get identity nodes
1830     if (m_readytype!='E')
1831     {
1832     throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1833     }
1834     if (m_op==IDENTITY)
1835     {
1836     throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1837     }
1838     size_t loffset=0;
1839     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1840    
1841     roffset=m_samplesize*tid;
1842     double* result=&(m_samples[roffset]);
1843     switch (m_op)
1844     {
1845     case MINVAL:
1846     {
1847     FMin op;
1848     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1849     }
1850     break;
1851     case MAXVAL:
1852     {
1853     FMax op;
1854     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, -numeric_limits<double>::max());
1855     }
1856     break;
1857     default:
1858     throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1859     }
1860     return &(m_samples);
1861     }
1862    
1863     const DataTypes::ValueType*
1864 jfenwick 2500 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1865     {
1866     // we assume that any collapsing has been done before we get here
1867     // since we only have one argument we don't need to think about only
1868     // processing single points.
1869     if (m_readytype!='E')
1870     {
1871     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1872     }
1873     if (m_op==IDENTITY)
1874     {
1875     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1876     }
1877     size_t subroffset;
1878     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1879     roffset=m_samplesize*tid;
1880     size_t loop=0;
1881     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1882     size_t step=getNoValues();
1883     size_t offset=roffset;
1884     switch (m_op)
1885     {
1886     case SYM:
1887     for (loop=0;loop<numsteps;++loop)
1888     {
1889     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1890     subroffset+=step;
1891     offset+=step;
1892     }
1893     break;
1894     case NSYM:
1895     for (loop=0;loop<numsteps;++loop)
1896     {
1897     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1898     subroffset+=step;
1899     offset+=step;
1900     }
1901     break;
1902     default:
1903     throw DataException("Programmer error - resolveNodeNP1OUT can not resolve operator "+opToString(m_op)+".");
1904     }
1905     return &m_samples;
1906     }
1907    
1908     const DataTypes::ValueType*
1909     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1910     {
1911     // we assume that any collapsing has been done before we get here
1912     // since we only have one argument we don't need to think about only
1913     // processing single points.
1914     if (m_readytype!='E')
1915     {
1916     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1917     }
1918     if (m_op==IDENTITY)
1919     {
1920     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1921     }
1922     size_t subroffset;
1923     size_t offset;
1924     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1925     roffset=m_samplesize*tid;
1926     offset=roffset;
1927     size_t loop=0;
1928     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1929     size_t outstep=getNoValues();
1930     size_t instep=m_left->getNoValues();
1931     switch (m_op)
1932     {
1933     case TRACE:
1934     for (loop=0;loop<numsteps;++loop)
1935     {
1936     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1937     subroffset+=instep;
1938     offset+=outstep;
1939     }
1940     break;
1941     case TRANS:
1942     for (loop=0;loop<numsteps;++loop)
1943     {
1944     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1945     subroffset+=instep;
1946     offset+=outstep;
1947     }
1948     break;
1949     default:
1950     throw DataException("Programmer error - resolveNodeNP1OUT_P can not resolve operator "+opToString(m_op)+".");
1951     }
1952     return &m_samples;
1953     }
1954    
1955    
1956     const DataTypes::ValueType*
1957     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1958     {
1959     if (m_readytype!='E')
1960     {
1961     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1962     }
1963     if (m_op==IDENTITY)
1964     {
1965     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1966     }
1967     size_t subroffset;
1968     size_t offset;
1969     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1970     roffset=m_samplesize*tid;
1971     offset=roffset;
1972     size_t loop=0;
1973     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1974     size_t outstep=getNoValues();
1975     size_t instep=m_left->getNoValues();
1976     switch (m_op)
1977     {
1978     case SWAP:
1979     for (loop=0;loop<numsteps;++loop)
1980     {
1981     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1982     subroffset+=instep;
1983     offset+=outstep;
1984     }
1985     break;
1986     default:
1987     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1988     }
1989     return &m_samples;
1990     }
1991    
1992    
1993    
1994     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1995     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1996     // If both children are expanded, then we can process them in a single operation (we treat
1997     // the whole sample as one big datapoint).
1998     // If one of the children is not expanded, then we need to treat each point in the sample
1999     // individually.
2000     // There is an additional complication when scalar operations are considered.
2001     // For example, 2+Vector.
2002     // In this case each double within the point is treated individually.
2003     const DataTypes::ValueType*
2004     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
2005     {
2006     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
2007    
2008     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2009     // first work out which of the children are expanded
2010     bool leftExp=(m_left->m_readytype=='E');
2011     bool rightExp=(m_right->m_readytype=='E');
2012     if (!leftExp && !rightExp)
2013     {
2014     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
2015     }
2016     bool leftScalar=(m_left->getRank()==0);
2017     bool rightScalar=(m_right->getRank()==0);
2018     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
2019     {
2020     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
2021     }
2022     size_t leftsize=m_left->getNoValues();
2023     size_t rightsize=m_right->getNoValues();
2024     size_t chunksize=1; // how many doubles will be processed in one go
2025     int leftstep=0; // how far should the left offset advance after each step
2026     int rightstep=0;
2027     int numsteps=0; // total number of steps for the inner loop
2028     int oleftstep=0; // the o variables refer to the outer loop
2029     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
2030     int onumsteps=1;
2031    
2032     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
2033     bool RES=(rightExp && rightScalar);
2034     bool LS=(!leftExp && leftScalar); // left is a single scalar
2035     bool RS=(!rightExp && rightScalar);
2036     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
2037     bool RN=(!rightExp && !rightScalar);
2038     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
2039     bool REN=(rightExp && !rightScalar);
2040    
2041     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
2042     {
2043     chunksize=m_left->getNumDPPSample()*leftsize;
2044     leftstep=0;
2045     rightstep=0;
2046     numsteps=1;
2047     }
2048     else if (LES || RES)
2049     {
2050     chunksize=1;
2051     if (LES) // left is an expanded scalar
2052     {
2053     if (RS)
2054     {
2055     leftstep=1;
2056     rightstep=0;
2057     numsteps=m_left->getNumDPPSample();
2058     }
2059     else // RN or REN
2060     {
2061     leftstep=0;
2062     oleftstep=1;
2063     rightstep=1;
2064     orightstep=(RN ? -(int)rightsize : 0);
2065     numsteps=rightsize;
2066     onumsteps=m_left->getNumDPPSample();
2067     }
2068     }
2069     else // right is an expanded scalar
2070     {
2071     if (LS)
2072     {
2073     rightstep=1;
2074     leftstep=0;
2075     numsteps=m_right->getNumDPPSample();
2076     }
2077     else
2078     {
2079     rightstep=0;
2080     orightstep=1;
2081     leftstep=1;
2082     oleftstep=(LN ? -(int)leftsize : 0);
2083     numsteps=leftsize;
2084     onumsteps=m_right->getNumDPPSample();
2085     }
2086     }
2087     }
2088     else // this leaves (LEN, RS), (LEN, RN) and their transposes
2089     {
2090     if (LEN) // and Right will be a single value
2091     {
2092     chunksize=rightsize;
2093     leftstep=rightsize;
2094     rightstep=0;
2095     numsteps=m_left->getNumDPPSample();
2096     if (RS)
2097     {
2098     numsteps*=leftsize;
2099     }
2100     }
2101     else // REN
2102     {
2103     chunksize=leftsize;
2104     rightstep=leftsize;
2105     leftstep=0;
2106     numsteps=m_right->getNumDPPSample();
2107     if (LS)
2108     {
2109     numsteps*=rightsize;
2110     }
2111     }
2112     }
2113    
2114     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
2115     // Get the values of sub-expressions
2116     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
2117     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2118     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2119     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2120     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2121     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2122     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2123     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2124     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
2125    
2126     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2127     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2128    
2129    
2130     roffset=m_samplesize*tid;
2131     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
2132     switch(m_op)
2133     {
2134     case ADD:
2135     PROC_OP(NO_ARG,plus<double>());
2136     break;
2137     case SUB:
2138     PROC_OP(NO_ARG,minus<double>());
2139     break;
2140     case MUL:
2141     PROC_OP(NO_ARG,multiplies<double>());
2142     break;
2143     case DIV:
2144     PROC_OP(NO_ARG,divides<double>());
2145     break;
2146     case POW:
2147     PROC_OP(double (double,double),::pow);
2148     break;
2149     default:
2150     throw DataException("Programmer error - resolveNodeBinary can not resolve operator "+opToString(m_op)+".");
2151     }
2152     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2153     return &m_samples;
2154     }
2155    
2156    
2157     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2158     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2159     // unlike the other resolve helpers, we must treat these datapoints separately.
2160     const DataTypes::ValueType*
2161     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2162     {
2163     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2164    
2165     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2166     // first work out which of the children are expanded
2167     bool leftExp=(m_left->m_readytype=='E');
2168     bool rightExp=(m_right->m_readytype=='E');
2169     int steps=getNumDPPSample();
2170     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2171     int rightStep=(rightExp?m_right->getNoValues() : 0);
2172    
2173     int resultStep=getNoValues();
2174     roffset=m_samplesize*tid;
2175     size_t offset=roffset;
2176    
2177     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2178    
2179     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2180    
2181     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2182     cout << getNoValues() << endl;)
2183    
2184    
2185     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2186     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2187     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightR=" << m_right->getRank() << endl;)
2188     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2189     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2190     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2191     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2192    
2193     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
2194     switch(m_op)
2195     {
2196     case PROD:
2197     for (int i=0;i<steps;++i,resultp+=resultStep)
2198     {
2199     const double *ptr_0 = &((*left)[lroffset]);
2200     const double *ptr_1 = &((*right)[rroffset]);
2201    
2202     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2203     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2204    
2205     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2206    
2207     lroffset+=leftStep;
2208     rroffset+=rightStep;
2209     }
2210     break;
2211     default:
2212     throw DataException("Programmer error - resolveNodeTProd can not resolve operator "+opToString(m_op)+".");
2213     }
2214     roffset=offset;
2215     return &m_samples;
2216     }
2217     #endif //LAZY_NODE_STORAGE
2218    
2219 jfenwick 1899 /*
2220     \brief Compute the value of the expression for the given sample.
2221     \return Vector which stores the value of the subexpression for the given sample.
2222     \param v A vector to store intermediate results.
2223     \param offset Index in v to begin storing results.
2224     \param sampleNo Sample number to evaluate.
2225     \param roffset (output parameter) the offset in the return vector where the result begins.
2226 jfenwick 1898
2227 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
2228     */
2229 jfenwick 1884 // the vector and the offset are a place where the method may write its data if it wishes;
2230     // it is not obligated to do so. For example, if it already has its own storage, it can use that.
2231 jfenwick 1885 // Hence the return value indicates where the data is actually stored.
2232     // Regardless, the supplied storage should be assumed to be used, even if it isn't.
2233 jfenwick 1898
2234     // the roffset is the offset within the returned vector where the data begins
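// A hedged usage sketch (caller side; 'lazy' and 'buffer' are hypothetical names, not
// taken from this file):
//
//   size_t roffset = 0;
//   const DataTypes::ValueType* res = lazy->resolveVectorSample(buffer, 0, sampleNo, roffset);
//   const double* sampledata = &((*res)[roffset]);   // points into *res; do not deallocate
//
// The supplied buffer must be large enough for this node and all of its children
// (see the buffer-size bookkeeping elsewhere in this file).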
2235     const DataTypes::ValueType*
2236 jfenwick 2521 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2237 jfenwick 1879 {
2238 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2239 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
2240     if (m_readytype!='E' && m_op!=IDENTITY)
2241     {
2242     collapse();
2243     }
2244 jfenwick 1885 if (m_op==IDENTITY)
2245 jfenwick 1865 {
2246 jfenwick 2271 const ValueType& vec=m_id->getVectorRO();
2247 jfenwick 1889 if (m_readytype=='C')
2248     {
2249 jfenwick 1898 roffset=0;
2250 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2251 jfenwick 1898 return &(vec);
2252 jfenwick 1889 }
2253 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
2254 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2255 jfenwick 1898 return &(vec);
2256 jfenwick 1865 }
2257 jfenwick 1889 if (m_readytype!='E')
2258     {
2259     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
2260     }
2261     switch (getOpgroup(m_op))
2262     {
2263 jfenwick 2147 case G_UNARY:
2264     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2265 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2266 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2267 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2268 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2269 jfenwick 2496 case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2270 jfenwick 2721 case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
2271 jfenwick 1889 default:
2272     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2273     }
2274 jfenwick 2152
2275 jfenwick 1889 }
2276    
2277 jfenwick 2271 const DataTypes::ValueType*
2278     DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2279     {
2280     #ifdef _OPENMP
2281     int tid=omp_get_thread_num();
2282     #else
2283     int tid=0;
2284     #endif
2285 jfenwick 2521 #ifdef LAZY_NODE_STORAGE
2286     return resolveNodeSample(tid, sampleNo, roffset);
2287     #else
2288     return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2289     #endif
2290 jfenwick 2271 }
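// Note: two evaluation strategies sit behind this dispatch. With LAZY_NODE_STORAGE each
// node caches one sample per thread in m_samples (resolveNodeSample); without it, results
// are written into the caller-supplied BufferGroup slot for this thread (resolveVectorSample).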
2291    
2292    
2293 jfenwick 2497 // This needs to do the work of the identity constructor
2294 jfenwick 2177 void
2295     DataLazy::resolveToIdentity()
2296     {
2297     if (m_op==IDENTITY)
2298     return;
2299 jfenwick 2500 #ifndef LAZY_NODE_STORAGE
2300 jfenwick 2497 DataReady_ptr p=resolveVectorWorker();
2301 jfenwick 2500 #else
2302     DataReady_ptr p=resolveNodeWorker();
2303     #endif
2304 jfenwick 2177 makeIdentity(p);
2305     }
2306 jfenwick 1889
2307 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
2308     {
2309     m_op=IDENTITY;
2310     m_axis_offset=0;
2311     m_transpose=0;
2312     m_SL=m_SM=m_SR=0;
2313     m_children=m_height=0;
2314     m_id=p;
2315     if(p->isConstant()) {m_readytype='C';}
2316     else if(p->isExpanded()) {m_readytype='E';}
2317     else if (p->isTagged()) {m_readytype='T';}
2318     else {throw DataException("Unknown DataReady instance in DataLazy::makeIdentity.");}
2319     m_buffsRequired=1;
2320     m_samplesize=p->getNumDPPSample()*p->getNoValues();
2321     m_maxsamplesize=m_samplesize;
2322     m_left.reset();
2323     m_right.reset();
2324     }
2325    
2326 jfenwick 2497
2327     DataReady_ptr
2328     DataLazy::resolve()
2329     {
2330     resolveToIdentity();
2331     return m_id;
2332     }
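// Hedged usage sketch (variable names are illustrative; DataLazy_ptr is assumed to be the
// usual shared-pointer typedef for DataLazy):
//
//   DataLazy_ptr lz = ...;                  // some lazy expression DAG
//   DataReady_ptr ready = lz->resolve();    // evaluates the whole expression once
//   // lz has now been rewritten as an IDENTITY node holding 'ready', so any further
//   // resolve() calls simply return the cached result.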
2333    
2334 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
2335    
2336     // This version of resolve uses storage in each node to hold results
2337     DataReady_ptr
2338     DataLazy::resolveNodeWorker()
2339     {
2340     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2341     {
2342     collapse();
2343     }
2344     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2345     {
2346     return m_id;
2347     }
2348     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2349     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
2350     ValueType& resvec=result->getVectorRW();
2351     DataReady_ptr resptr=DataReady_ptr(result);
2352    
2353     int sample;
2354     int totalsamples=getNumSamples();
2355     const ValueType* res=0; // Storage for answer
2356     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2357     #pragma omp parallel for private(sample,res) schedule(static)
2358     for (sample=0;sample<totalsamples;++sample)
2359     {
2360     size_t roffset=0;
2361     #ifdef _OPENMP
2362     res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2363     #else
2364     res=resolveNodeSample(0,sample,roffset);
2365     #endif
2366     LAZYDEBUG(cout << "Sample #" << sample << endl;)
2367     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2368