/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2521 - (hide annotations)
Tue Jul 7 00:08:58 2009 UTC (9 years, 9 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 81937 byte(s)
Modified Lazy so that resolving a single sample uses the node cache method.
Fixed some doxygen problems.
1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2472 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49 jfenwick 2501
50 jfenwick 1899 /*
51     How does DataLazy work?
52     ~~~~~~~~~~~~~~~~~~~~~~~
53    
54     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55     denoted left and right.
56    
57     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58     This means that all "internal" nodes in the structure are instances of DataLazy.
59    
60     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61     Note that IDENITY is not considered a unary operation.
62    
63     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64     It must however form a DAG (directed acyclic graph).
65     I will refer to individual DataLazy objects with the structure as nodes.
66    
67     Each node also stores:
68     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69     evaluated.
70     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71     evaluate the expression.
72     - m_samplesize ~ the number of doubles stored in a sample.
73    
74     When a new node is created, the above values are computed based on the values in the child nodes.
75     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76    
77     The resolve method, which produces a DataReady from a DataLazy, does the following:
78     1) Create a DataReady to hold the new result.
79     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81    
82     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83    
84     resolveSample returns a Vector* and an offset within that vector where the result is stored.
85     Normally, this would be v, but for identity nodes their internal vector is returned instead.
86    
87     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
88    
89     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91 jfenwick 2037
92 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93 jfenwick 2037 1) Add to the ES_optype.
94     2) determine what opgroup your operation belongs to (X)
95     3) add a string for the op to the end of ES_opstrings
96     4) increase ES_opcount
97     5) add an entry (X) to opgroups
98     6) add an entry to the switch in collapseToReady
99     7) add an entry to resolveX
100 jfenwick 1899 */
101    
102    
103 jfenwick 1865 using namespace std;
104 jfenwick 1868 using namespace boost;
105 jfenwick 1865
106     namespace escript
107     {
108    
109     namespace
110     {
111    
112 jfenwick 1886 enum ES_opgroup
113     {
114     G_UNKNOWN,
115     G_IDENTITY,
116 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
117 jfenwick 2037 G_UNARY, // pointwise operations with one argument
118 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
119 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
120 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
121 jfenwick 2496 G_TENSORPROD, // general tensor product
122     G_NP1OUT_2P // non-pointwise op with one output requiring two params
123 jfenwick 1886 };
124    
125    
126    
127    
128 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
129     "sin","cos","tan",
130 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
131     "asinh","acosh","atanh",
132 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
133 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
134 jfenwick 2066 "symmetric","nonsymmetric",
135 jfenwick 2084 "prod",
136 jfenwick 2496 "transpose", "trace",
137     "swapaxes"};
138     int ES_opcount=41;
139 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
140     G_UNARY,G_UNARY,G_UNARY, //10
141     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
142     G_UNARY,G_UNARY,G_UNARY, // 20
143 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
144     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
145 jfenwick 2066 G_NP1OUT,G_NP1OUT,
146 jfenwick 2084 G_TENSORPROD,
147 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
148     G_NP1OUT_2P};
149 jfenwick 1886 inline
150     ES_opgroup
151     getOpgroup(ES_optype op)
152     {
153     return opgroups[op];
154     }
155    
156 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
157     FunctionSpace
158     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
159     {
160     // perhaps this should call interpolate and throw or something?
161     // maybe we need an interpolate node -
162     // that way, if interpolate is required in any other op we can just throw a
163     // programming error exception.
164 jfenwick 1879
165 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
166     FunctionSpace r=right->getFunctionSpace();
167     if (l!=r)
168     {
169     if (r.probeInterpolation(l))
170     {
171     return l;
172     }
173     if (l.probeInterpolation(r))
174     {
175     return r;
176     }
177     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
178     }
179     return l;
180 jfenwick 1865 }
181    
182     // return the shape of the result of "left op right"
183 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
184 jfenwick 1865 DataTypes::ShapeType
185     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
186     {
187 jfenwick 1879 if (left->getShape()!=right->getShape())
188     {
189 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
190 jfenwick 1908 {
191     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
192     }
193     if (left->getRank()==0) // we need to allow scalar * anything
194     {
195     return right->getShape();
196     }
197     if (right->getRank()==0)
198     {
199     return left->getShape();
200     }
201     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
202 jfenwick 1879 }
203     return left->getShape();
204 jfenwick 1865 }
205    
206 jfenwick 2084 // return the shape for "op left"
207    
208     DataTypes::ShapeType
209 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
210 jfenwick 2084 {
211     switch(op)
212     {
213     case TRANS:
214 jfenwick 2199 { // for the scoping of variables
215     const DataTypes::ShapeType& s=left->getShape();
216     DataTypes::ShapeType sh;
217     int rank=left->getRank();
218     if (axis_offset<0 || axis_offset>rank)
219     {
220     throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
221     }
222     for (int i=0; i<rank; i++)
223     {
224     int index = (axis_offset+i)%rank;
225     sh.push_back(s[index]); // Append to new shape
226     }
227     return sh;
228     }
229 jfenwick 2084 break;
230     case TRACE:
231 jfenwick 2199 {
232     int rank=left->getRank();
233     if (rank<2)
234     {
235     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
236     }
237     if ((axis_offset>rank-2) || (axis_offset<0))
238     {
239     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
240     }
241     if (rank==2)
242     {
243     return DataTypes::scalarShape;
244     }
245     else if (rank==3)
246     {
247     DataTypes::ShapeType sh;
248     if (axis_offset==0)
249     {
250     sh.push_back(left->getShape()[2]);
251     }
252     else // offset==1
253     {
254     sh.push_back(left->getShape()[0]);
255     }
256     return sh;
257     }
258     else if (rank==4)
259     {
260     DataTypes::ShapeType sh;
261     const DataTypes::ShapeType& s=left->getShape();
262     if (axis_offset==0)
263     {
264     sh.push_back(s[2]);
265     sh.push_back(s[3]);
266     }
267     else if (axis_offset==1)
268     {
269     sh.push_back(s[0]);
270     sh.push_back(s[3]);
271     }
272     else // offset==2
273     {
274     sh.push_back(s[0]);
275     sh.push_back(s[1]);
276     }
277     return sh;
278     }
279     else // unknown rank
280     {
281     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
282     }
283     }
284 jfenwick 2084 break;
285     default:
286     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
287     }
288     }
289    
290 jfenwick 2496 DataTypes::ShapeType
291     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
292     {
293     // This code taken from the Data.cpp swapaxes() method
294     // Some of the checks are probably redundant here
295     int axis0_tmp,axis1_tmp;
296     const DataTypes::ShapeType& s=left->getShape();
297     DataTypes::ShapeType out_shape;
298     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
299     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
300     int rank=left->getRank();
301     if (rank<2) {
302     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
303     }
304     if (axis0<0 || axis0>rank-1) {
305     throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
306     }
307     if (axis1<0 || axis1>rank-1) {
308     throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
309     }
310     if (axis0 == axis1) {
311     throw DataException("Error - Data::swapaxes: axis indices must be different.");
312     }
313     if (axis0 > axis1) {
314     axis0_tmp=axis1;
315     axis1_tmp=axis0;
316     } else {
317     axis0_tmp=axis0;
318     axis1_tmp=axis1;
319     }
320     for (int i=0; i<rank; i++) {
321     if (i == axis0_tmp) {
322     out_shape.push_back(s[axis1_tmp]);
323     } else if (i == axis1_tmp) {
324     out_shape.push_back(s[axis0_tmp]);
325     } else {
326     out_shape.push_back(s[i]);
327     }
328     }
329     return out_shape;
330     }
331    
332    
333 jfenwick 2066 // determine the output shape for the general tensor product operation
334     // the additional parameters return information required later for the product
335     // the majority of this code is copy pasted from C_General_Tensor_Product
336     DataTypes::ShapeType
337     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
338 jfenwick 1865 {
339 jfenwick 2066
340     // Get rank and shape of inputs
341     int rank0 = left->getRank();
342     int rank1 = right->getRank();
343     const DataTypes::ShapeType& shape0 = left->getShape();
344     const DataTypes::ShapeType& shape1 = right->getShape();
345    
346     // Prepare for the loops of the product and verify compatibility of shapes
347     int start0=0, start1=0;
348     if (transpose == 0) {}
349     else if (transpose == 1) { start0 = axis_offset; }
350     else if (transpose == 2) { start1 = rank1-axis_offset; }
351     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
352    
353 jfenwick 2085 if (rank0<axis_offset)
354     {
355     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
356     }
357 jfenwick 2066
358     // Adjust the shapes for transpose
359     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
360     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
361     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
362     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
363    
364     // Prepare for the loops of the product
365     SL=1, SM=1, SR=1;
366     for (int i=0; i<rank0-axis_offset; i++) {
367     SL *= tmpShape0[i];
368     }
369     for (int i=rank0-axis_offset; i<rank0; i++) {
370     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
371     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
372     }
373     SM *= tmpShape0[i];
374     }
375     for (int i=axis_offset; i<rank1; i++) {
376     SR *= tmpShape1[i];
377     }
378    
379     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
380     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
381     { // block to limit the scope of out_index
382     int out_index=0;
383     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
384     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
385     }
386 jfenwick 2086
387     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
388     {
389     ostringstream os;
390     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
391     throw DataException(os.str());
392     }
393    
394 jfenwick 2066 return shape2;
395 jfenwick 1865 }
396    
397 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
398 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
399 jfenwick 2066 // The same goes for G_TENSORPROD
400 jfenwick 2271 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
401 jfenwick 2157 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
402     // multiple times
403 jfenwick 1879 int
404     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
405     {
406 jfenwick 1886 switch(getOpgroup(op))
407 jfenwick 1879 {
408 jfenwick 1886 case G_IDENTITY: return 1;
409 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
410 jfenwick 2147 case G_UNARY:
411     case G_UNARY_P: return max(left->getBuffsRequired(),1);
412 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
413 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
414 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
415 jfenwick 2496 case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
416 jfenwick 1879 default:
417     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
418     }
419     }
420    
421 jfenwick 1899
422 jfenwick 1865 } // end anonymous namespace
423    
424    
425 jfenwick 1899
426     // Return a string representing the operation
427 jfenwick 1865 const std::string&
428     opToString(ES_optype op)
429     {
430     if (op<0 || op>=ES_opcount)
431     {
432     op=UNKNOWNOP;
433     }
434     return ES_opstrings[op];
435     }
436    
437 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
438     void DataLazy::LazyNodeSetup()
439     {
440     #ifdef _OPENMP
441     int numthreads=omp_get_max_threads();
442     m_samples.resize(numthreads*m_samplesize);
443     m_sampleids=new int[numthreads];
444     for (int i=0;i<numthreads;++i)
445     {
446     m_sampleids[i]=-1;
447     }
448     #else
449     m_samples.resize(m_samplesize);
450     m_sampleids=new int[1];
451     m_sampleids[0]=-1;
452     #endif // _OPENMP
453     }
454     #endif // LAZY_NODE_STORAGE
455 jfenwick 1865
456 jfenwick 2500
457     // Creates an identity node
458 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
459 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
460 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
461     ,m_sampleids(0),
462     m_samples(1)
463     #endif
464 jfenwick 1865 {
465 jfenwick 1879 if (p->isLazy())
466     {
467     // I don't want identity of Lazy.
468     // Question: Why would that be so bad?
469     // Answer: We assume that the child of ID is something we can call getVector on
470     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
471     }
472     else
473     {
474 jfenwick 2271 p->makeLazyShared();
475 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
476     makeIdentity(dr);
477 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
478 jfenwick 1879 }
479 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
480 jfenwick 1865 }
481    
482 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483     : parent(left->getFunctionSpace(),left->getShape()),
484 jfenwick 2066 m_op(op),
485     m_axis_offset(0),
486     m_transpose(0),
487     m_SL(0), m_SM(0), m_SR(0)
488 jfenwick 1886 {
489 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
490 jfenwick 1886 {
491     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
492     }
493 jfenwick 2066
494 jfenwick 1886 DataLazy_ptr lleft;
495     if (!left->isLazy())
496     {
497     lleft=DataLazy_ptr(new DataLazy(left));
498     }
499     else
500     {
501     lleft=dynamic_pointer_cast<DataLazy>(left);
502     }
503 jfenwick 1889 m_readytype=lleft->m_readytype;
504 jfenwick 1886 m_left=lleft;
505 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
506 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
507 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
508 jfenwick 2177 m_children=m_left->m_children+1;
509     m_height=m_left->m_height+1;
510 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
511     LazyNodeSetup();
512     #endif
513 jfenwick 2177 SIZELIMIT
514 jfenwick 1886 }
515    
516    
517 jfenwick 1943 // In this constructor we need to consider interpolation
518 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
519     : parent(resultFS(left,right,op), resultShape(left,right,op)),
520 jfenwick 2066 m_op(op),
521     m_SL(0), m_SM(0), m_SR(0)
522 jfenwick 1879 {
523 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
524 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
525 jfenwick 1886 {
526 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
527 jfenwick 1886 }
528 jfenwick 1943
529     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
530     {
531     FunctionSpace fs=getFunctionSpace();
532     Data ltemp(left);
533     Data tmp(ltemp,fs);
534     left=tmp.borrowDataPtr();
535     }
536 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
537 jfenwick 1943 {
538     Data tmp(Data(right),getFunctionSpace());
539     right=tmp.borrowDataPtr();
540 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
541 jfenwick 1943 }
542     left->operandCheck(*right);
543    
544 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
545 jfenwick 1879 {
546     m_left=dynamic_pointer_cast<DataLazy>(left);
547 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
548 jfenwick 1879 }
549     else
550     {
551     m_left=DataLazy_ptr(new DataLazy(left));
552 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
553 jfenwick 1879 }
554     if (right->isLazy())
555     {
556     m_right=dynamic_pointer_cast<DataLazy>(right);
557 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
558 jfenwick 1879 }
559     else
560     {
561     m_right=DataLazy_ptr(new DataLazy(right));
562 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
563 jfenwick 1879 }
564 jfenwick 1889 char lt=m_left->m_readytype;
565     char rt=m_right->m_readytype;
566     if (lt=='E' || rt=='E')
567     {
568     m_readytype='E';
569     }
570     else if (lt=='T' || rt=='T')
571     {
572     m_readytype='T';
573     }
574     else
575     {
576     m_readytype='C';
577     }
578 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
579     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
580 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
581 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;
583 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
584     LazyNodeSetup();
585     #endif
586 jfenwick 2177 SIZELIMIT
587 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588 jfenwick 1879 }
589    
590 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592     m_op(op),
593     m_axis_offset(axis_offset),
594     m_transpose(transpose)
595     {
596     if ((getOpgroup(op)!=G_TENSORPROD))
597     {
598     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599     }
600     if ((transpose>2) || (transpose<0))
601     {
602     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603     }
604     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
605     {
606     FunctionSpace fs=getFunctionSpace();
607     Data ltemp(left);
608     Data tmp(ltemp,fs);
609     left=tmp.borrowDataPtr();
610     }
611     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
612     {
613     Data tmp(Data(right),getFunctionSpace());
614     right=tmp.borrowDataPtr();
615     }
616 jfenwick 2195 // left->operandCheck(*right);
617 jfenwick 1879
618 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {
620     m_left=dynamic_pointer_cast<DataLazy>(left);
621     }
622     else
623     {
624     m_left=DataLazy_ptr(new DataLazy(left));
625     }
626     if (right->isLazy())
627     {
628     m_right=dynamic_pointer_cast<DataLazy>(right);
629     }
630     else
631     {
632     m_right=DataLazy_ptr(new DataLazy(right));
633     }
634     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')
637     {
638     m_readytype='E';
639     }
640     else if (lt=='T' || rt=='T')
641     {
642     m_readytype='T';
643     }
644     else
645     {
646     m_readytype='C';
647     }
648     m_samplesize=getNumDPPSample()*getNoValues();
649     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
650     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
651 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
652     m_height=max(m_left->m_height,m_right->m_height)+1;
653 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
654     LazyNodeSetup();
655     #endif
656 jfenwick 2177 SIZELIMIT
657 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
658 jfenwick 2066 }
659    
660    
661 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
662 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
663 jfenwick 2084 m_op(op),
664     m_axis_offset(axis_offset),
665 jfenwick 2147 m_transpose(0),
666     m_tol(0)
667 jfenwick 2084 {
668     if ((getOpgroup(op)!=G_NP1OUT_P))
669     {
670     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
671     }
672     DataLazy_ptr lleft;
673     if (!left->isLazy())
674     {
675     lleft=DataLazy_ptr(new DataLazy(left));
676     }
677     else
678     {
679     lleft=dynamic_pointer_cast<DataLazy>(left);
680     }
681     m_readytype=lleft->m_readytype;
682     m_left=lleft;
683     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
684     m_samplesize=getNumDPPSample()*getNoValues();
685     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
686 jfenwick 2177 m_children=m_left->m_children+1;
687     m_height=m_left->m_height+1;
688 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
689     LazyNodeSetup();
690     #endif
691 jfenwick 2177 SIZELIMIT
692 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
693 jfenwick 2084 }
694    
695 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
696     : parent(left->getFunctionSpace(), left->getShape()),
697     m_op(op),
698     m_axis_offset(0),
699     m_transpose(0),
700     m_tol(tol)
701     {
702     if ((getOpgroup(op)!=G_UNARY_P))
703     {
704     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
705     }
706     DataLazy_ptr lleft;
707     if (!left->isLazy())
708     {
709     lleft=DataLazy_ptr(new DataLazy(left));
710     }
711     else
712     {
713     lleft=dynamic_pointer_cast<DataLazy>(left);
714     }
715     m_readytype=lleft->m_readytype;
716     m_left=lleft;
717     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
718     m_samplesize=getNumDPPSample()*getNoValues();
719     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
720 jfenwick 2177 m_children=m_left->m_children+1;
721     m_height=m_left->m_height+1;
722 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
723     LazyNodeSetup();
724     #endif
725 jfenwick 2177 SIZELIMIT
726 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
727     }
728 jfenwick 2084
729 jfenwick 2496
730     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
731     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
732     m_op(op),
733     m_axis_offset(axis0),
734     m_transpose(axis1),
735     m_tol(0)
736     {
737     if ((getOpgroup(op)!=G_NP1OUT_2P))
738     {
739     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
740     }
741     DataLazy_ptr lleft;
742     if (!left->isLazy())
743     {
744     lleft=DataLazy_ptr(new DataLazy(left));
745     }
746     else
747     {
748     lleft=dynamic_pointer_cast<DataLazy>(left);
749     }
750     m_readytype=lleft->m_readytype;
751     m_left=lleft;
752     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
753     m_samplesize=getNumDPPSample()*getNoValues();
754     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
755     m_children=m_left->m_children+1;
756     m_height=m_left->m_height+1;
757 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
758     LazyNodeSetup();
759     #endif
760 jfenwick 2496 SIZELIMIT
761     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
762     }
763    
764 jfenwick 1865 DataLazy::~DataLazy()
765     {
766 jfenwick 2500 #ifdef LAZY_NODE_SETUP
767     delete[] m_sampleids;
768     delete[] m_samples;
769     #endif
770 jfenwick 1865 }
771    
772 jfenwick 1879
773     int
774     DataLazy::getBuffsRequired() const
775 jfenwick 1865 {
776 jfenwick 1879 return m_buffsRequired;
777     }
778    
779 jfenwick 1884
780 jfenwick 2066 size_t
781     DataLazy::getMaxSampleSize() const
782     {
783     return m_maxsamplesize;
784     }
785    
786 jfenwick 2271
787    
788     size_t
789     DataLazy::getSampleBufferSize() const
790     {
791     return m_maxsamplesize*(max(1,m_buffsRequired));
792     }
793    
794 jfenwick 1899 /*
795     \brief Evaluates the expression using methods on Data.
796     This does the work for the collapse method.
797     For reasons of efficiency do not call this method on DataExpanded nodes.
798     */
799 jfenwick 1889 DataReady_ptr
800     DataLazy::collapseToReady()
801     {
802     if (m_readytype=='E')
803     { // this is more an efficiency concern than anything else
804     throw DataException("Programmer Error - do not use collapse on Expanded data.");
805     }
806     if (m_op==IDENTITY)
807     {
808     return m_id;
809     }
810     DataReady_ptr pleft=m_left->collapseToReady();
811     Data left(pleft);
812     Data right;
813 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
814 jfenwick 1889 {
815     right=Data(m_right->collapseToReady());
816     }
817     Data result;
818     switch(m_op)
819     {
820     case ADD:
821     result=left+right;
822     break;
823     case SUB:
824     result=left-right;
825     break;
826     case MUL:
827     result=left*right;
828     break;
829     case DIV:
830     result=left/right;
831     break;
832     case SIN:
833     result=left.sin();
834     break;
835     case COS:
836     result=left.cos();
837     break;
838     case TAN:
839     result=left.tan();
840     break;
841     case ASIN:
842     result=left.asin();
843     break;
844     case ACOS:
845     result=left.acos();
846     break;
847     case ATAN:
848     result=left.atan();
849     break;
850     case SINH:
851     result=left.sinh();
852     break;
853     case COSH:
854     result=left.cosh();
855     break;
856     case TANH:
857     result=left.tanh();
858     break;
859     case ERF:
860     result=left.erf();
861     break;
862     case ASINH:
863     result=left.asinh();
864     break;
865     case ACOSH:
866     result=left.acosh();
867     break;
868     case ATANH:
869     result=left.atanh();
870     break;
871     case LOG10:
872     result=left.log10();
873     break;
874     case LOG:
875     result=left.log();
876     break;
877     case SIGN:
878     result=left.sign();
879     break;
880     case ABS:
881     result=left.abs();
882     break;
883     case NEG:
884     result=left.neg();
885     break;
886     case POS:
887     // it doesn't mean anything for delayed.
888     // it will just trigger a deep copy of the lazy object
889     throw DataException("Programmer error - POS not supported for lazy data.");
890     break;
891     case EXP:
892     result=left.exp();
893     break;
894     case SQRT:
895     result=left.sqrt();
896     break;
897     case RECIP:
898     result=left.oneOver();
899     break;
900     case GZ:
901     result=left.wherePositive();
902     break;
903     case LZ:
904     result=left.whereNegative();
905     break;
906     case GEZ:
907     result=left.whereNonNegative();
908     break;
909     case LEZ:
910     result=left.whereNonPositive();
911     break;
912 jfenwick 2147 case NEZ:
913     result=left.whereNonZero(m_tol);
914     break;
915     case EZ:
916     result=left.whereZero(m_tol);
917     break;
918 jfenwick 2037 case SYM:
919     result=left.symmetric();
920     break;
921     case NSYM:
922     result=left.nonsymmetric();
923     break;
924 jfenwick 2066 case PROD:
925     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
926     break;
927 jfenwick 2084 case TRANS:
928     result=left.transpose(m_axis_offset);
929     break;
930     case TRACE:
931     result=left.trace(m_axis_offset);
932     break;
933 jfenwick 2496 case SWAP:
934     result=left.swapaxes(m_axis_offset, m_transpose);
935     break;
936 jfenwick 1889 default:
937 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
938 jfenwick 1889 }
939     return result.borrowReadyPtr();
940     }
941    
942 jfenwick 1899 /*
943     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
944     This method uses the original methods on the Data class to evaluate the expressions.
945     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
946     the purpose of using DataLazy in the first place).
947     */
948 jfenwick 1889 void
949     DataLazy::collapse()
950     {
951     if (m_op==IDENTITY)
952     {
953     return;
954     }
955     if (m_readytype=='E')
956     { // this is more an efficiency concern than anything else
957     throw DataException("Programmer Error - do not use collapse on Expanded data.");
958     }
959     m_id=collapseToReady();
960     m_op=IDENTITY;
961     }
962    
963 jfenwick 1899 /*
964 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
965 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
966     \param v A vector to store intermediate results.
967     \param offset Index in v to begin storing results.
968     \param sampleNo Sample number to evaluate.
969     \param roffset (output parameter) the offset in the return vector where the result begins.
970    
971     The return value will be an existing vector so do not deallocate it.
972     If the result is stored in v it should be stored at the offset given.
973     Everything from offset to the end of v should be considered available for this method to use.
974     */
975 jfenwick 1898 DataTypes::ValueType*
976     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
977 jfenwick 1889 {
978     // we assume that any collapsing has been done before we get here
979     // since we only have one argument we don't need to think about only
980     // processing single points.
981     if (m_readytype!='E')
982     {
983     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
984     }
985 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
986 jfenwick 1898 const double* left=&((*vleft)[roffset]);
987 jfenwick 1889 double* result=&(v[offset]);
988 jfenwick 1898 roffset=offset;
989 jfenwick 1889 switch (m_op)
990     {
991     case SIN:
992 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
993 jfenwick 1889 break;
994     case COS:
995 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
996 jfenwick 1889 break;
997     case TAN:
998 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
999 jfenwick 1889 break;
1000     case ASIN:
1001 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1002 jfenwick 1889 break;
1003     case ACOS:
1004 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1005 jfenwick 1889 break;
1006     case ATAN:
1007 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1008 jfenwick 1889 break;
1009     case SINH:
1010 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1011 jfenwick 1889 break;
1012     case COSH:
1013 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1014 jfenwick 1889 break;
1015     case TANH:
1016 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1017 jfenwick 1889 break;
1018     case ERF:
1019 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1020 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1021     #else
1022     tensor_unary_operation(m_samplesize, left, result, ::erf);
1023     break;
1024     #endif
1025     case ASINH:
1026 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1027 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1028     #else
1029     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1030     #endif
1031     break;
1032     case ACOSH:
1033 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1034 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1035     #else
1036     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1037     #endif
1038     break;
1039     case ATANH:
1040 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1041 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1042     #else
1043     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1044     #endif
1045     break;
1046     case LOG10:
1047 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1048 jfenwick 1889 break;
1049     case LOG:
1050 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1051 jfenwick 1889 break;
1052     case SIGN:
1053     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1054     break;
1055     case ABS:
1056 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1057 jfenwick 1889 break;
1058     case NEG:
1059     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1060     break;
1061     case POS:
1062     // it doesn't mean anything for delayed.
1063     // it will just trigger a deep copy of the lazy object
1064     throw DataException("Programmer error - POS not supported for lazy data.");
1065     break;
1066     case EXP:
1067 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1068 jfenwick 1889 break;
1069     case SQRT:
1070 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1071 jfenwick 1889 break;
1072     case RECIP:
1073     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1074     break;
1075     case GZ:
1076     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1077     break;
1078     case LZ:
1079     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1080     break;
1081     case GEZ:
1082     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1083     break;
1084     case LEZ:
1085     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1086     break;
1087 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1088     case NEZ:
1089     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1090     break;
1091     case EZ:
1092     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1093     break;
1094 jfenwick 1889
1095     default:
1096     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1097     }
1098 jfenwick 1898 return &v;
1099 jfenwick 1889 }
1100    
1101    
1102 jfenwick 2147
1103    
1104    
1105    
1106 jfenwick 2037 /*
1107     \brief Compute the value of the expression (unary operation) for the given sample.
1108     \return Vector which stores the value of the subexpression for the given sample.
1109     \param v A vector to store intermediate results.
1110     \param offset Index in v to begin storing results.
1111     \param sampleNo Sample number to evaluate.
1112     \param roffset (output parameter) the offset in the return vector where the result begins.
1113 jfenwick 1898
1114 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
1115     If the result is stored in v it should be stored at the offset given.
1116     Everything from offset to the end of v should be considered available for this method to use.
1117     */
1118     DataTypes::ValueType*
1119     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1120     {
1121     // we assume that any collapsing has been done before we get here
1122     // since we only have one argument we don't need to think about only
1123     // processing single points.
1124     if (m_readytype!='E')
1125     {
1126     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1127     }
1128     // since we can't write the result over the input, we need a result offset further along
1129     size_t subroffset=roffset+m_samplesize;
1130 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1131 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1132 jfenwick 2037 roffset=offset;
1133 jfenwick 2157 size_t loop=0;
1134     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1135     size_t step=getNoValues();
1136 jfenwick 2037 switch (m_op)
1137     {
1138     case SYM:
1139 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1140     {
1141     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1142     subroffset+=step;
1143     offset+=step;
1144     }
1145 jfenwick 2037 break;
1146     case NSYM:
1147 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1148     {
1149     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1150     subroffset+=step;
1151     offset+=step;
1152     }
1153 jfenwick 2037 break;
1154     default:
1155     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1156     }
1157     return &v;
1158     }
1159 jfenwick 1898
1160 jfenwick 2084 /*
1161     \brief Compute the value of the expression (unary operation) for the given sample.
1162     \return Vector which stores the value of the subexpression for the given sample.
1163     \param v A vector to store intermediate results.
1164     \param offset Index in v to begin storing results.
1165     \param sampleNo Sample number to evaluate.
1166     \param roffset (output parameter) the offset in the return vector where the result begins.
1167 jfenwick 1899
1168 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
1169     If the result is stored in v it should be stored at the offset given.
1170     Everything from offset to the end of v should be considered available for this method to use.
1171     */
1172     DataTypes::ValueType*
1173     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1174     {
1175     // we assume that any collapsing has been done before we get here
1176     // since we only have one argument we don't need to think about only
1177     // processing single points.
1178     if (m_readytype!='E')
1179     {
1180     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1181     }
1182     // since we can't write the result over the input, we need a result offset further along
1183 jfenwick 2157 size_t subroffset;
1184 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1185 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1186     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1187 jfenwick 2084 roffset=offset;
1188 jfenwick 2157 size_t loop=0;
1189     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190 jfenwick 2166 size_t outstep=getNoValues();
1191     size_t instep=m_left->getNoValues();
1192     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1193 jfenwick 2084 switch (m_op)
1194     {
1195     case TRACE:
1196 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1197     {
1198 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
1199     if (zz==5800)
1200     {
1201     LAZYDEBUG(cerr << "point=" << zz<< endl;)
1202     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1203     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1204     LAZYDEBUG(cerr << subroffset << endl;)
1205     LAZYDEBUG(cerr << "output=" << offset << endl;)
1206     }
1207     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1208     if (zz==5800)
1209     {
1210     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1211     }
1212     subroffset+=instep;
1213     offset+=outstep;
1214 jfenwick 2157 }
1215 jfenwick 2084 break;
1216     case TRANS:
1217 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1218     {
1219     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1220 jfenwick 2166 subroffset+=instep;
1221     offset+=outstep;
1222 jfenwick 2157 }
1223 jfenwick 2084 break;
1224     default:
1225     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1226     }
1227     return &v;
1228     }
1229 jfenwick 2037
1230    
1231 jfenwick 2496 /*
1232     \brief Compute the value of the expression (unary operation with int params) for the given sample.
1233     \return Vector which stores the value of the subexpression for the given sample.
1234     \param v A vector to store intermediate results.
1235     \param offset Index in v to begin storing results.
1236     \param sampleNo Sample number to evaluate.
1237     \param roffset (output parameter) the offset in the return vector where the result begins.
1238    
1239     The return value will be an existing vector so do not deallocate it.
1240     If the result is stored in v it should be stored at the offset given.
1241     Everything from offset to the end of v should be considered available for this method to use.
1242     */
1243     DataTypes::ValueType*
1244     DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1245     {
1246     // we assume that any collapsing has been done before we get here
1247     // since we only have one argument we don't need to think about only
1248     // processing single points.
1249     if (m_readytype!='E')
1250     {
1251     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1252     }
1253     // since we can't write the result over the input, we need a result offset further along
1254     size_t subroffset;
1255 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1256 jfenwick 2496 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1257     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1258     roffset=offset;
1259     size_t loop=0;
1260     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1261     size_t outstep=getNoValues();
1262     size_t instep=m_left->getNoValues();
1263     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1264     switch (m_op)
1265     {
1266     case SWAP:
1267     for (loop=0;loop<numsteps;++loop)
1268     {
1269     DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1270     subroffset+=instep;
1271     offset+=outstep;
1272     }
1273     break;
1274     default:
1275     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1276     }
1277     return &v;
1278     }
1279    
1280    
1281    
1282 phornby 1987 #define PROC_OP(TYPE,X) \
1283 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1284     {\
1285     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1286     { \
1287     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1288 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1289 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1290 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1291 jfenwick 2152 lroffset+=leftstep; \
1292     rroffset+=rightstep; \
1293     }\
1294     lroffset+=oleftstep;\
1295     rroffset+=orightstep;\
1296 jfenwick 1889 }
1297    
1298 jfenwick 1899 /*
1299     \brief Compute the value of the expression (binary operation) for the given sample.
1300     \return Vector which stores the value of the subexpression for the given sample.
1301     \param v A vector to store intermediate results.
1302     \param offset Index in v to begin storing results.
1303     \param sampleNo Sample number to evaluate.
1304     \param roffset (output parameter) the offset in the return vector where the result begins.
1305    
1306     The return value will be an existing vector so do not deallocate it.
1307     If the result is stored in v it should be stored at the offset given.
1308     Everything from offset to the end of v should be considered available for this method to use.
1309     */
1310     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1311     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1312     // If both children are expanded, then we can process them in a single operation (we treat
1313     // the whole sample as one big datapoint.
1314     // If one of the children is not expanded, then we need to treat each point in the sample
1315     // individually.
1316 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1317     // For example, 2+Vector.
1318     // In this case each double within the point is treated individually
1319 jfenwick 1898 DataTypes::ValueType*
1320 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1321 jfenwick 1889 {
1322 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1323 jfenwick 1889
1324 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1325     // first work out which of the children are expanded
1326 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1327     bool rightExp=(m_right->m_readytype=='E');
1328 jfenwick 2084 if (!leftExp && !rightExp)
1329     {
1330     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1331     }
1332     bool leftScalar=(m_left->getRank()==0);
1333     bool rightScalar=(m_right->getRank()==0);
1334 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1335 jfenwick 1908 {
1336 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1337     }
1338     size_t leftsize=m_left->getNoValues();
1339     size_t rightsize=m_right->getNoValues();
1340     size_t chunksize=1; // how many doubles will be processed in one go
1341     int leftstep=0; // how far should the left offset advance after each step
1342     int rightstep=0;
1343     int numsteps=0; // total number of steps for the inner loop
1344     int oleftstep=0; // the o variables refer to the outer loop
1345     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1346     int onumsteps=1;
1347    
1348     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1349     bool RES=(rightExp && rightScalar);
1350     bool LS=(!leftExp && leftScalar); // left is a single scalar
1351     bool RS=(!rightExp && rightScalar);
1352     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1353     bool RN=(!rightExp && !rightScalar);
1354     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1355     bool REN=(rightExp && !rightScalar);
1356    
1357     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1358     {
1359     chunksize=m_left->getNumDPPSample()*leftsize;
1360     leftstep=0;
1361     rightstep=0;
1362     numsteps=1;
1363     }
1364     else if (LES || RES)
1365     {
1366     chunksize=1;
1367     if (LES) // left is an expanded scalar
1368 jfenwick 2084 {
1369 jfenwick 2152 if (RS)
1370     {
1371     leftstep=1;
1372     rightstep=0;
1373     numsteps=m_left->getNumDPPSample();
1374     }
1375     else // RN or REN
1376     {
1377     leftstep=0;
1378     oleftstep=1;
1379     rightstep=1;
1380 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1381 jfenwick 2152 numsteps=rightsize;
1382     onumsteps=m_left->getNumDPPSample();
1383     }
1384 jfenwick 2084 }
1385 jfenwick 2152 else // right is an expanded scalar
1386     {
1387     if (LS)
1388     {
1389     rightstep=1;
1390     leftstep=0;
1391     numsteps=m_right->getNumDPPSample();
1392     }
1393     else
1394     {
1395     rightstep=0;
1396     orightstep=1;
1397     leftstep=1;
1398 jfenwick 2177 oleftstep=(LN ? -(int)leftsize : 0);
1399 jfenwick 2152 numsteps=leftsize;
1400     onumsteps=m_right->getNumDPPSample();
1401     }
1402     }
1403     }
1404     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1405     {
1406     if (LEN) // and Right will be a single value
1407     {
1408     chunksize=rightsize;
1409     leftstep=rightsize;
1410     rightstep=0;
1411     numsteps=m_left->getNumDPPSample();
1412     if (RS)
1413     {
1414     numsteps*=leftsize;
1415     }
1416     }
1417     else // REN
1418     {
1419     chunksize=leftsize;
1420     rightstep=leftsize;
1421     leftstep=0;
1422     numsteps=m_right->getNumDPPSample();
1423     if (LS)
1424     {
1425     numsteps*=rightsize;
1426     }
1427     }
1428     }
1429    
1430     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1431 jfenwick 1899 // Get the values of sub-expressions
1432 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1433 jfenwick 2157 // calcBufss for why we can't put offset as the 2nd param above
1434 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1435 jfenwick 1899 // the right child starts further along.
1436 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1437     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1438     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1439     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1440     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1441     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1442     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1443 jfenwick 2195
1444 jfenwick 2199
1445 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1446 jfenwick 1889 switch(m_op)
1447     {
1448     case ADD:
1449 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1450 jfenwick 1889 break;
1451 jfenwick 1899 case SUB:
1452 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1453 jfenwick 1899 break;
1454     case MUL:
1455 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1456 jfenwick 1899 break;
1457     case DIV:
1458 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1459 jfenwick 1899 break;
1460 jfenwick 1910 case POW:
1461 phornby 1994 PROC_OP(double (double,double),::pow);
1462 jfenwick 1910 break;
1463 jfenwick 1889 default:
1464 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1465 jfenwick 1889 }
1466 jfenwick 2195 roffset=offset;
1467 jfenwick 1898 return &v;
1468 jfenwick 1889 }
1469    
1470 jfenwick 1898
1471 jfenwick 2152
1472 jfenwick 2066 /*
1473     \brief Compute the value of the expression (tensor product) for the given sample.
1474     \return Vector which stores the value of the subexpression for the given sample.
1475     \param v A vector to store intermediate results.
1476     \param offset Index in v to begin storing results.
1477     \param sampleNo Sample number to evaluate.
1478     \param roffset (output parameter) the offset in the return vector where the result begins.
1479 jfenwick 1898
1480 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1481     If the result is stored in v it should be stored at the offset given.
1482     Everything from offset to the end of v should be considered available for this method to use.
1483     */
1484     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1485     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1486     // unlike the other resolve helpers, we must treat these datapoints separately.
1487     DataTypes::ValueType*
1488     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1489     {
1490 jfenwick 2195 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)
1491 jfenwick 2066
1492     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1493     // first work out which of the children are expanded
1494     bool leftExp=(m_left->m_readytype=='E');
1495     bool rightExp=(m_right->m_readytype=='E');
1496     int steps=getNumDPPSample();
1497 jfenwick 2195 /* int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1498     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1499     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1500     int rightStep=(rightExp?m_right->getNoValues() : 0);
1501    
1502     int resultStep=getNoValues();
1503 jfenwick 2066 // Get the values of sub-expressions (leave a gap of one sample for the result).
1504 jfenwick 2199 int gap=offset+m_samplesize;
1505 jfenwick 2195
1506     LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1507    
1508 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
1509 jfenwick 2199 gap+=m_left->getMaxSampleSize();
1510 jfenwick 2195
1511    
1512     LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1513    
1514    
1515 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);
1516 jfenwick 2195
1517     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1518     cout << getNoValues() << endl;)
1519     LAZYDEBUG(cerr << "Result of left=";)
1520     LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1521 jfenwick 2199
1522     for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1523 jfenwick 2195 {
1524 jfenwick 2199 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1525     if (i%4==0) cout << endl;
1526 jfenwick 2195 })
1527     LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1528 jfenwick 2199 LAZYDEBUG(
1529     for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1530 jfenwick 2195 {
1531 jfenwick 2199 cerr << "[" << setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1532     if (i%4==0) cout << endl;
1533 jfenwick 2195 }
1534     cerr << endl;
1535     )
1536     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1537 jfenwick 2153 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1538     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1539     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1540     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1541     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1542 jfenwick 2195 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1543    
1544 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1545     switch(m_op)
1546     {
1547     case PROD:
1548     for (int i=0;i<steps;++i,resultp+=resultStep)
1549     {
1550 jfenwick 2199
1551 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1552     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1553     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1554 jfenwick 2199
1555 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1556     const double *ptr_1 = &((*right)[rroffset]);
1557 jfenwick 2199
1558 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1559     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1560 jfenwick 2199
1561 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1562 jfenwick 2199
1563 jfenwick 2195 LAZYDEBUG(cout << "Results--\n";
1564 jfenwick 2199 {
1565     DataVector dv(getNoValues());
1566 jfenwick 2195 for (int z=0;z<getNoValues();++z)
1567     {
1568 jfenwick 2199 cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1569     if (z%4==0) cout << endl;
1570     dv[z]=resultp[z];
1571 jfenwick 2195 }
1572 jfenwick 2199 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1573 jfenwick 2195 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1574 jfenwick 2199 }
1575 jfenwick 2195 )
1576 jfenwick 2066 lroffset+=leftStep;
1577     rroffset+=rightStep;
1578     }
1579     break;
1580     default:
1581     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1582     }
1583     roffset=offset;
1584     return &v;
1585     }
1586    
1587    
1588 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
1589 jfenwick 2066
1590 jfenwick 2500 // The result will be stored in m_samples
1591     // The return value is a pointer to the DataVector, offset is the offset within the return value
1592     const DataTypes::ValueType*
1593     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1594     {
1595     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1596     // collapse so we have a 'E' node or an IDENTITY for some other type
1597     if (m_readytype!='E' && m_op!=IDENTITY)
1598     {
1599     collapse();
1600     }
1601     if (m_op==IDENTITY)
1602     {
1603     const ValueType& vec=m_id->getVectorRO();
1604     // if (m_readytype=='C')
1605     // {
1606     // roffset=0; // all samples read from the same position
1607     // return &(m_samples);
1608     // }
1609     roffset=m_id->getPointOffset(sampleNo, 0);
1610     return &(vec);
1611     }
1612     if (m_readytype!='E')
1613     {
1614     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1615     }
1616 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1617     {
1618     roffset=tid*m_samplesize;
1619     return &(m_samples); // sample is already resolved
1620     }
1621     m_sampleids[tid]=sampleNo;
1622 jfenwick 2500 switch (getOpgroup(m_op))
1623     {
1624     case G_UNARY:
1625     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1626     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1627     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1628     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1629     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1630     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1631     default:
1632     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1633     }
1634     }
1635    
1636     const DataTypes::ValueType*
1637     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1638     {
1639     // we assume that any collapsing has been done before we get here
1640     // since we only have one argument we don't need to think about only
1641     // processing single points.
1642     // we will also know we won't get identity nodes
1643     if (m_readytype!='E')
1644     {
1645     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1646     }
1647     if (m_op==IDENTITY)
1648     {
1649     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1650     }
1651     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1652     const double* left=&((*leftres)[roffset]);
1653     roffset=m_samplesize*tid;
1654     double* result=&(m_samples[roffset]);
1655     switch (m_op)
1656     {
1657     case SIN:
1658     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1659     break;
1660     case COS:
1661     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1662     break;
1663     case TAN:
1664     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1665     break;
1666     case ASIN:
1667     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1668     break;
1669     case ACOS:
1670     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1671     break;
1672     case ATAN:
1673     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1674     break;
1675     case SINH:
1676     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1677     break;
1678     case COSH:
1679     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1680     break;
1681     case TANH:
1682     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1683     break;
1684     case ERF:
1685     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1686     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1687     #else
1688     tensor_unary_operation(m_samplesize, left, result, ::erf);
1689     break;
1690     #endif
1691     case ASINH:
1692     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1693     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1694     #else
1695     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1696     #endif
1697     break;
1698     case ACOSH:
1699     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1700     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1701     #else
1702     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1703     #endif
1704     break;
1705     case ATANH:
1706     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1707     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1708     #else
1709     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1710     #endif
1711     break;
1712     case LOG10:
1713     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1714     break;
1715     case LOG:
1716     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1717     break;
1718     case SIGN:
1719     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1720     break;
1721     case ABS:
1722     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1723     break;
1724     case NEG:
1725     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1726     break;
1727     case POS:
1728     // it doesn't mean anything for delayed.
1729     // it will just trigger a deep copy of the lazy object
1730     throw DataException("Programmer error - POS not supported for lazy data.");
1731     break;
1732     case EXP:
1733     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1734     break;
1735     case SQRT:
1736     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1737     break;
1738     case RECIP:
1739     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1740     break;
1741     case GZ:
1742     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1743     break;
1744     case LZ:
1745     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1746     break;
1747     case GEZ:
1748     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1749     break;
1750     case LEZ:
1751     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1752     break;
1753     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1754     case NEZ:
1755     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1756     break;
1757     case EZ:
1758     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1759     break;
1760    
1761     default:
1762     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1763     }
1764     return &(m_samples);
1765     }
1766    
1767    
1768     const DataTypes::ValueType*
1769     DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1770     {
1771     // we assume that any collapsing has been done before we get here
1772     // since we only have one argument we don't need to think about only
1773     // processing single points.
1774     if (m_readytype!='E')
1775     {
1776     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1777     }
1778     if (m_op==IDENTITY)
1779     {
1780     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1781     }
1782     size_t subroffset;
1783     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1784     roffset=m_samplesize*tid;
1785     size_t loop=0;
1786     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1787     size_t step=getNoValues();
1788     size_t offset=roffset;
1789     switch (m_op)
1790     {
1791     case SYM:
1792     for (loop=0;loop<numsteps;++loop)
1793     {
1794     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1795     subroffset+=step;
1796     offset+=step;
1797     }
1798     break;
1799     case NSYM:
1800     for (loop=0;loop<numsteps;++loop)
1801     {
1802     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1803     subroffset+=step;
1804     offset+=step;
1805     }
1806     break;
1807     default:
1808     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1809     }
1810     return &m_samples;
1811     }
1812    
1813     const DataTypes::ValueType*
1814     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1815     {
1816     // we assume that any collapsing has been done before we get here
1817     // since we only have one argument we don't need to think about only
1818     // processing single points.
1819     if (m_readytype!='E')
1820     {
1821     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1822     }
1823     if (m_op==IDENTITY)
1824     {
1825     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1826     }
1827     size_t subroffset;
1828     size_t offset;
1829     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1830     roffset=m_samplesize*tid;
1831     offset=roffset;
1832     size_t loop=0;
1833     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1834     size_t outstep=getNoValues();
1835     size_t instep=m_left->getNoValues();
1836     switch (m_op)
1837     {
1838     case TRACE:
1839     for (loop=0;loop<numsteps;++loop)
1840     {
1841     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1842     subroffset+=instep;
1843     offset+=outstep;
1844     }
1845     break;
1846     case TRANS:
1847     for (loop=0;loop<numsteps;++loop)
1848     {
1849     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1850     subroffset+=instep;
1851     offset+=outstep;
1852     }
1853     break;
1854     default:
1855     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1856     }
1857     return &m_samples;
1858     }
1859    
1860    
1861     const DataTypes::ValueType*
1862     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1863     {
1864     if (m_readytype!='E')
1865     {
1866     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1867     }
1868     if (m_op==IDENTITY)
1869     {
1870     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1871     }
1872     size_t subroffset;
1873     size_t offset;
1874     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1875     roffset=m_samplesize*tid;
1876     offset=roffset;
1877     size_t loop=0;
1878     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1879     size_t outstep=getNoValues();
1880     size_t instep=m_left->getNoValues();
1881     switch (m_op)
1882     {
1883     case SWAP:
1884     for (loop=0;loop<numsteps;++loop)
1885     {
1886     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1887     subroffset+=instep;
1888     offset+=outstep;
1889     }
1890     break;
1891     default:
1892     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1893     }
1894     return &m_samples;
1895     }
1896    
1897    
1898    
1899     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1900     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1901     // If both children are expanded, then we can process them in a single operation (we treat
1902     // the whole sample as one big datapoint.
1903     // If one of the children is not expanded, then we need to treat each point in the sample
1904     // individually.
1905     // There is an additional complication when scalar operations are considered.
1906     // For example, 2+Vector.
1907     // In this case each double within the point is treated individually
1908     const DataTypes::ValueType*
1909     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1910     {
1911     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1912    
1913     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1914     // first work out which of the children are expanded
1915     bool leftExp=(m_left->m_readytype=='E');
1916     bool rightExp=(m_right->m_readytype=='E');
1917     if (!leftExp && !rightExp)
1918     {
1919     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1920     }
1921     bool leftScalar=(m_left->getRank()==0);
1922     bool rightScalar=(m_right->getRank()==0);
1923     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1924     {
1925     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1926     }
1927     size_t leftsize=m_left->getNoValues();
1928     size_t rightsize=m_right->getNoValues();
1929     size_t chunksize=1; // how many doubles will be processed in one go
1930     int leftstep=0; // how far should the left offset advance after each step
1931     int rightstep=0;
1932     int numsteps=0; // total number of steps for the inner loop
1933     int oleftstep=0; // the o variables refer to the outer loop
1934     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1935     int onumsteps=1;
1936    
1937     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1938     bool RES=(rightExp && rightScalar);
1939     bool LS=(!leftExp && leftScalar); // left is a single scalar
1940     bool RS=(!rightExp && rightScalar);
1941     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1942     bool RN=(!rightExp && !rightScalar);
1943     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1944     bool REN=(rightExp && !rightScalar);
1945    
1946     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1947     {
1948     chunksize=m_left->getNumDPPSample()*leftsize;
1949     leftstep=0;
1950     rightstep=0;
1951     numsteps=1;
1952     }
1953     else if (LES || RES)
1954     {
1955     chunksize=1;
1956     if (LES) // left is an expanded scalar
1957     {
1958     if (RS)
1959     {
1960     leftstep=1;
1961     rightstep=0;
1962     numsteps=m_left->getNumDPPSample();
1963     }
1964     else // RN or REN
1965     {
1966     leftstep=0;
1967     oleftstep=1;
1968     rightstep=1;
1969     orightstep=(RN ? -(int)rightsize : 0);
1970     numsteps=rightsize;
1971     onumsteps=m_left->getNumDPPSample();
1972     }
1973     }
1974     else // right is an expanded scalar
1975     {
1976     if (LS)
1977     {
1978     rightstep=1;
1979     leftstep=0;
1980     numsteps=m_right->getNumDPPSample();
1981     }
1982     else
1983     {
1984     rightstep=0;
1985     orightstep=1;
1986     leftstep=1;
1987     oleftstep=(LN ? -(int)leftsize : 0);
1988     numsteps=leftsize;
1989     onumsteps=m_right->getNumDPPSample();
1990     }
1991     }
1992     }
1993     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1994     {
1995     if (LEN) // and Right will be a single value
1996     {
1997     chunksize=rightsize;
1998     leftstep=rightsize;
1999     rightstep=0;
2000     numsteps=m_left->getNumDPPSample();
2001     if (RS)
2002     {
2003     numsteps*=leftsize;
2004     }
2005     }
2006     else // REN
2007     {
2008     chunksize=leftsize;
2009     rightstep=leftsize;
2010     leftstep=0;
2011     numsteps=m_right->getNumDPPSample();
2012     if (LS)
2013     {
2014     numsteps*=rightsize;
2015     }
2016     }
2017     }
2018    
2019     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
2020     // Get the values of sub-expressions
2021     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
2022     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2023     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2024     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2025     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2026     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2027     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2028     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2029     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
2030    
2031     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2032     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2033    
2034    
2035     roffset=m_samplesize*tid;
2036     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we recieved
2037     switch(m_op)
2038     {
2039     case ADD:
2040     PROC_OP(NO_ARG,plus<double>());
2041     break;
2042     case SUB:
2043     PROC_OP(NO_ARG,minus<double>());
2044     break;
2045     case MUL:
2046     PROC_OP(NO_ARG,multiplies<double>());
2047     break;
2048     case DIV:
2049     PROC_OP(NO_ARG,divides<double>());
2050     break;
2051     case POW:
2052     PROC_OP(double (double,double),::pow);
2053     break;
2054     default:
2055     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2056     }
2057     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2058     return &m_samples;
2059     }
2060    
2061    
2062     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2063     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2064     // unlike the other resolve helpers, we must treat these datapoints separately.
2065     const DataTypes::ValueType*
2066     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2067     {
2068     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2069    
2070     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2071     // first work out which of the children are expanded
2072     bool leftExp=(m_left->m_readytype=='E');
2073     bool rightExp=(m_right->m_readytype=='E');
2074     int steps=getNumDPPSample();
2075     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2076     int rightStep=(rightExp?m_right->getNoValues() : 0);
2077    
2078     int resultStep=getNoValues();
2079     roffset=m_samplesize*tid;
2080     size_t offset=roffset;
2081    
2082     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2083    
2084     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2085    
2086     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2087     cout << getNoValues() << endl;)
2088    
2089    
2090     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2091     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2092     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2093     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2094     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2095     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2096     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2097    
2098     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we recieved
2099     switch(m_op)
2100     {
2101     case PROD:
2102     for (int i=0;i<steps;++i,resultp+=resultStep)
2103     {
2104     const double *ptr_0 = &((*left)[lroffset]);
2105     const double *ptr_1 = &((*right)[rroffset]);
2106    
2107     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2108     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2109    
2110     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2111    
2112     lroffset+=leftStep;
2113     rroffset+=rightStep;
2114     }
2115     break;
2116     default:
2117     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2118     }
2119     roffset=offset;
2120     return &m_samples;
2121     }
2122     #endif //LAZY_NODE_STORAGE
2123    
2124 jfenwick 1899 /*
2125     \brief Compute the value of the expression for the given sample.
2126     \return Vector which stores the value of the subexpression for the given sample.
2127     \param v A vector to store intermediate results.
2128     \param offset Index in v to begin storing results.
2129     \param sampleNo Sample number to evaluate.
2130     \param roffset (output parameter) the offset in the return vector where the result begins.
2131 jfenwick 1898
2132 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
2133     */
2134 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
2135     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
2136 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
2137     // Regardless, the storage should be assumed to be used, even if it isn't.
2138 jfenwick 1898
2139     // the roffset is the offset within the returned vector where the data begins
2140     const DataTypes::ValueType*
2141 jfenwick 2521 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2142 jfenwick 1879 {
2143 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2144 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
2145     if (m_readytype!='E' && m_op!=IDENTITY)
2146     {
2147     collapse();
2148     }
2149 jfenwick 1885 if (m_op==IDENTITY)
2150 jfenwick 1865 {
2151 jfenwick 2271 const ValueType& vec=m_id->getVectorRO();
2152 jfenwick 1889 if (m_readytype=='C')
2153     {
2154 jfenwick 1898 roffset=0;
2155 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2156 jfenwick 1898 return &(vec);
2157 jfenwick 1889 }
2158 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
2159 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2160 jfenwick 1898 return &(vec);
2161 jfenwick 1865 }
2162 jfenwick 1889 if (m_readytype!='E')
2163     {
2164     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
2165     }
2166     switch (getOpgroup(m_op))
2167     {
2168 jfenwick 2147 case G_UNARY:
2169     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2170 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2171 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2172 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2173 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2174 jfenwick 2496 case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2175 jfenwick 1889 default:
2176     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2177     }
2178 jfenwick 2152
2179 jfenwick 1889 }
2180    
2181 jfenwick 2271 const DataTypes::ValueType*
2182     DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2183     {
2184     #ifdef _OPENMP
2185     int tid=omp_get_thread_num();
2186     #else
2187     int tid=0;
2188     #endif
2189 jfenwick 2521 #ifdef LAZY_NODE_STORAGE
2190     return resolveNodeSample(tid, sampleNo, roffset);
2191     #else
2192     return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2193     #endif
2194 jfenwick 2271 }
2195    
2196    
2197 jfenwick 2497 // This needs to do the work of the identity constructor
2198 jfenwick 2177 void
2199     DataLazy::resolveToIdentity()
2200     {
2201     if (m_op==IDENTITY)
2202     return;
2203 jfenwick 2500 #ifndef LAZY_NODE_STORAGE
2204 jfenwick 2497 DataReady_ptr p=resolveVectorWorker();
2205 jfenwick 2500 #else
2206     DataReady_ptr p=resolveNodeWorker();
2207     #endif
2208 jfenwick 2177 makeIdentity(p);
2209     }
2210 jfenwick 1889
2211 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
2212     {
2213     m_op=IDENTITY;
2214     m_axis_offset=0;
2215     m_transpose=0;
2216     m_SL=m_SM=m_SR=0;
2217     m_children=m_height=0;
2218     m_id=p;
2219     if(p->isConstant()) {m_readytype='C';}
2220     else if(p->isExpanded()) {m_readytype='E';}
2221     else if (p->isTagged()) {m_readytype='T';}
2222     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2223     m_buffsRequired=1;
2224     m_samplesize=p->getNumDPPSample()*p->getNoValues();
2225     m_maxsamplesize=m_samplesize;
2226     m_left.reset();
2227     m_right.reset();
2228     }
2229    
2230 jfenwick 2497
2231     DataReady_ptr
2232     DataLazy::resolve()
2233     {
2234     resolveToIdentity();
2235     return m_id;
2236     }
2237    
2238 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
2239    
2240     // This version of resolve uses storage in each node to hold results
2241     DataReady_ptr
2242     DataLazy::resolveNodeWorker()
2243     {
2244     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2245     {
2246     collapse();
2247     }
2248     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2249     {
2250     return m_id;
2251     }
2252     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2253     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
2254     ValueType& resvec=result->getVectorRW();
2255     DataReady_ptr resptr=DataReady_ptr(result);
2256    
2257     int sample;
2258     int totalsamples=getNumSamples();
2259     const ValueType* res=0; // Storage for answer
2260     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2261     #pragma omp parallel for private(sample,res) schedule(static)
2262     for (sample=0;sample<totalsamples;++sample)
2263     {
2264     size_t roffset=0;
2265     #ifdef _OPENMP
2266     res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2267     #else
2268     res=resolveNodeSample(0,sample,roffset);
2269     #endif
2270     LAZYDEBUG(cout << "Sample #" << sample << endl;)
2271     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2272     DataVector::size_type outoffset=result->getPointOffset(sample,0);
2273     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2274     }
2275     return resptr;
2276     }
2277    
2278     #endif // LAZY_NODE_STORAGE
2279    
2280 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
2281     // Each sample is evaluated independently and copied into the result DataExpanded.
2282 jfenwick 1879 DataReady_ptr
2283 jfenwick 2497 DataLazy::resolveVectorWorker()
2284 jfenwick 1879 {
2285    
2286 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2287     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
2288 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2289 jfenwick 1889 {
2290     collapse();
2291     }
2292 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2293 jfenwick 1889 {
2294     return m_id;
2295     }
2296     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2297 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
2298 jfenwick 1899 // storage to evaluate its expression
2299 jfenwick 1885 int numthreads=1;
2300     #ifdef _OPENMP
2301 jfenwick 2271 numthreads=omp_get_max_threads();
2302 jfenwick 1885 #endif
2303 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
2304 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2305 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
2306 jfenwick 2271 ValueType& resvec=result->getVectorRW();
2307 jfenwick 1879 DataReady_ptr resptr=DataReady_ptr(result);
2308     int sample;
2309 jfenwick 1898 size_t outoffset; // offset in the output data
2310 jfenwick 1885 int totalsamples=getNumSamples();
2311 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
2312     size_t resoffset=0; // where in the vector to find the answer
2313 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2314 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2315 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
2316 jfenwick 1879 {
2317 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
2318 jfenwick 1885 #ifdef _OPENMP
2319 jfenwick 2521 res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2320 jfenwick 1885 #else
2321 jfenwick 2521 res=resolveVectorSample(v,0,sample,resoffset); // res would normally be v, but not if its a single IDENTITY op.
2322 jfenwick 1885 #endif
2323 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2324 jfenwick 2157 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2325 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
2326 jfenwick 2157 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2327 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
2328 jfenwick 1879 {
2329 jfenwick 2166 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2330 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
2331 jfenwick 1879 }
2332 jfenwick 2157 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2333 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
2334 jfenwick 1879 }
2335     return resptr;
2336     }
2337    
2338 jfenwick 1865 std::string
2339     DataLazy::toString() const
2340     {
2341 jfenwick 1886 ostringstream oss;
2342     oss << "Lazy Data:";
2343     intoString(oss);
2344     return oss.str();
2345 jfenwick 1865 }
2346    
2347 jfenwick 1899
2348 jfenwick 1886 void
2349     DataLazy::intoString(ostringstream& oss) const
2350     {
2351 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
2352 jfenwick 1886 switch (getOpgroup(m_op))
2353     {
2354 jfenwick 1889 case