/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2500 - (hide annotations)
Tue Jun 30 00:42:38 2009 UTC (9 years, 9 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 81775 byte(s)
Experimental per node cache for lazy evaluation is now available via the 
LAZY_NODE_STORAGE #define 
It's a bit slower and larger for small problems but a bit faster and 
smaller for large (drucker prager) problems.

1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2271 // #define SIZELIMIT
46 jfenwick 2177 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
47 jfenwick 2472 //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}
48 jfenwick 2177
49 jfenwick 2472 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
50    
51 jfenwick 1899 /*
52     How does DataLazy work?
53     ~~~~~~~~~~~~~~~~~~~~~~~
54    
55     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
56     denoted left and right.
57    
58     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
59     This means that all "internal" nodes in the structure are instances of DataLazy.
60    
61     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
62     Note that IDENITY is not considered a unary operation.
63    
64     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
65     It must however form a DAG (directed acyclic graph).
66     I will refer to individual DataLazy objects with the structure as nodes.
67    
68     Each node also stores:
69     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70     evaluated.
71     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
72     evaluate the expression.
73     - m_samplesize ~ the number of doubles stored in a sample.
74    
75     When a new node is created, the above values are computed based on the values in the child nodes.
76     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
77    
78     The resolve method, which produces a DataReady from a DataLazy, does the following:
79     1) Create a DataReady to hold the new result.
80     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
81     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
82    
83     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
84    
85     resolveSample returns a Vector* and an offset within that vector where the result is stored.
86     Normally, this would be v, but for identity nodes their internal vector is returned instead.
87    
88     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
89    
90     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92 jfenwick 2037
93 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94 jfenwick 2037 1) Add to the ES_optype.
95     2) determine what opgroup your operation belongs to (X)
96     3) add a string for the op to the end of ES_opstrings
97     4) increase ES_opcount
98     5) add an entry (X) to opgroups
99     6) add an entry to the switch in collapseToReady
100     7) add an entry to resolveX
101 jfenwick 1899 */
102    
103    
104 jfenwick 1865 using namespace std;
105 jfenwick 1868 using namespace boost;
106 jfenwick 1865
107     namespace escript
108     {
109    
110     namespace
111     {
112    
113 jfenwick 1886 enum ES_opgroup
114     {
115     G_UNKNOWN,
116     G_IDENTITY,
117 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
118 jfenwick 2037 G_UNARY, // pointwise operations with one argument
119 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
120 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
121 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
122 jfenwick 2496 G_TENSORPROD, // general tensor product
123     G_NP1OUT_2P // non-pointwise op with one output requiring two params
124 jfenwick 1886 };
125    
126    
127    
128    
129 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130     "sin","cos","tan",
131 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
132     "asinh","acosh","atanh",
133 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
134 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135 jfenwick 2066 "symmetric","nonsymmetric",
136 jfenwick 2084 "prod",
137 jfenwick 2496 "transpose", "trace",
138     "swapaxes"};
139     int ES_opcount=41;
140 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
141     G_UNARY,G_UNARY,G_UNARY, //10
142     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
143     G_UNARY,G_UNARY,G_UNARY, // 20
144 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
145     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
146 jfenwick 2066 G_NP1OUT,G_NP1OUT,
147 jfenwick 2084 G_TENSORPROD,
148 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
149     G_NP1OUT_2P};
150 jfenwick 1886 inline
151     ES_opgroup
152     getOpgroup(ES_optype op)
153     {
154     return opgroups[op];
155     }
156    
157 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
158     FunctionSpace
159     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
160     {
161     // perhaps this should call interpolate and throw or something?
162     // maybe we need an interpolate node -
163     // that way, if interpolate is required in any other op we can just throw a
164     // programming error exception.
165 jfenwick 1879
166 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
167     FunctionSpace r=right->getFunctionSpace();
168     if (l!=r)
169     {
170     if (r.probeInterpolation(l))
171     {
172     return l;
173     }
174     if (l.probeInterpolation(r))
175     {
176     return r;
177     }
178     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
179     }
180     return l;
181 jfenwick 1865 }
182    
183     // return the shape of the result of "left op right"
184 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
185 jfenwick 1865 DataTypes::ShapeType
186     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 jfenwick 1879 if (left->getShape()!=right->getShape())
189     {
190 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
191 jfenwick 1908 {
192     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
193     }
194     if (left->getRank()==0) // we need to allow scalar * anything
195     {
196     return right->getShape();
197     }
198     if (right->getRank()==0)
199     {
200     return left->getShape();
201     }
202     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
203 jfenwick 1879 }
204     return left->getShape();
205 jfenwick 1865 }
206    
207 jfenwick 2084 // return the shape for "op left"
208    
209     DataTypes::ShapeType
210 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
211 jfenwick 2084 {
212     switch(op)
213     {
214     case TRANS:
215 jfenwick 2199 { // for the scoping of variables
216     const DataTypes::ShapeType& s=left->getShape();
217     DataTypes::ShapeType sh;
218     int rank=left->getRank();
219     if (axis_offset<0 || axis_offset>rank)
220     {
221     throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
222     }
223     for (int i=0; i<rank; i++)
224     {
225     int index = (axis_offset+i)%rank;
226     sh.push_back(s[index]); // Append to new shape
227     }
228     return sh;
229     }
230 jfenwick 2084 break;
231     case TRACE:
232 jfenwick 2199 {
233     int rank=left->getRank();
234     if (rank<2)
235     {
236     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
237     }
238     if ((axis_offset>rank-2) || (axis_offset<0))
239     {
240     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
241     }
242     if (rank==2)
243     {
244     return DataTypes::scalarShape;
245     }
246     else if (rank==3)
247     {
248     DataTypes::ShapeType sh;
249     if (axis_offset==0)
250     {
251     sh.push_back(left->getShape()[2]);
252     }
253     else // offset==1
254     {
255     sh.push_back(left->getShape()[0]);
256     }
257     return sh;
258     }
259     else if (rank==4)
260     {
261     DataTypes::ShapeType sh;
262     const DataTypes::ShapeType& s=left->getShape();
263     if (axis_offset==0)
264     {
265     sh.push_back(s[2]);
266     sh.push_back(s[3]);
267     }
268     else if (axis_offset==1)
269     {
270     sh.push_back(s[0]);
271     sh.push_back(s[3]);
272     }
273     else // offset==2
274     {
275     sh.push_back(s[0]);
276     sh.push_back(s[1]);
277     }
278     return sh;
279     }
280     else // unknown rank
281     {
282     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
283     }
284     }
285 jfenwick 2084 break;
286     default:
287     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
288     }
289     }
290    
291 jfenwick 2496 DataTypes::ShapeType
292     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
293     {
294     // This code taken from the Data.cpp swapaxes() method
295     // Some of the checks are probably redundant here
296     int axis0_tmp,axis1_tmp;
297     const DataTypes::ShapeType& s=left->getShape();
298     DataTypes::ShapeType out_shape;
299     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
300     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
301     int rank=left->getRank();
302     if (rank<2) {
303     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
304     }
305     if (axis0<0 || axis0>rank-1) {
306     throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
307     }
308     if (axis1<0 || axis1>rank-1) {
309     throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
310     }
311     if (axis0 == axis1) {
312     throw DataException("Error - Data::swapaxes: axis indices must be different.");
313     }
314     if (axis0 > axis1) {
315     axis0_tmp=axis1;
316     axis1_tmp=axis0;
317     } else {
318     axis0_tmp=axis0;
319     axis1_tmp=axis1;
320     }
321     for (int i=0; i<rank; i++) {
322     if (i == axis0_tmp) {
323     out_shape.push_back(s[axis1_tmp]);
324     } else if (i == axis1_tmp) {
325     out_shape.push_back(s[axis0_tmp]);
326     } else {
327     out_shape.push_back(s[i]);
328     }
329     }
330     return out_shape;
331     }
332    
333    
334 jfenwick 2066 // determine the output shape for the general tensor product operation
335     // the additional parameters return information required later for the product
336     // the majority of this code is copy pasted from C_General_Tensor_Product
337     DataTypes::ShapeType
338     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
339 jfenwick 1865 {
340 jfenwick 2066
341     // Get rank and shape of inputs
342     int rank0 = left->getRank();
343     int rank1 = right->getRank();
344     const DataTypes::ShapeType& shape0 = left->getShape();
345     const DataTypes::ShapeType& shape1 = right->getShape();
346    
347     // Prepare for the loops of the product and verify compatibility of shapes
348     int start0=0, start1=0;
349     if (transpose == 0) {}
350     else if (transpose == 1) { start0 = axis_offset; }
351     else if (transpose == 2) { start1 = rank1-axis_offset; }
352     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
353    
354 jfenwick 2085 if (rank0<axis_offset)
355     {
356     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
357     }
358 jfenwick 2066
359     // Adjust the shapes for transpose
360     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
361     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
362     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
363     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
364    
365     // Prepare for the loops of the product
366     SL=1, SM=1, SR=1;
367     for (int i=0; i<rank0-axis_offset; i++) {
368     SL *= tmpShape0[i];
369     }
370     for (int i=rank0-axis_offset; i<rank0; i++) {
371     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
372     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
373     }
374     SM *= tmpShape0[i];
375     }
376     for (int i=axis_offset; i<rank1; i++) {
377     SR *= tmpShape1[i];
378     }
379    
380     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
381     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
382     { // block to limit the scope of out_index
383     int out_index=0;
384     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
385     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
386     }
387 jfenwick 2086
388     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
389     {
390     ostringstream os;
391     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
392     throw DataException(os.str());
393     }
394    
395 jfenwick 2066 return shape2;
396 jfenwick 1865 }
397    
398 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
399 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
400 jfenwick 2066 // The same goes for G_TENSORPROD
401 jfenwick 2271 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
402 jfenwick 2157 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
403     // multiple times
404 jfenwick 1879 int
405     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
406     {
407 jfenwick 1886 switch(getOpgroup(op))
408 jfenwick 1879 {
409 jfenwick 1886 case G_IDENTITY: return 1;
410 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
411 jfenwick 2147 case G_UNARY:
412     case G_UNARY_P: return max(left->getBuffsRequired(),1);
413 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
414 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
415 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
416 jfenwick 2496 case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
417 jfenwick 1879 default:
418     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
419     }
420     }
421    
422 jfenwick 1899
423 jfenwick 1865 } // end anonymous namespace
424    
425    
426 jfenwick 1899
427     // Return a string representing the operation
428 jfenwick 1865 const std::string&
429     opToString(ES_optype op)
430     {
431     if (op<0 || op>=ES_opcount)
432     {
433     op=UNKNOWNOP;
434     }
435     return ES_opstrings[op];
436     }
437    
438 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
439     void DataLazy::LazyNodeSetup()
440     {
441     #ifdef _OPENMP
442     int numthreads=omp_get_max_threads();
443     m_samples.resize(numthreads*m_samplesize);
444     m_sampleids=new int[numthreads];
445     for (int i=0;i<numthreads;++i)
446     {
447     m_sampleids[i]=-1;
448     }
449     #else
450     m_samples.resize(m_samplesize);
451     m_sampleids=new int[1];
452     m_sampleids[0]=-1;
453     #endif // _OPENMP
454     }
455     #endif // LAZY_NODE_STORAGE
456 jfenwick 1865
457 jfenwick 2500
458     // Creates an identity node
459 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
460 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
461 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
462     ,m_sampleids(0),
463     m_samples(1)
464     #endif
465 jfenwick 1865 {
466 jfenwick 1879 if (p->isLazy())
467     {
468     // I don't want identity of Lazy.
469     // Question: Why would that be so bad?
470     // Answer: We assume that the child of ID is something we can call getVector on
471     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
472     }
473     else
474     {
475 jfenwick 2271 p->makeLazyShared();
476 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
477     makeIdentity(dr);
478 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
479 jfenwick 1879 }
480 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
481 jfenwick 1865 }
482    
483 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
484     : parent(left->getFunctionSpace(),left->getShape()),
485 jfenwick 2066 m_op(op),
486     m_axis_offset(0),
487     m_transpose(0),
488     m_SL(0), m_SM(0), m_SR(0)
489 jfenwick 1886 {
490 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
491 jfenwick 1886 {
492     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
493     }
494 jfenwick 2066
495 jfenwick 1886 DataLazy_ptr lleft;
496     if (!left->isLazy())
497     {
498     lleft=DataLazy_ptr(new DataLazy(left));
499     }
500     else
501     {
502     lleft=dynamic_pointer_cast<DataLazy>(left);
503     }
504 jfenwick 1889 m_readytype=lleft->m_readytype;
505 jfenwick 1886 m_left=lleft;
506 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
507 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
508 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
509 jfenwick 2177 m_children=m_left->m_children+1;
510     m_height=m_left->m_height+1;
511 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
512     LazyNodeSetup();
513     #endif
514 jfenwick 2177 SIZELIMIT
515 jfenwick 1886 }
516    
517    
518 jfenwick 1943 // In this constructor we need to consider interpolation
519 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
520     : parent(resultFS(left,right,op), resultShape(left,right,op)),
521 jfenwick 2066 m_op(op),
522     m_SL(0), m_SM(0), m_SR(0)
523 jfenwick 1879 {
524 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
525 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
526 jfenwick 1886 {
527 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
528 jfenwick 1886 }
529 jfenwick 1943
530     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
531     {
532     FunctionSpace fs=getFunctionSpace();
533     Data ltemp(left);
534     Data tmp(ltemp,fs);
535     left=tmp.borrowDataPtr();
536     }
537 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
538 jfenwick 1943 {
539     Data tmp(Data(right),getFunctionSpace());
540     right=tmp.borrowDataPtr();
541 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
542 jfenwick 1943 }
543     left->operandCheck(*right);
544    
545 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
546 jfenwick 1879 {
547     m_left=dynamic_pointer_cast<DataLazy>(left);
548 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
549 jfenwick 1879 }
550     else
551     {
552     m_left=DataLazy_ptr(new DataLazy(left));
553 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
554 jfenwick 1879 }
555     if (right->isLazy())
556     {
557     m_right=dynamic_pointer_cast<DataLazy>(right);
558 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
559 jfenwick 1879 }
560     else
561     {
562     m_right=DataLazy_ptr(new DataLazy(right));
563 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
564 jfenwick 1879 }
565 jfenwick 1889 char lt=m_left->m_readytype;
566     char rt=m_right->m_readytype;
567     if (lt=='E' || rt=='E')
568     {
569     m_readytype='E';
570     }
571     else if (lt=='T' || rt=='T')
572     {
573     m_readytype='T';
574     }
575     else
576     {
577     m_readytype='C';
578     }
579 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
580     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
581 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
582 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
583     m_height=max(m_left->m_height,m_right->m_height)+1;
584 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
585     LazyNodeSetup();
586     #endif
587 jfenwick 2177 SIZELIMIT
588 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589 jfenwick 1879 }
590    
591 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
592     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
593     m_op(op),
594     m_axis_offset(axis_offset),
595     m_transpose(transpose)
596     {
597     if ((getOpgroup(op)!=G_TENSORPROD))
598     {
599     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
600     }
601     if ((transpose>2) || (transpose<0))
602     {
603     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
604     }
605     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
606     {
607     FunctionSpace fs=getFunctionSpace();
608     Data ltemp(left);
609     Data tmp(ltemp,fs);
610     left=tmp.borrowDataPtr();
611     }
612     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
613     {
614     Data tmp(Data(right),getFunctionSpace());
615     right=tmp.borrowDataPtr();
616     }
617 jfenwick 2195 // left->operandCheck(*right);
618 jfenwick 1879
619 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
620     {
621     m_left=dynamic_pointer_cast<DataLazy>(left);
622     }
623     else
624     {
625     m_left=DataLazy_ptr(new DataLazy(left));
626     }
627     if (right->isLazy())
628     {
629     m_right=dynamic_pointer_cast<DataLazy>(right);
630     }
631     else
632     {
633     m_right=DataLazy_ptr(new DataLazy(right));
634     }
635     char lt=m_left->m_readytype;
636     char rt=m_right->m_readytype;
637     if (lt=='E' || rt=='E')
638     {
639     m_readytype='E';
640     }
641     else if (lt=='T' || rt=='T')
642     {
643     m_readytype='T';
644     }
645     else
646     {
647     m_readytype='C';
648     }
649     m_samplesize=getNumDPPSample()*getNoValues();
650     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
651     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
652 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
653     m_height=max(m_left->m_height,m_right->m_height)+1;
654 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
655     LazyNodeSetup();
656     #endif
657 jfenwick 2177 SIZELIMIT
658 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659 jfenwick 2066 }
660    
661    
662 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
663 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
664 jfenwick 2084 m_op(op),
665     m_axis_offset(axis_offset),
666 jfenwick 2147 m_transpose(0),
667     m_tol(0)
668 jfenwick 2084 {
669     if ((getOpgroup(op)!=G_NP1OUT_P))
670     {
671     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
672     }
673     DataLazy_ptr lleft;
674     if (!left->isLazy())
675     {
676     lleft=DataLazy_ptr(new DataLazy(left));
677     }
678     else
679     {
680     lleft=dynamic_pointer_cast<DataLazy>(left);
681     }
682     m_readytype=lleft->m_readytype;
683     m_left=lleft;
684     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
685     m_samplesize=getNumDPPSample()*getNoValues();
686     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
687 jfenwick 2177 m_children=m_left->m_children+1;
688     m_height=m_left->m_height+1;
689 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
690     LazyNodeSetup();
691     #endif
692 jfenwick 2177 SIZELIMIT
693 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
694 jfenwick 2084 }
695    
696 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
697     : parent(left->getFunctionSpace(), left->getShape()),
698     m_op(op),
699     m_axis_offset(0),
700     m_transpose(0),
701     m_tol(tol)
702     {
703     if ((getOpgroup(op)!=G_UNARY_P))
704     {
705     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
706     }
707     DataLazy_ptr lleft;
708     if (!left->isLazy())
709     {
710     lleft=DataLazy_ptr(new DataLazy(left));
711     }
712     else
713     {
714     lleft=dynamic_pointer_cast<DataLazy>(left);
715     }
716     m_readytype=lleft->m_readytype;
717     m_left=lleft;
718     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
719     m_samplesize=getNumDPPSample()*getNoValues();
720     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
721 jfenwick 2177 m_children=m_left->m_children+1;
722     m_height=m_left->m_height+1;
723 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
724     LazyNodeSetup();
725     #endif
726 jfenwick 2177 SIZELIMIT
727 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
728     }
729 jfenwick 2084
730 jfenwick 2496
731     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
732     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
733     m_op(op),
734     m_axis_offset(axis0),
735     m_transpose(axis1),
736     m_tol(0)
737     {
738     if ((getOpgroup(op)!=G_NP1OUT_2P))
739     {
740     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
741     }
742     DataLazy_ptr lleft;
743     if (!left->isLazy())
744     {
745     lleft=DataLazy_ptr(new DataLazy(left));
746     }
747     else
748     {
749     lleft=dynamic_pointer_cast<DataLazy>(left);
750     }
751     m_readytype=lleft->m_readytype;
752     m_left=lleft;
753     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
754     m_samplesize=getNumDPPSample()*getNoValues();
755     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
756     m_children=m_left->m_children+1;
757     m_height=m_left->m_height+1;
758 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
759     LazyNodeSetup();
760     #endif
761 jfenwick 2496 SIZELIMIT
762     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
763     }
764    
765 jfenwick 1865 DataLazy::~DataLazy()
766     {
767 jfenwick 2500 #ifdef LAZY_NODE_SETUP
768     delete[] m_sampleids;
769     delete[] m_samples;
770     #endif
771 jfenwick 1865 }
772    
773 jfenwick 1879
774     int
775     DataLazy::getBuffsRequired() const
776 jfenwick 1865 {
777 jfenwick 1879 return m_buffsRequired;
778     }
779    
780 jfenwick 1884
781 jfenwick 2066 size_t
782     DataLazy::getMaxSampleSize() const
783     {
784     return m_maxsamplesize;
785     }
786    
787 jfenwick 2271
788    
789     size_t
790     DataLazy::getSampleBufferSize() const
791     {
792     return m_maxsamplesize*(max(1,m_buffsRequired));
793     }
794    
795 jfenwick 1899 /*
796     \brief Evaluates the expression using methods on Data.
797     This does the work for the collapse method.
798     For reasons of efficiency do not call this method on DataExpanded nodes.
799     */
800 jfenwick 1889 DataReady_ptr
801     DataLazy::collapseToReady()
802     {
803     if (m_readytype=='E')
804     { // this is more an efficiency concern than anything else
805     throw DataException("Programmer Error - do not use collapse on Expanded data.");
806     }
807     if (m_op==IDENTITY)
808     {
809     return m_id;
810     }
811     DataReady_ptr pleft=m_left->collapseToReady();
812     Data left(pleft);
813     Data right;
814 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
815 jfenwick 1889 {
816     right=Data(m_right->collapseToReady());
817     }
818     Data result;
819     switch(m_op)
820     {
821     case ADD:
822     result=left+right;
823     break;
824     case SUB:
825     result=left-right;
826     break;
827     case MUL:
828     result=left*right;
829     break;
830     case DIV:
831     result=left/right;
832     break;
833     case SIN:
834     result=left.sin();
835     break;
836     case COS:
837     result=left.cos();
838     break;
839     case TAN:
840     result=left.tan();
841     break;
842     case ASIN:
843     result=left.asin();
844     break;
845     case ACOS:
846     result=left.acos();
847     break;
848     case ATAN:
849     result=left.atan();
850     break;
851     case SINH:
852     result=left.sinh();
853     break;
854     case COSH:
855     result=left.cosh();
856     break;
857     case TANH:
858     result=left.tanh();
859     break;
860     case ERF:
861     result=left.erf();
862     break;
863     case ASINH:
864     result=left.asinh();
865     break;
866     case ACOSH:
867     result=left.acosh();
868     break;
869     case ATANH:
870     result=left.atanh();
871     break;
872     case LOG10:
873     result=left.log10();
874     break;
875     case LOG:
876     result=left.log();
877     break;
878     case SIGN:
879     result=left.sign();
880     break;
881     case ABS:
882     result=left.abs();
883     break;
884     case NEG:
885     result=left.neg();
886     break;
887     case POS:
888     // it doesn't mean anything for delayed.
889     // it will just trigger a deep copy of the lazy object
890     throw DataException("Programmer error - POS not supported for lazy data.");
891     break;
892     case EXP:
893     result=left.exp();
894     break;
895     case SQRT:
896     result=left.sqrt();
897     break;
898     case RECIP:
899     result=left.oneOver();
900     break;
901     case GZ:
902     result=left.wherePositive();
903     break;
904     case LZ:
905     result=left.whereNegative();
906     break;
907     case GEZ:
908     result=left.whereNonNegative();
909     break;
910     case LEZ:
911     result=left.whereNonPositive();
912     break;
913 jfenwick 2147 case NEZ:
914     result=left.whereNonZero(m_tol);
915     break;
916     case EZ:
917     result=left.whereZero(m_tol);
918     break;
919 jfenwick 2037 case SYM:
920     result=left.symmetric();
921     break;
922     case NSYM:
923     result=left.nonsymmetric();
924     break;
925 jfenwick 2066 case PROD:
926     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
927     break;
928 jfenwick 2084 case TRANS:
929     result=left.transpose(m_axis_offset);
930     break;
931     case TRACE:
932     result=left.trace(m_axis_offset);
933     break;
934 jfenwick 2496 case SWAP:
935     result=left.swapaxes(m_axis_offset, m_transpose);
936     break;
937 jfenwick 1889 default:
938 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
939 jfenwick 1889 }
940     return result.borrowReadyPtr();
941     }
942    
943 jfenwick 1899 /*
944     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
945     This method uses the original methods on the Data class to evaluate the expressions.
946     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
947     the purpose of using DataLazy in the first place).
948     */
949 jfenwick 1889 void
950     DataLazy::collapse()
951     {
952     if (m_op==IDENTITY)
953     {
954     return;
955     }
956     if (m_readytype=='E')
957     { // this is more an efficiency concern than anything else
958     throw DataException("Programmer Error - do not use collapse on Expanded data.");
959     }
960     m_id=collapseToReady();
961     m_op=IDENTITY;
962     }
963    
964 jfenwick 1899 /*
965 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
966 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
967     \param v A vector to store intermediate results.
968     \param offset Index in v to begin storing results.
969     \param sampleNo Sample number to evaluate.
970     \param roffset (output parameter) the offset in the return vector where the result begins.
971    
972     The return value will be an existing vector so do not deallocate it.
973     If the result is stored in v it should be stored at the offset given.
974     Everything from offset to the end of v should be considered available for this method to use.
975     */
976 jfenwick 1898 DataTypes::ValueType*
977     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
978 jfenwick 1889 {
979     // we assume that any collapsing has been done before we get here
980     // since we only have one argument we don't need to think about only
981     // processing single points.
982     if (m_readytype!='E')
983     {
984     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
985     }
986 jfenwick 1898 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
987     const double* left=&((*vleft)[roffset]);
988 jfenwick 1889 double* result=&(v[offset]);
989 jfenwick 1898 roffset=offset;
990 jfenwick 1889 switch (m_op)
991     {
992     case SIN:
993 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
994 jfenwick 1889 break;
995     case COS:
996 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
997 jfenwick 1889 break;
998     case TAN:
999 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1000 jfenwick 1889 break;
1001     case ASIN:
1002 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1003 jfenwick 1889 break;
1004     case ACOS:
1005 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1006 jfenwick 1889 break;
1007     case ATAN:
1008 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1009 jfenwick 1889 break;
1010     case SINH:
1011 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1012 jfenwick 1889 break;
1013     case COSH:
1014 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1015 jfenwick 1889 break;
1016     case TANH:
1017 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1018 jfenwick 1889 break;
1019     case ERF:
1020 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1021 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1022     #else
1023     tensor_unary_operation(m_samplesize, left, result, ::erf);
1024     break;
1025     #endif
1026     case ASINH:
1027 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1028 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1029     #else
1030     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1031     #endif
1032     break;
1033     case ACOSH:
1034 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1035 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1036     #else
1037     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1038     #endif
1039     break;
1040     case ATANH:
1041 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1042 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1043     #else
1044     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1045     #endif
1046     break;
1047     case LOG10:
1048 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1049 jfenwick 1889 break;
1050     case LOG:
1051 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1052 jfenwick 1889 break;
1053     case SIGN:
1054     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1055     break;
1056     case ABS:
1057 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1058 jfenwick 1889 break;
1059     case NEG:
1060     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1061     break;
1062     case POS:
1063     // it doesn't mean anything for delayed.
1064     // it will just trigger a deep copy of the lazy object
1065     throw DataException("Programmer error - POS not supported for lazy data.");
1066     break;
1067     case EXP:
1068 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1069 jfenwick 1889 break;
1070     case SQRT:
1071 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1072 jfenwick 1889 break;
1073     case RECIP:
1074     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1075     break;
1076     case GZ:
1077     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1078     break;
1079     case LZ:
1080     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1081     break;
1082     case GEZ:
1083     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1084     break;
1085     case LEZ:
1086     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1087     break;
1088 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1089     case NEZ:
1090     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1091     break;
1092     case EZ:
1093     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1094     break;
1095 jfenwick 1889
1096     default:
1097     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1098     }
1099 jfenwick 1898 return &v;
1100 jfenwick 1889 }
1101    
1102    
1103 jfenwick 2147
1104    
1105    
1106    
1107 jfenwick 2037 /*
1108     \brief Compute the value of the expression (unary operation) for the given sample.
1109     \return Vector which stores the value of the subexpression for the given sample.
1110     \param v A vector to store intermediate results.
1111     \param offset Index in v to begin storing results.
1112     \param sampleNo Sample number to evaluate.
1113     \param roffset (output parameter) the offset in the return vector where the result begins.
1114 jfenwick 1898
1115 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
1116     If the result is stored in v it should be stored at the offset given.
1117     Everything from offset to the end of v should be considered available for this method to use.
1118     */
1119     DataTypes::ValueType*
1120     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1121     {
1122     // we assume that any collapsing has been done before we get here
1123     // since we only have one argument we don't need to think about only
1124     // processing single points.
1125     if (m_readytype!='E')
1126     {
1127     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1128     }
1129     // since we can't write the result over the input, we need a result offset further along
1130     size_t subroffset=roffset+m_samplesize;
1131 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1132     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1133 jfenwick 2037 roffset=offset;
1134 jfenwick 2157 size_t loop=0;
1135     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1136     size_t step=getNoValues();
1137 jfenwick 2037 switch (m_op)
1138     {
1139     case SYM:
1140 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1141     {
1142     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1143     subroffset+=step;
1144     offset+=step;
1145     }
1146 jfenwick 2037 break;
1147     case NSYM:
1148 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1149     {
1150     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1151     subroffset+=step;
1152     offset+=step;
1153     }
1154 jfenwick 2037 break;
1155     default:
1156     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1157     }
1158     return &v;
1159     }
1160 jfenwick 1898
1161 jfenwick 2084 /*
1162     \brief Compute the value of the expression (unary operation) for the given sample.
1163     \return Vector which stores the value of the subexpression for the given sample.
1164     \param v A vector to store intermediate results.
1165     \param offset Index in v to begin storing results.
1166     \param sampleNo Sample number to evaluate.
1167     \param roffset (output parameter) the offset in the return vector where the result begins.
1168 jfenwick 1899
1169 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
1170     If the result is stored in v it should be stored at the offset given.
1171     Everything from offset to the end of v should be considered available for this method to use.
1172     */
1173     DataTypes::ValueType*
1174     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1175     {
1176     // we assume that any collapsing has been done before we get here
1177     // since we only have one argument we don't need to think about only
1178     // processing single points.
1179     if (m_readytype!='E')
1180     {
1181     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1182     }
1183     // since we can't write the result over the input, we need a result offset further along
1184 jfenwick 2157 size_t subroffset;
1185     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1186 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1187     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1188 jfenwick 2084 roffset=offset;
1189 jfenwick 2157 size_t loop=0;
1190     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1191 jfenwick 2166 size_t outstep=getNoValues();
1192     size_t instep=m_left->getNoValues();
1193     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1194 jfenwick 2084 switch (m_op)
1195     {
1196     case TRACE:
1197 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1198     {
1199 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
1200     if (zz==5800)
1201     {
1202     LAZYDEBUG(cerr << "point=" << zz<< endl;)
1203     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1204     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1205     LAZYDEBUG(cerr << subroffset << endl;)
1206     LAZYDEBUG(cerr << "output=" << offset << endl;)
1207     }
1208     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1209     if (zz==5800)
1210     {
1211     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1212     }
1213     subroffset+=instep;
1214     offset+=outstep;
1215 jfenwick 2157 }
1216 jfenwick 2084 break;
1217     case TRANS:
1218 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1219     {
1220     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1221 jfenwick 2166 subroffset+=instep;
1222     offset+=outstep;
1223 jfenwick 2157 }
1224 jfenwick 2084 break;
1225     default:
1226     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1227     }
1228     return &v;
1229     }
1230 jfenwick 2037
1231    
1232 jfenwick 2496 /*
1233     \brief Compute the value of the expression (unary operation with int params) for the given sample.
1234     \return Vector which stores the value of the subexpression for the given sample.
1235     \param v A vector to store intermediate results.
1236     \param offset Index in v to begin storing results.
1237     \param sampleNo Sample number to evaluate.
1238     \param roffset (output parameter) the offset in the return vector where the result begins.
1239    
1240     The return value will be an existing vector so do not deallocate it.
1241     If the result is stored in v it should be stored at the offset given.
1242     Everything from offset to the end of v should be considered available for this method to use.
1243     */
1244     DataTypes::ValueType*
1245     DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1246     {
1247     // we assume that any collapsing has been done before we get here
1248     // since we only have one argument we don't need to think about only
1249     // processing single points.
1250     if (m_readytype!='E')
1251     {
1252     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1253     }
1254     // since we can't write the result over the input, we need a result offset further along
1255     size_t subroffset;
1256     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1257     LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1258     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1259     roffset=offset;
1260     size_t loop=0;
1261     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1262     size_t outstep=getNoValues();
1263     size_t instep=m_left->getNoValues();
1264     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1265     switch (m_op)
1266     {
1267     case SWAP:
1268     for (loop=0;loop<numsteps;++loop)
1269     {
1270     DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1271     subroffset+=instep;
1272     offset+=outstep;
1273     }
1274     break;
1275     default:
1276     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1277     }
1278     return &v;
1279     }
1280    
1281    
1282    
1283 phornby 1987 #define PROC_OP(TYPE,X) \
1284 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1285     {\
1286     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1287     { \
1288     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1289 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1290 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1291 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1292 jfenwick 2152 lroffset+=leftstep; \
1293     rroffset+=rightstep; \
1294     }\
1295     lroffset+=oleftstep;\
1296     rroffset+=orightstep;\
1297 jfenwick 1889 }
1298    
1299 jfenwick 1899 /*
1300     \brief Compute the value of the expression (binary operation) for the given sample.
1301     \return Vector which stores the value of the subexpression for the given sample.
1302     \param v A vector to store intermediate results.
1303     \param offset Index in v to begin storing results.
1304     \param sampleNo Sample number to evaluate.
1305     \param roffset (output parameter) the offset in the return vector where the result begins.
1306    
1307     The return value will be an existing vector so do not deallocate it.
1308     If the result is stored in v it should be stored at the offset given.
1309     Everything from offset to the end of v should be considered available for this method to use.
1310     */
1311     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1312     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1313     // If both children are expanded, then we can process them in a single operation (we treat
1314     // the whole sample as one big datapoint.
1315     // If one of the children is not expanded, then we need to treat each point in the sample
1316     // individually.
1317 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1318     // For example, 2+Vector.
1319     // In this case each double within the point is treated individually
1320 jfenwick 1898 DataTypes::ValueType*
1321 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1322 jfenwick 1889 {
1323 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1324 jfenwick 1889
1325 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1326     // first work out which of the children are expanded
1327 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1328     bool rightExp=(m_right->m_readytype=='E');
1329 jfenwick 2084 if (!leftExp && !rightExp)
1330     {
1331     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1332     }
1333     bool leftScalar=(m_left->getRank()==0);
1334     bool rightScalar=(m_right->getRank()==0);
1335 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1336 jfenwick 1908 {
1337 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1338     }
1339     size_t leftsize=m_left->getNoValues();
1340     size_t rightsize=m_right->getNoValues();
1341     size_t chunksize=1; // how many doubles will be processed in one go
1342     int leftstep=0; // how far should the left offset advance after each step
1343     int rightstep=0;
1344     int numsteps=0; // total number of steps for the inner loop
1345     int oleftstep=0; // the o variables refer to the outer loop
1346     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1347     int onumsteps=1;
1348    
1349     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1350     bool RES=(rightExp && rightScalar);
1351     bool LS=(!leftExp && leftScalar); // left is a single scalar
1352     bool RS=(!rightExp && rightScalar);
1353     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1354     bool RN=(!rightExp && !rightScalar);
1355     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1356     bool REN=(rightExp && !rightScalar);
1357    
1358     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1359     {
1360     chunksize=m_left->getNumDPPSample()*leftsize;
1361     leftstep=0;
1362     rightstep=0;
1363     numsteps=1;
1364     }
1365     else if (LES || RES)
1366     {
1367     chunksize=1;
1368     if (LES) // left is an expanded scalar
1369 jfenwick 2084 {
1370 jfenwick 2152 if (RS)
1371     {
1372     leftstep=1;
1373     rightstep=0;
1374     numsteps=m_left->getNumDPPSample();
1375     }
1376     else // RN or REN
1377     {
1378     leftstep=0;
1379     oleftstep=1;
1380     rightstep=1;
1381 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1382 jfenwick 2152 numsteps=rightsize;
1383     onumsteps=m_left->getNumDPPSample();
1384     }
1385 jfenwick 2084 }
1386 jfenwick 2152 else // right is an expanded scalar
1387     {
1388     if (LS)
1389     {
1390     rightstep=1;
1391     leftstep=0;
1392     numsteps=m_right->getNumDPPSample();
1393     }
1394     else
1395     {
1396     rightstep=0;
1397     orightstep=1;
1398     leftstep=1;
1399 jfenwick 2177 oleftstep=(LN ? -(int)leftsize : 0);
1400 jfenwick 2152 numsteps=leftsize;
1401     onumsteps=m_right->getNumDPPSample();
1402     }
1403     }
1404     }
1405     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1406     {
1407     if (LEN) // and Right will be a single value
1408     {
1409     chunksize=rightsize;
1410     leftstep=rightsize;
1411     rightstep=0;
1412     numsteps=m_left->getNumDPPSample();
1413     if (RS)
1414     {
1415     numsteps*=leftsize;
1416     }
1417     }
1418     else // REN
1419     {
1420     chunksize=leftsize;
1421     rightstep=leftsize;
1422     leftstep=0;
1423     numsteps=m_right->getNumDPPSample();
1424     if (LS)
1425     {
1426     numsteps*=rightsize;
1427     }
1428     }
1429     }
1430    
1431     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1432 jfenwick 1899 // Get the values of sub-expressions
1433 jfenwick 2157 const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1434     // calcBufss for why we can't put offset as the 2nd param above
1435     const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1436 jfenwick 1899 // the right child starts further along.
1437 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1438     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1439     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1440     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1441     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1442     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1443     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1444 jfenwick 2195
1445 jfenwick 2199
1446 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1447 jfenwick 1889 switch(m_op)
1448     {
1449     case ADD:
1450 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1451 jfenwick 1889 break;
1452 jfenwick 1899 case SUB:
1453 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1454 jfenwick 1899 break;
1455     case MUL:
1456 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1457 jfenwick 1899 break;
1458     case DIV:
1459 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1460 jfenwick 1899 break;
1461 jfenwick 1910 case POW:
1462 phornby 1994 PROC_OP(double (double,double),::pow);
1463 jfenwick 1910 break;
1464 jfenwick 1889 default:
1465 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1466 jfenwick 1889 }
1467 jfenwick 2195 roffset=offset;
1468 jfenwick 1898 return &v;
1469 jfenwick 1889 }
1470    
1471 jfenwick 1898
1472 jfenwick 2152
1473 jfenwick 2066 /*
1474     \brief Compute the value of the expression (tensor product) for the given sample.
1475     \return Vector which stores the value of the subexpression for the given sample.
1476     \param v A vector to store intermediate results.
1477     \param offset Index in v to begin storing results.
1478     \param sampleNo Sample number to evaluate.
1479     \param roffset (output parameter) the offset in the return vector where the result begins.
1480 jfenwick 1898
1481 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1482     If the result is stored in v it should be stored at the offset given.
1483     Everything from offset to the end of v should be considered available for this method to use.
1484     */
1485     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1486     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1487     // unlike the other resolve helpers, we must treat these datapoints separately.
1488     DataTypes::ValueType*
1489     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1490     {
1491 jfenwick 2195 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)
1492 jfenwick 2066
1493     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1494     // first work out which of the children are expanded
1495     bool leftExp=(m_left->m_readytype=='E');
1496     bool rightExp=(m_right->m_readytype=='E');
1497     int steps=getNumDPPSample();
1498 jfenwick 2195 /* int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1499     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1500     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1501     int rightStep=(rightExp?m_right->getNoValues() : 0);
1502    
1503     int resultStep=getNoValues();
1504 jfenwick 2066 // Get the values of sub-expressions (leave a gap of one sample for the result).
1505 jfenwick 2199 int gap=offset+m_samplesize;
1506 jfenwick 2195
1507     LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1508    
1509 jfenwick 2153 const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1510 jfenwick 2199 gap+=m_left->getMaxSampleSize();
1511 jfenwick 2195
1512    
1513     LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1514    
1515    
1516     const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1517    
1518     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1519     cout << getNoValues() << endl;)
1520     LAZYDEBUG(cerr << "Result of left=";)
1521     LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1522 jfenwick 2199
1523     for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1524 jfenwick 2195 {
1525 jfenwick 2199 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1526     if (i%4==0) cout << endl;
1527 jfenwick 2195 })
1528     LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1529 jfenwick 2199 LAZYDEBUG(
1530     for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1531 jfenwick 2195 {
1532 jfenwick 2199 cerr << "[" << setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1533     if (i%4==0) cout << endl;
1534 jfenwick 2195 }
1535     cerr << endl;
1536     )
1537     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1538 jfenwick 2153 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1539     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1540     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1541     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1542     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1543 jfenwick 2195 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1544    
1545 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1546     switch(m_op)
1547     {
1548     case PROD:
1549     for (int i=0;i<steps;++i,resultp+=resultStep)
1550     {
1551 jfenwick 2199
1552 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1553     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1554     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1555 jfenwick 2199
1556 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1557     const double *ptr_1 = &((*right)[rroffset]);
1558 jfenwick 2199
1559 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1560     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1561 jfenwick 2199
1562 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1563 jfenwick 2199
1564 jfenwick 2195 LAZYDEBUG(cout << "Results--\n";
1565 jfenwick 2199 {
1566     DataVector dv(getNoValues());
1567 jfenwick 2195 for (int z=0;z<getNoValues();++z)
1568     {
1569 jfenwick 2199 cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1570     if (z%4==0) cout << endl;
1571     dv[z]=resultp[z];
1572 jfenwick 2195 }
1573 jfenwick 2199 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1574 jfenwick 2195 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1575 jfenwick 2199 }
1576 jfenwick 2195 )
1577 jfenwick 2066 lroffset+=leftStep;
1578     rroffset+=rightStep;
1579     }
1580     break;
1581     default:
1582     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1583     }
1584     roffset=offset;
1585     return &v;
1586     }
1587    
1588    
1589 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
1590 jfenwick 2066
1591 jfenwick 2500 // The result will be stored in m_samples
1592     // The return value is a pointer to the DataVector, offset is the offset within the return value
1593     const DataTypes::ValueType*
1594     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1595     {
1596     ENABLEDEBUG
1597     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1598     // collapse so we have a 'E' node or an IDENTITY for some other type
1599     if (m_readytype!='E' && m_op!=IDENTITY)
1600     {
1601     collapse();
1602     }
1603     if (m_op==IDENTITY)
1604     {
1605     const ValueType& vec=m_id->getVectorRO();
1606     // if (m_readytype=='C')
1607     // {
1608     // roffset=0; // all samples read from the same position
1609     // return &(m_samples);
1610     // }
1611     roffset=m_id->getPointOffset(sampleNo, 0);
1612     return &(vec);
1613     }
1614     if (m_readytype!='E')
1615     {
1616     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1617     }
1618     switch (getOpgroup(m_op))
1619     {
1620     case G_UNARY:
1621     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1622     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1623     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1624     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1625     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1626     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1627     default:
1628     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1629     }
1630     }
1631    
1632     const DataTypes::ValueType*
1633     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1634     {
1635     // we assume that any collapsing has been done before we get here
1636     // since we only have one argument we don't need to think about only
1637     // processing single points.
1638     // we will also know we won't get identity nodes
1639     if (m_readytype!='E')
1640     {
1641     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1642     }
1643     if (m_op==IDENTITY)
1644     {
1645     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1646     }
1647     if (m_sampleids[tid]==sampleNo)
1648     {
1649     roffset=tid*m_samplesize;
1650     return &(m_samples); // sample is already resolved
1651     }
1652     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1653     const double* left=&((*leftres)[roffset]);
1654     roffset=m_samplesize*tid;
1655     double* result=&(m_samples[roffset]);
1656     switch (m_op)
1657     {
1658     case SIN:
1659     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1660     break;
1661     case COS:
1662     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1663     break;
1664     case TAN:
1665     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1666     break;
1667     case ASIN:
1668     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1669     break;
1670     case ACOS:
1671     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1672     break;
1673     case ATAN:
1674     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1675     break;
1676     case SINH:
1677     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1678     break;
1679     case COSH:
1680     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1681     break;
1682     case TANH:
1683     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1684     break;
1685     case ERF:
1686     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1687     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1688     #else
1689     tensor_unary_operation(m_samplesize, left, result, ::erf);
1690     break;
1691     #endif
1692     case ASINH:
1693     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1694     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1695     #else
1696     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1697     #endif
1698     break;
1699     case ACOSH:
1700     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1701     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1702     #else
1703     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1704     #endif
1705     break;
1706     case ATANH:
1707     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1708     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1709     #else
1710     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1711     #endif
1712     break;
1713     case LOG10:
1714     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1715     break;
1716     case LOG:
1717     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1718     break;
1719     case SIGN:
1720     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1721     break;
1722     case ABS:
1723     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1724     break;
1725     case NEG:
1726     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1727     break;
1728     case POS:
1729     // it doesn't mean anything for delayed.
1730     // it will just trigger a deep copy of the lazy object
1731     throw DataException("Programmer error - POS not supported for lazy data.");
1732     break;
1733     case EXP:
1734     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1735     break;
1736     case SQRT:
1737     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1738     break;
1739     case RECIP:
1740     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1741     break;
1742     case GZ:
1743     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1744     break;
1745     case LZ:
1746     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1747     break;
1748     case GEZ:
1749     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1750     break;
1751     case LEZ:
1752     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1753     break;
1754     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1755     case NEZ:
1756     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1757     break;
1758     case EZ:
1759     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1760     break;
1761    
1762     default:
1763     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1764     }
1765     return &(m_samples);
1766     }
1767    
1768    
1769     const DataTypes::ValueType*
1770     DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1771     {
1772     // we assume that any collapsing has been done before we get here
1773     // since we only have one argument we don't need to think about only
1774     // processing single points.
1775     if (m_readytype!='E')
1776     {
1777     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1778     }
1779     if (m_op==IDENTITY)
1780     {
1781     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1782     }
1783     size_t subroffset;
1784     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1785     roffset=m_samplesize*tid;
1786     size_t loop=0;
1787     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1788     size_t step=getNoValues();
1789     size_t offset=roffset;
1790     switch (m_op)
1791     {
1792     case SYM:
1793     for (loop=0;loop<numsteps;++loop)
1794     {
1795     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1796     subroffset+=step;
1797     offset+=step;
1798     }
1799     break;
1800     case NSYM:
1801     for (loop=0;loop<numsteps;++loop)
1802     {
1803     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1804     subroffset+=step;
1805     offset+=step;
1806     }
1807     break;
1808     default:
1809     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1810     }
1811     return &m_samples;
1812     }
1813    
1814     const DataTypes::ValueType*
1815     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1816     {
1817     // we assume that any collapsing has been done before we get here
1818     // since we only have one argument we don't need to think about only
1819     // processing single points.
1820     if (m_readytype!='E')
1821     {
1822     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1823     }
1824     if (m_op==IDENTITY)
1825     {
1826     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1827     }
1828     size_t subroffset;
1829     size_t offset;
1830     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1831     roffset=m_samplesize*tid;
1832     offset=roffset;
1833     size_t loop=0;
1834     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1835     size_t outstep=getNoValues();
1836     size_t instep=m_left->getNoValues();
1837     switch (m_op)
1838     {
1839     case TRACE:
1840     for (loop=0;loop<numsteps;++loop)
1841     {
1842     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1843     subroffset+=instep;
1844     offset+=outstep;
1845     }
1846     break;
1847     case TRANS:
1848     for (loop=0;loop<numsteps;++loop)
1849     {
1850     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1851     subroffset+=instep;
1852     offset+=outstep;
1853     }
1854     break;
1855     default:
1856     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1857     }
1858     return &m_samples;
1859     }
1860    
1861    
1862     const DataTypes::ValueType*
1863     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1864     {
1865     if (m_readytype!='E')
1866     {
1867     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1868     }
1869     if (m_op==IDENTITY)
1870     {
1871     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1872     }
1873     size_t subroffset;
1874     size_t offset;
1875     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1876     roffset=m_samplesize*tid;
1877     offset=roffset;
1878     size_t loop=0;
1879     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1880     size_t outstep=getNoValues();
1881     size_t instep=m_left->getNoValues();
1882     switch (m_op)
1883     {
1884     case SWAP:
1885     for (loop=0;loop<numsteps;++loop)
1886     {
1887     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1888     subroffset+=instep;
1889     offset+=outstep;
1890     }
1891     break;
1892     default:
1893     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1894     }
1895     return &m_samples;
1896     }
1897    
1898    
1899    
1900     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1901     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1902     // If both children are expanded, then we can process them in a single operation (we treat
1903     // the whole sample as one big datapoint.
1904     // If one of the children is not expanded, then we need to treat each point in the sample
1905     // individually.
1906     // There is an additional complication when scalar operations are considered.
1907     // For example, 2+Vector.
1908     // In this case each double within the point is treated individually
1909     const DataTypes::ValueType*
1910     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1911     {
1912     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1913    
1914     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1915     // first work out which of the children are expanded
1916     bool leftExp=(m_left->m_readytype=='E');
1917     bool rightExp=(m_right->m_readytype=='E');
1918     if (!leftExp && !rightExp)
1919     {
1920     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1921     }
1922     bool leftScalar=(m_left->getRank()==0);
1923     bool rightScalar=(m_right->getRank()==0);
1924     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1925     {
1926     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1927     }
1928     size_t leftsize=m_left->getNoValues();
1929     size_t rightsize=m_right->getNoValues();
1930     size_t chunksize=1; // how many doubles will be processed in one go
1931     int leftstep=0; // how far should the left offset advance after each step
1932     int rightstep=0;
1933     int numsteps=0; // total number of steps for the inner loop
1934     int oleftstep=0; // the o variables refer to the outer loop
1935     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1936     int onumsteps=1;
1937    
1938     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1939     bool RES=(rightExp && rightScalar);
1940     bool LS=(!leftExp && leftScalar); // left is a single scalar
1941     bool RS=(!rightExp && rightScalar);
1942     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1943     bool RN=(!rightExp && !rightScalar);
1944     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1945     bool REN=(rightExp && !rightScalar);
1946    
1947     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1948     {
1949     chunksize=m_left->getNumDPPSample()*leftsize;
1950     leftstep=0;
1951     rightstep=0;
1952     numsteps=1;
1953     }
1954     else if (LES || RES)
1955     {
1956     chunksize=1;
1957     if (LES) // left is an expanded scalar
1958     {
1959     if (RS)
1960     {
1961     leftstep=1;
1962     rightstep=0;
1963     numsteps=m_left->getNumDPPSample();
1964     }
1965     else // RN or REN
1966     {
1967     leftstep=0;
1968     oleftstep=1;
1969     rightstep=1;
1970     orightstep=(RN ? -(int)rightsize : 0);
1971     numsteps=rightsize;
1972     onumsteps=m_left->getNumDPPSample();
1973     }
1974     }
1975     else // right is an expanded scalar
1976     {
1977     if (LS)
1978     {
1979     rightstep=1;
1980     leftstep=0;
1981     numsteps=m_right->getNumDPPSample();
1982     }
1983     else
1984     {
1985     rightstep=0;
1986     orightstep=1;
1987     leftstep=1;
1988     oleftstep=(LN ? -(int)leftsize : 0);
1989     numsteps=leftsize;
1990     onumsteps=m_right->getNumDPPSample();
1991     }
1992     }
1993     }
1994     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1995     {
1996     if (LEN) // and Right will be a single value
1997     {
1998     chunksize=rightsize;
1999     leftstep=rightsize;
2000     rightstep=0;
2001     numsteps=m_left->getNumDPPSample();
2002     if (RS)
2003     {
2004     numsteps*=leftsize;
2005     }
2006     }
2007     else // REN
2008     {
2009     chunksize=leftsize;
2010     rightstep=leftsize;
2011     leftstep=0;
2012     numsteps=m_right->getNumDPPSample();
2013     if (LS)
2014     {
2015     numsteps*=rightsize;
2016     }
2017     }
2018     }
2019    
2020     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
2021     // Get the values of sub-expressions
2022     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
2023     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2024     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2025     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2026     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2027     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2028     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2029     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2030     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
2031    
2032     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2033     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2034    
2035    
2036     roffset=m_samplesize*tid;
2037     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we recieved
2038     switch(m_op)
2039     {
2040     case ADD:
2041     PROC_OP(NO_ARG,plus<double>());
2042     break;
2043     case SUB:
2044     PROC_OP(NO_ARG,minus<double>());
2045     break;
2046     case MUL:
2047     PROC_OP(NO_ARG,multiplies<double>());
2048     break;
2049     case DIV:
2050     PROC_OP(NO_ARG,divides<double>());
2051     break;
2052     case POW:
2053     PROC_OP(double (double,double),::pow);
2054     break;
2055     default:
2056     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2057     }
2058     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2059     return &m_samples;
2060     }
2061    
2062    
2063     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2064     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2065     // unlike the other resolve helpers, we must treat these datapoints separately.
2066     const DataTypes::ValueType*
2067     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2068     {
2069     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2070    
2071     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2072     // first work out which of the children are expanded
2073     bool leftExp=(m_left->m_readytype=='E');
2074     bool rightExp=(m_right->m_readytype=='E');
2075     int steps=getNumDPPSample();
2076     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2077     int rightStep=(rightExp?m_right->getNoValues() : 0);
2078    
2079     int resultStep=getNoValues();
2080     roffset=m_samplesize*tid;
2081     size_t offset=roffset;
2082    
2083     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2084    
2085     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2086    
2087     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2088     cout << getNoValues() << endl;)
2089    
2090    
2091     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2092     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2093     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2094     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2095     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2096     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2097     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2098    
2099     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we recieved
2100     switch(m_op)
2101     {
2102     case PROD:
2103     for (int i=0;i<steps;++i,resultp+=resultStep)
2104     {
2105     const double *ptr_0 = &((*left)[lroffset]);
2106     const double *ptr_1 = &((*right)[rroffset]);
2107    
2108     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2109     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2110    
2111     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2112    
2113     lroffset+=leftStep;
2114     rroffset+=rightStep;
2115     }
2116     break;
2117     default:
2118     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2119     }
2120     roffset=offset;
2121     return &m_samples;
2122     }
2123     #endif //LAZY_NODE_STORAGE
2124    
2125 jfenwick 1899 /*
2126     \brief Compute the value of the expression for the given sample.
2127     \return Vector which stores the value of the subexpression for the given sample.
2128     \param v A vector to store intermediate results.
2129     \param offset Index in v to begin storing results.
2130     \param sampleNo Sample number to evaluate.
2131     \param roffset (output parameter) the offset in the return vector where the result begins.
2132 jfenwick 1898
2133 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
2134     */
2135 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
2136     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
2137 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
2138     // Regardless, the storage should be assumed to be used, even if it isn't.
2139 jfenwick 1898
2140     // the roffset is the offset within the returned vector where the data begins
2141     const DataTypes::ValueType*
2142     DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2143 jfenwick 1879 {
2144 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2145 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
2146     if (m_readytype!='E' && m_op!=IDENTITY)
2147     {
2148     collapse();
2149     }
2150 jfenwick 1885 if (m_op==IDENTITY)
2151 jfenwick 1865 {
2152 jfenwick 2271 const ValueType& vec=m_id->getVectorRO();
2153 jfenwick 1889 if (m_readytype=='C')
2154     {
2155 jfenwick 1898 roffset=0;
2156 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2157 jfenwick 1898 return &(vec);
2158 jfenwick 1889 }
2159 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
2160 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2161 jfenwick 1898 return &(vec);
2162 jfenwick 1865 }
2163 jfenwick 1889 if (m_readytype!='E')
2164     {
2165     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
2166     }
2167     switch (getOpgroup(m_op))
2168     {
2169 jfenwick 2147 case G_UNARY:
2170     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2171 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2172 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2173 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2174 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2175 jfenwick 2496 case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2176 jfenwick 1889 default:
2177     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2178     }
2179 jfenwick 2152
2180 jfenwick 1889 }
2181    
2182 jfenwick 2271 const DataTypes::ValueType*
2183     DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2184     {
2185     #ifdef _OPENMP
2186     int tid=omp_get_thread_num();
2187     #else
2188     int tid=0;
2189     #endif
2190     return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2191     }
2192    
2193    
2194 jfenwick 2497 // This needs to do the work of the identity constructor
2195 jfenwick 2177 void
2196     DataLazy::resolveToIdentity()
2197     {
2198     if (m_op==IDENTITY)
2199     return;
2200 jfenwick 2500 #ifndef LAZY_NODE_STORAGE
2201 jfenwick 2497 DataReady_ptr p=resolveVectorWorker();
2202 jfenwick 2500 #else
2203     DataReady_ptr p=resolveNodeWorker();
2204     #endif
2205 jfenwick 2177 makeIdentity(p);
2206     }
2207 jfenwick 1889
2208 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
2209     {
2210     m_op=IDENTITY;
2211     m_axis_offset=0;
2212     m_transpose=0;
2213     m_SL=m_SM=m_SR=0;
2214     m_children=m_height=0;
2215     m_id=p;
2216     if(p->isConstant()) {m_readytype='C';}
2217     else if(p->isExpanded()) {m_readytype='E';}
2218     else if (p->isTagged()) {m_readytype='T';}
2219     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2220     m_buffsRequired=1;
2221     m_samplesize=p->getNumDPPSample()*p->getNoValues();
2222     m_maxsamplesize=m_samplesize;
2223     m_left.reset();
2224     m_right.reset();
2225     }
2226    
2227 jfenwick 2497
2228     DataReady_ptr
2229     DataLazy::resolve()
2230     {
2231     resolveToIdentity();
2232     return m_id;
2233     }
2234    
2235 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
2236    
2237     // This version of resolve uses storage in each node to hold results
2238     DataReady_ptr
2239     DataLazy::resolveNodeWorker()
2240     {
2241     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2242     {
2243     collapse();
2244     }
2245     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2246     {
2247     return m_id;
2248     }
2249     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2250     DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
2251     ValueType& resvec=result->getVectorRW();
2252     DataReady_ptr resptr=DataReady_ptr(result);
2253    
2254     int sample;
2255     int totalsamples=getNumSamples();
2256     const ValueType* res=0; // Storage for answer
2257     LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2258     #pragma omp parallel for private(sample,res) schedule(static)
2259     for (sample=0;sample<totalsamples;++sample)
2260     {
2261     size_t roffset=0;
2262     #ifdef _OPENMP
2263     res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2264     #else
2265     res=resolveNodeSample(0,sample,roffset);
2266     #endif
2267     LAZYDEBUG(cout << "Sample #" << sample << endl;)
2268     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2269     DataVector::size_type outoffset=result->getPointOffset(sample,0);
2270     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2271     }
2272     return resptr;
2273     }
2274    
2275     #endif // LAZY_NODE_STORAGE
2276    
2277 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
2278     // Each sample is evaluated independently and copied into the result DataExpanded.
2279 jfenwick 1879 DataReady_ptr
2280 jfenwick 2497 DataLazy::resolveVectorWorker()
2281 jfenwick 1879 {
2282    
2283 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2284     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
2285 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2286 jfenwick 1889 {
2287     collapse();
2288     }
2289 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2290 jfenwick 1889 {
2291     return m_id;
2292     }
2293     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2294 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
2295 jfenwick 1899 // storage to evaluate its expression
2296 jfenwick 1885 int numthreads=1;
2297     #ifdef _OPENMP
2298 jfenwick 2271 numthreads=omp_get_max_threads();
2299 jfenwick 1885 #endif
2300 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
2301 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2302 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
2303 jfenwick 2271 ValueType& resvec=result->getVectorRW();
2304 jfenwick 1879 DataReady_ptr resptr=DataReady_ptr(result);
2305     int sample;
2306 jfenwick 1898 size_t outoffset; // offset in the output data
2307 jfenwick 1885 int totalsamples=getNumSamples();
2308 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
2309     size_t resoffset=0; // where in the vector to find the answer
2310 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2311 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2312 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
2313 jfenwick 1879 {
2314 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
2315 jfenwick 1885 #ifdef _OPENMP
2316 jfenwick 1898 res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2317 jfenwick 1885 #else
2318 jfenwick 1899 res=resolveSample(v,0,sample,resoffset); // res would normally be v, but not if its a single IDENTITY op.
2319 jfenwick 1885 #endif
2320 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2321 jfenwick 2157 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2322 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
2323 jfenwick 2157 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2324 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
2325 jfenwick 1879 {
2326 jfenwick 2166 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2327 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
2328 jfenwick 1879 }
2329 jfenwick 2157 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2330 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
2331 jfenwick 1879 }
2332     return resptr;
2333     }
2334    
2335 jfenwick 1865 std::string
2336     DataLazy::toString() const
2337     {
2338 jfenwick 1886 ostringstream oss;
2339     oss << "Lazy Data:";
2340     intoString(oss);
2341     return oss.str();
2342 jfenwick 1865 }
2343    
2344 jfenwick 1899
2345 jfenwick 1886 void
2346     DataLazy::intoString(ostringstream& oss) const
2347     {
2348 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
2349 jfenwick 1886 switch (getOpgroup(m_op))
2350     {
2351 jfenwick 1889 case G_IDENTITY:
2352     if (m_id->isExpanded())