/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2734 - (hide annotations)
Fri Oct 30 05:39:48 2009 UTC (9 years, 4 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 86177 byte(s)
Fixed bug in lazy maxval and minval

1 jfenwick 1865
2     /*******************************************************
3     *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 jfenwick 1865 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2472 #include "EscriptParams.h"
32    
33 jfenwick 2199 #include <iomanip> // for some fancy formatting in debug
34    
35 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
36 jfenwick 2092 #define LAZYDEBUG(X)
37 jfenwick 2157 namespace
38     {
39     bool privdebug=false;
40 jfenwick 2092
41 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
42     #define DISABLEDEBUG privdebug=false;
43     }
44    
45 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46 jfenwick 2177
47 jfenwick 2472 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49 jfenwick 2501
50 jfenwick 1899 /*
51     How does DataLazy work?
52     ~~~~~~~~~~~~~~~~~~~~~~~
53    
54     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55     denoted left and right.
56    
57     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58     This means that all "internal" nodes in the structure are instances of DataLazy.
59    
60     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61     Note that IDENITY is not considered a unary operation.
62    
63     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64     It must however form a DAG (directed acyclic graph).
65     I will refer to individual DataLazy objects with the structure as nodes.
66    
67     Each node also stores:
68     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69     evaluated.
70     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71     evaluate the expression.
72     - m_samplesize ~ the number of doubles stored in a sample.
73    
74     When a new node is created, the above values are computed based on the values in the child nodes.
75     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76    
77     The resolve method, which produces a DataReady from a DataLazy, does the following:
78     1) Create a DataReady to hold the new result.
79     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81    
82     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83    
84     resolveSample returns a Vector* and an offset within that vector where the result is stored.
85     Normally, this would be v, but for identity nodes their internal vector is returned instead.
86    
87     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
88    
89     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91 jfenwick 2037
92 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93 jfenwick 2037 1) Add to the ES_optype.
94     2) determine what opgroup your operation belongs to (X)
95     3) add a string for the op to the end of ES_opstrings
96     4) increase ES_opcount
97     5) add an entry (X) to opgroups
98     6) add an entry to the switch in collapseToReady
99     7) add an entry to resolveX
100 jfenwick 1899 */
101    
102    
103 jfenwick 1865 using namespace std;
104 jfenwick 1868 using namespace boost;
105 jfenwick 1865
106     namespace escript
107     {
108    
109     namespace
110     {
111    
112 jfenwick 1886 enum ES_opgroup
113     {
114     G_UNKNOWN,
115     G_IDENTITY,
116 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
117 jfenwick 2037 G_UNARY, // pointwise operations with one argument
118 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
119 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
120 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
121 jfenwick 2496 G_TENSORPROD, // general tensor product
122 jfenwick 2721 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
123     G_REDUCTION // non-pointwise unary op with a scalar output
124 jfenwick 1886 };
125    
126    
127    
128    
129 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130     "sin","cos","tan",
131 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
132     "asinh","acosh","atanh",
133 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
134 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135 jfenwick 2066 "symmetric","nonsymmetric",
136 jfenwick 2084 "prod",
137 jfenwick 2496 "transpose", "trace",
138 jfenwick 2721 "swapaxes",
139     "minval", "maxval"};
140     int ES_opcount=43;
141 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142     G_UNARY,G_UNARY,G_UNARY, //10
143     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
144     G_UNARY,G_UNARY,G_UNARY, // 20
145 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
146     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
147 jfenwick 2066 G_NP1OUT,G_NP1OUT,
148 jfenwick 2084 G_TENSORPROD,
149 jfenwick 2496 G_NP1OUT_P, G_NP1OUT_P,
150 jfenwick 2721 G_NP1OUT_2P,
151     G_REDUCTION, G_REDUCTION};
152 jfenwick 1886 inline
153     ES_opgroup
154     getOpgroup(ES_optype op)
155     {
156     return opgroups[op];
157     }
158    
159 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
160     FunctionSpace
161     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162     {
163     // perhaps this should call interpolate and throw or something?
164     // maybe we need an interpolate node -
165     // that way, if interpolate is required in any other op we can just throw a
166     // programming error exception.
167 jfenwick 1879
168 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
169     FunctionSpace r=right->getFunctionSpace();
170     if (l!=r)
171     {
172     if (r.probeInterpolation(l))
173     {
174     return l;
175     }
176     if (l.probeInterpolation(r))
177     {
178     return r;
179     }
180     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
181     }
182     return l;
183 jfenwick 1865 }
184    
185     // return the shape of the result of "left op right"
186 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
187 jfenwick 1865 DataTypes::ShapeType
188     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189     {
190 jfenwick 1879 if (left->getShape()!=right->getShape())
191     {
192 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
193 jfenwick 1908 {
194     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
195     }
196 jfenwick 2721
197 jfenwick 1908 if (left->getRank()==0) // we need to allow scalar * anything
198     {
199     return right->getShape();
200     }
201     if (right->getRank()==0)
202     {
203     return left->getShape();
204     }
205     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
206 jfenwick 1879 }
207     return left->getShape();
208 jfenwick 1865 }
209    
210 jfenwick 2084 // return the shape for "op left"
211    
212     DataTypes::ShapeType
213 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
214 jfenwick 2084 {
215     switch(op)
216     {
217     case TRANS:
218 jfenwick 2199 { // for the scoping of variables
219     const DataTypes::ShapeType& s=left->getShape();
220     DataTypes::ShapeType sh;
221     int rank=left->getRank();
222     if (axis_offset<0 || axis_offset>rank)
223     {
224     throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
225     }
226     for (int i=0; i<rank; i++)
227     {
228     int index = (axis_offset+i)%rank;
229     sh.push_back(s[index]); // Append to new shape
230     }
231     return sh;
232     }
233 jfenwick 2084 break;
234     case TRACE:
235 jfenwick 2199 {
236     int rank=left->getRank();
237     if (rank<2)
238     {
239     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240     }
241     if ((axis_offset>rank-2) || (axis_offset<0))
242     {
243     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244     }
245     if (rank==2)
246     {
247     return DataTypes::scalarShape;
248     }
249     else if (rank==3)
250     {
251     DataTypes::ShapeType sh;
252     if (axis_offset==0)
253     {
254     sh.push_back(left->getShape()[2]);
255     }
256     else // offset==1
257     {
258     sh.push_back(left->getShape()[0]);
259     }
260     return sh;
261     }
262     else if (rank==4)
263     {
264     DataTypes::ShapeType sh;
265     const DataTypes::ShapeType& s=left->getShape();
266     if (axis_offset==0)
267     {
268     sh.push_back(s[2]);
269     sh.push_back(s[3]);
270     }
271     else if (axis_offset==1)
272     {
273     sh.push_back(s[0]);
274     sh.push_back(s[3]);
275     }
276     else // offset==2
277     {
278     sh.push_back(s[0]);
279     sh.push_back(s[1]);
280     }
281     return sh;
282     }
283     else // unknown rank
284     {
285     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
286     }
287     }
288 jfenwick 2084 break;
289     default:
290     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291     }
292     }
293    
294 jfenwick 2496 DataTypes::ShapeType
295     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296     {
297     // This code taken from the Data.cpp swapaxes() method
298     // Some of the checks are probably redundant here
299     int axis0_tmp,axis1_tmp;
300     const DataTypes::ShapeType& s=left->getShape();
301     DataTypes::ShapeType out_shape;
302     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
303     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
304     int rank=left->getRank();
305     if (rank<2) {
306     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307     }
308     if (axis0<0 || axis0>rank-1) {
309     throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
310     }
311     if (axis1<0 || axis1>rank-1) {
312     throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
313     }
314     if (axis0 == axis1) {
315     throw DataException("Error - Data::swapaxes: axis indices must be different.");
316     }
317     if (axis0 > axis1) {
318     axis0_tmp=axis1;
319     axis1_tmp=axis0;
320     } else {
321     axis0_tmp=axis0;
322     axis1_tmp=axis1;
323     }
324     for (int i=0; i<rank; i++) {
325     if (i == axis0_tmp) {
326     out_shape.push_back(s[axis1_tmp]);
327     } else if (i == axis1_tmp) {
328     out_shape.push_back(s[axis0_tmp]);
329     } else {
330     out_shape.push_back(s[i]);
331     }
332     }
333     return out_shape;
334     }
335    
336    
337 jfenwick 2066 // determine the output shape for the general tensor product operation
338     // the additional parameters return information required later for the product
339     // the majority of this code is copy pasted from C_General_Tensor_Product
340     DataTypes::ShapeType
341     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
342 jfenwick 1865 {
343 jfenwick 2066
344     // Get rank and shape of inputs
345     int rank0 = left->getRank();
346     int rank1 = right->getRank();
347     const DataTypes::ShapeType& shape0 = left->getShape();
348     const DataTypes::ShapeType& shape1 = right->getShape();
349    
350     // Prepare for the loops of the product and verify compatibility of shapes
351     int start0=0, start1=0;
352     if (transpose == 0) {}
353     else if (transpose == 1) { start0 = axis_offset; }
354     else if (transpose == 2) { start1 = rank1-axis_offset; }
355     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
356    
357 jfenwick 2085 if (rank0<axis_offset)
358     {
359     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
360     }
361 jfenwick 2066
362     // Adjust the shapes for transpose
363     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
364     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
365     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
366     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
367    
368     // Prepare for the loops of the product
369     SL=1, SM=1, SR=1;
370     for (int i=0; i<rank0-axis_offset; i++) {
371     SL *= tmpShape0[i];
372     }
373     for (int i=rank0-axis_offset; i<rank0; i++) {
374     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
375     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
376     }
377     SM *= tmpShape0[i];
378     }
379     for (int i=axis_offset; i<rank1; i++) {
380     SR *= tmpShape1[i];
381     }
382    
383     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
384     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
385     { // block to limit the scope of out_index
386     int out_index=0;
387     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
388     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
389     }
390 jfenwick 2086
391     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
392     {
393     ostringstream os;
394     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
395     throw DataException(os.str());
396     }
397    
398 jfenwick 2066 return shape2;
399 jfenwick 1865 }
400    
401 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
402 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
403 jfenwick 2066 // The same goes for G_TENSORPROD
404 jfenwick 2271 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
405 jfenwick 2157 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
406     // multiple times
407 jfenwick 1879 int
408     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
409     {
410 jfenwick 1886 switch(getOpgroup(op))
411 jfenwick 1879 {
412 jfenwick 1886 case G_IDENTITY: return 1;
413 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414 jfenwick 2721 case G_REDUCTION:
415 jfenwick 2147 case G_UNARY:
416     case G_UNARY_P: return max(left->getBuffsRequired(),1);
417 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420 jfenwick 2496 case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421 jfenwick 1879 default:
422     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }
424     }
425    
426 jfenwick 1899
427 jfenwick 1865 } // end anonymous namespace
428    
429    
430 jfenwick 1899
431     // Return a string representing the operation
432 jfenwick 1865 const std::string&
433     opToString(ES_optype op)
434     {
435     if (op<0 || op>=ES_opcount)
436     {
437     op=UNKNOWNOP;
438     }
439     return ES_opstrings[op];
440     }
441    
442 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
443     void DataLazy::LazyNodeSetup()
444     {
445     #ifdef _OPENMP
446     int numthreads=omp_get_max_threads();
447     m_samples.resize(numthreads*m_samplesize);
448     m_sampleids=new int[numthreads];
449     for (int i=0;i<numthreads;++i)
450     {
451     m_sampleids[i]=-1;
452     }
453     #else
454     m_samples.resize(m_samplesize);
455     m_sampleids=new int[1];
456     m_sampleids[0]=-1;
457     #endif // _OPENMP
458     }
459     #endif // LAZY_NODE_STORAGE
460 jfenwick 1865
461 jfenwick 2500
462     // Creates an identity node
463 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
464 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
465 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
466     ,m_sampleids(0),
467     m_samples(1)
468     #endif
469 jfenwick 1865 {
470 jfenwick 1879 if (p->isLazy())
471     {
472     // I don't want identity of Lazy.
473     // Question: Why would that be so bad?
474     // Answer: We assume that the child of ID is something we can call getVector on
475     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
476     }
477     else
478     {
479 jfenwick 2271 p->makeLazyShared();
480 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481     makeIdentity(dr);
482 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483 jfenwick 1879 }
484 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
485 jfenwick 1865 }
486    
487 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488 jfenwick 2721 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489 jfenwick 2066 m_op(op),
490     m_axis_offset(0),
491     m_transpose(0),
492     m_SL(0), m_SM(0), m_SR(0)
493 jfenwick 1886 {
494 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495 jfenwick 1886 {
496     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }
498 jfenwick 2066
499 jfenwick 1886 DataLazy_ptr lleft;
500     if (!left->isLazy())
501     {
502     lleft=DataLazy_ptr(new DataLazy(left));
503     }
504     else
505     {
506     lleft=dynamic_pointer_cast<DataLazy>(left);
507     }
508 jfenwick 1889 m_readytype=lleft->m_readytype;
509 jfenwick 1886 m_left=lleft;
510 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
511 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
512 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513 jfenwick 2177 m_children=m_left->m_children+1;
514     m_height=m_left->m_height+1;
515 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
516     LazyNodeSetup();
517     #endif
518 jfenwick 2177 SIZELIMIT
519 jfenwick 1886 }
520    
521    
522 jfenwick 1943 // In this constructor we need to consider interpolation
523 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524     : parent(resultFS(left,right,op), resultShape(left,right,op)),
525 jfenwick 2066 m_op(op),
526     m_SL(0), m_SM(0), m_SR(0)
527 jfenwick 1879 {
528 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
530 jfenwick 1886 {
531 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532 jfenwick 1886 }
533 jfenwick 1943
534     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
535     {
536     FunctionSpace fs=getFunctionSpace();
537     Data ltemp(left);
538     Data tmp(ltemp,fs);
539     left=tmp.borrowDataPtr();
540     }
541 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
542 jfenwick 1943 {
543     Data tmp(Data(right),getFunctionSpace());
544     right=tmp.borrowDataPtr();
545 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546 jfenwick 1943 }
547     left->operandCheck(*right);
548    
549 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
550 jfenwick 1879 {
551     m_left=dynamic_pointer_cast<DataLazy>(left);
552 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553 jfenwick 1879 }
554     else
555     {
556     m_left=DataLazy_ptr(new DataLazy(left));
557 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558 jfenwick 1879 }
559     if (right->isLazy())
560     {
561     m_right=dynamic_pointer_cast<DataLazy>(right);
562 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563 jfenwick 1879 }
564     else
565     {
566     m_right=DataLazy_ptr(new DataLazy(right));
567 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568 jfenwick 1879 }
569 jfenwick 1889 char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;
571     if (lt=='E' || rt=='E')
572     {
573     m_readytype='E';
574     }
575     else if (lt=='T' || rt=='T')
576     {
577     m_readytype='T';
578     }
579     else
580     {
581     m_readytype='C';
582     }
583 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
584     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
585 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;
588 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
589     LazyNodeSetup();
590     #endif
591 jfenwick 2177 SIZELIMIT
592 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593 jfenwick 1879 }
594    
595 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597     m_op(op),
598     m_axis_offset(axis_offset),
599     m_transpose(transpose)
600     {
601     if ((getOpgroup(op)!=G_TENSORPROD))
602     {
603     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604     }
605     if ((transpose>2) || (transpose<0))
606     {
607     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608     }
609     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
610     {
611     FunctionSpace fs=getFunctionSpace();
612     Data ltemp(left);
613     Data tmp(ltemp,fs);
614     left=tmp.borrowDataPtr();
615     }
616     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
617     {
618     Data tmp(Data(right),getFunctionSpace());
619     right=tmp.borrowDataPtr();
620     }
621 jfenwick 2195 // left->operandCheck(*right);
622 jfenwick 1879
623 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {
625     m_left=dynamic_pointer_cast<DataLazy>(left);
626     }
627     else
628     {
629     m_left=DataLazy_ptr(new DataLazy(left));
630     }
631     if (right->isLazy())
632     {
633     m_right=dynamic_pointer_cast<DataLazy>(right);
634     }
635     else
636     {
637     m_right=DataLazy_ptr(new DataLazy(right));
638     }
639     char lt=m_left->m_readytype;
640     char rt=m_right->m_readytype;
641     if (lt=='E' || rt=='E')
642     {
643     m_readytype='E';
644     }
645     else if (lt=='T' || rt=='T')
646     {
647     m_readytype='T';
648     }
649     else
650     {
651     m_readytype='C';
652     }
653     m_samplesize=getNumDPPSample()*getNoValues();
654     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
657     m_height=max(m_left->m_height,m_right->m_height)+1;
658 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
659     LazyNodeSetup();
660     #endif
661 jfenwick 2177 SIZELIMIT
662 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663 jfenwick 2066 }
664    
665    
666 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
667 jfenwick 2199 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
668 jfenwick 2084 m_op(op),
669     m_axis_offset(axis_offset),
670 jfenwick 2147 m_transpose(0),
671     m_tol(0)
672 jfenwick 2084 {
673     if ((getOpgroup(op)!=G_NP1OUT_P))
674     {
675     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
676     }
677     DataLazy_ptr lleft;
678     if (!left->isLazy())
679     {
680     lleft=DataLazy_ptr(new DataLazy(left));
681     }
682     else
683     {
684     lleft=dynamic_pointer_cast<DataLazy>(left);
685     }
686     m_readytype=lleft->m_readytype;
687     m_left=lleft;
688     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
689     m_samplesize=getNumDPPSample()*getNoValues();
690     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691 jfenwick 2177 m_children=m_left->m_children+1;
692     m_height=m_left->m_height+1;
693 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
694     LazyNodeSetup();
695     #endif
696 jfenwick 2177 SIZELIMIT
697 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698 jfenwick 2084 }
699    
700 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
701     : parent(left->getFunctionSpace(), left->getShape()),
702     m_op(op),
703     m_axis_offset(0),
704     m_transpose(0),
705     m_tol(tol)
706     {
707     if ((getOpgroup(op)!=G_UNARY_P))
708     {
709     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
710     }
711     DataLazy_ptr lleft;
712     if (!left->isLazy())
713     {
714     lleft=DataLazy_ptr(new DataLazy(left));
715     }
716     else
717     {
718     lleft=dynamic_pointer_cast<DataLazy>(left);
719     }
720     m_readytype=lleft->m_readytype;
721     m_left=lleft;
722     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
723     m_samplesize=getNumDPPSample()*getNoValues();
724     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725 jfenwick 2177 m_children=m_left->m_children+1;
726     m_height=m_left->m_height+1;
727 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
728     LazyNodeSetup();
729     #endif
730 jfenwick 2177 SIZELIMIT
731 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732     }
733 jfenwick 2084
734 jfenwick 2496
735     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736     : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737     m_op(op),
738     m_axis_offset(axis0),
739     m_transpose(axis1),
740     m_tol(0)
741     {
742     if ((getOpgroup(op)!=G_NP1OUT_2P))
743     {
744     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
745     }
746     DataLazy_ptr lleft;
747     if (!left->isLazy())
748     {
749     lleft=DataLazy_ptr(new DataLazy(left));
750     }
751     else
752     {
753     lleft=dynamic_pointer_cast<DataLazy>(left);
754     }
755     m_readytype=lleft->m_readytype;
756     m_left=lleft;
757     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
758     m_samplesize=getNumDPPSample()*getNoValues();
759     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760     m_children=m_left->m_children+1;
761     m_height=m_left->m_height+1;
762 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
763     LazyNodeSetup();
764     #endif
765 jfenwick 2496 SIZELIMIT
766     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767     }
768    
769 jfenwick 1865 DataLazy::~DataLazy()
770     {
771 jfenwick 2500 #ifdef LAZY_NODE_SETUP
772     delete[] m_sampleids;
773     delete[] m_samples;
774     #endif
775 jfenwick 1865 }
776    
777 jfenwick 1879
778     int
779     DataLazy::getBuffsRequired() const
780 jfenwick 1865 {
781 jfenwick 1879 return m_buffsRequired;
782     }
783    
784 jfenwick 1884
785 jfenwick 2066 size_t
786     DataLazy::getMaxSampleSize() const
787     {
788     return m_maxsamplesize;
789     }
790    
791 jfenwick 2271
792    
793     size_t
794     DataLazy::getSampleBufferSize() const
795     {
796     return m_maxsamplesize*(max(1,m_buffsRequired));
797     }
798    
799 jfenwick 1899 /*
800     \brief Evaluates the expression using methods on Data.
801     This does the work for the collapse method.
802     For reasons of efficiency do not call this method on DataExpanded nodes.
803     */
804 jfenwick 1889 DataReady_ptr
805     DataLazy::collapseToReady()
806     {
807     if (m_readytype=='E')
808     { // this is more an efficiency concern than anything else
809     throw DataException("Programmer Error - do not use collapse on Expanded data.");
810     }
811     if (m_op==IDENTITY)
812     {
813     return m_id;
814     }
815     DataReady_ptr pleft=m_left->collapseToReady();
816     Data left(pleft);
817     Data right;
818 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
819 jfenwick 1889 {
820     right=Data(m_right->collapseToReady());
821     }
822     Data result;
823     switch(m_op)
824     {
825     case ADD:
826     result=left+right;
827     break;
828     case SUB:
829     result=left-right;
830     break;
831     case MUL:
832     result=left*right;
833     break;
834     case DIV:
835     result=left/right;
836     break;
837     case SIN:
838     result=left.sin();
839     break;
840     case COS:
841     result=left.cos();
842     break;
843     case TAN:
844     result=left.tan();
845     break;
846     case ASIN:
847     result=left.asin();
848     break;
849     case ACOS:
850     result=left.acos();
851     break;
852     case ATAN:
853     result=left.atan();
854     break;
855     case SINH:
856     result=left.sinh();
857     break;
858     case COSH:
859     result=left.cosh();
860     break;
861     case TANH:
862     result=left.tanh();
863     break;
864     case ERF:
865     result=left.erf();
866     break;
867     case ASINH:
868     result=left.asinh();
869     break;
870     case ACOSH:
871     result=left.acosh();
872     break;
873     case ATANH:
874     result=left.atanh();
875     break;
876     case LOG10:
877     result=left.log10();
878     break;
879     case LOG:
880     result=left.log();
881     break;
882     case SIGN:
883     result=left.sign();
884     break;
885     case ABS:
886     result=left.abs();
887     break;
888     case NEG:
889     result=left.neg();
890     break;
891     case POS:
892     // it doesn't mean anything for delayed.
893     // it will just trigger a deep copy of the lazy object
894     throw DataException("Programmer error - POS not supported for lazy data.");
895     break;
896     case EXP:
897     result=left.exp();
898     break;
899     case SQRT:
900     result=left.sqrt();
901     break;
902     case RECIP:
903     result=left.oneOver();
904     break;
905     case GZ:
906     result=left.wherePositive();
907     break;
908     case LZ:
909     result=left.whereNegative();
910     break;
911     case GEZ:
912     result=left.whereNonNegative();
913     break;
914     case LEZ:
915     result=left.whereNonPositive();
916     break;
917 jfenwick 2147 case NEZ:
918     result=left.whereNonZero(m_tol);
919     break;
920     case EZ:
921     result=left.whereZero(m_tol);
922     break;
923 jfenwick 2037 case SYM:
924     result=left.symmetric();
925     break;
926     case NSYM:
927     result=left.nonsymmetric();
928     break;
929 jfenwick 2066 case PROD:
930     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
931     break;
932 jfenwick 2084 case TRANS:
933     result=left.transpose(m_axis_offset);
934     break;
935     case TRACE:
936     result=left.trace(m_axis_offset);
937     break;
938 jfenwick 2496 case SWAP:
939     result=left.swapaxes(m_axis_offset, m_transpose);
940     break;
941 jfenwick 2721 case MINVAL:
942     result=left.minval();
943     break;
944     case MAXVAL:
945     result=left.minval();
946     break;
947 jfenwick 1889 default:
948 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949 jfenwick 1889 }
950     return result.borrowReadyPtr();
951     }
952    
953 jfenwick 1899 /*
954     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
955     This method uses the original methods on the Data class to evaluate the expressions.
956     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
957     the purpose of using DataLazy in the first place).
958     */
959 jfenwick 1889 void
960     DataLazy::collapse()
961     {
962     if (m_op==IDENTITY)
963     {
964     return;
965     }
966     if (m_readytype=='E')
967     { // this is more an efficiency concern than anything else
968     throw DataException("Programmer Error - do not use collapse on Expanded data.");
969     }
970     m_id=collapseToReady();
971     m_op=IDENTITY;
972     }
973    
974 jfenwick 1899 /*
975 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
976 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
977     \param v A vector to store intermediate results.
978     \param offset Index in v to begin storing results.
979     \param sampleNo Sample number to evaluate.
980     \param roffset (output parameter) the offset in the return vector where the result begins.
981    
982     The return value will be an existing vector so do not deallocate it.
983     If the result is stored in v it should be stored at the offset given.
984     Everything from offset to the end of v should be considered available for this method to use.
985     */
986 jfenwick 1898 DataTypes::ValueType*
987     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
988 jfenwick 1889 {
989     // we assume that any collapsing has been done before we get here
990     // since we only have one argument we don't need to think about only
991     // processing single points.
992     if (m_readytype!='E')
993     {
994     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995     }
996 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997 jfenwick 1898 const double* left=&((*vleft)[roffset]);
998 jfenwick 1889 double* result=&(v[offset]);
999 jfenwick 1898 roffset=offset;
1000 jfenwick 1889 switch (m_op)
1001     {
1002     case SIN:
1003 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1004 jfenwick 1889 break;
1005     case COS:
1006 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1007 jfenwick 1889 break;
1008     case TAN:
1009 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1010 jfenwick 1889 break;
1011     case ASIN:
1012 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1013 jfenwick 1889 break;
1014     case ACOS:
1015 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1016 jfenwick 1889 break;
1017     case ATAN:
1018 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1019 jfenwick 1889 break;
1020     case SINH:
1021 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1022 jfenwick 1889 break;
1023     case COSH:
1024 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1025 jfenwick 1889 break;
1026     case TANH:
1027 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1028 jfenwick 1889 break;
1029     case ERF:
1030 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1031 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1032     #else
1033     tensor_unary_operation(m_samplesize, left, result, ::erf);
1034     break;
1035     #endif
1036     case ASINH:
1037 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1038 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1039     #else
1040     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1041     #endif
1042     break;
1043     case ACOSH:
1044 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1046     #else
1047     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1048     #endif
1049     break;
1050     case ATANH:
1051 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1053     #else
1054     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1055     #endif
1056     break;
1057     case LOG10:
1058 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1059 jfenwick 1889 break;
1060     case LOG:
1061 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1062 jfenwick 1889 break;
1063     case SIGN:
1064     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1065     break;
1066     case ABS:
1067 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1068 jfenwick 1889 break;
1069     case NEG:
1070     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1071     break;
1072     case POS:
1073     // it doesn't mean anything for delayed.
1074     // it will just trigger a deep copy of the lazy object
1075     throw DataException("Programmer error - POS not supported for lazy data.");
1076     break;
1077     case EXP:
1078 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1079 jfenwick 1889 break;
1080     case SQRT:
1081 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1082 jfenwick 1889 break;
1083     case RECIP:
1084     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1085     break;
1086     case GZ:
1087     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1088     break;
1089     case LZ:
1090     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1091     break;
1092     case GEZ:
1093     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1094     break;
1095     case LEZ:
1096     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1097     break;
1098 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1099     case NEZ:
1100     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1101     break;
1102     case EZ:
1103     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1104     break;
1105 jfenwick 1889
1106     default:
1107     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1108     }
1109 jfenwick 1898 return &v;
1110 jfenwick 1889 }
1111    
1112    
1113 jfenwick 2721 /*
1114     \brief Compute the value of the expression (reduction operation) for the given sample.
1115     \return Vector which stores the value of the subexpression for the given sample.
1116     \param v A vector to store intermediate results.
1117     \param offset Index in v to begin storing results.
1118     \param sampleNo Sample number to evaluate.
1119     \param roffset (output parameter) the offset in the return vector where the result begins.
1120 jfenwick 2147
1121 jfenwick 2721 The return value will be an existing vector so do not deallocate it.
1122     If the result is stored in v it should be stored at the offset given.
1123     Everything from offset to the end of v should be considered available for this method to use.
1124     */
1125     DataTypes::ValueType*
1126     DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127     {
1128     // we assume that any collapsing has been done before we get here
1129     // since we only have one argument we don't need to think about only
1130     // processing single points.
1131     if (m_readytype!='E')
1132     {
1133     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134     }
1135     const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136     double* result=&(v[offset]);
1137     roffset=offset;
1138 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1139     unsigned int psize=DataTypes::noValues(getShape());
1140 jfenwick 2721 switch (m_op)
1141     {
1142     case MINVAL:
1143     {
1144 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1145     {
1146     FMin op;
1147     *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1148     roffset+=psize;
1149     result++;
1150     }
1151 jfenwick 2721 }
1152     break;
1153     case MAXVAL:
1154     {
1155 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1156     {
1157     FMax op;
1158     *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1159     roffset+=psize;
1160     result++;
1161     }
1162 jfenwick 2721 }
1163     break;
1164     default:
1165     throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1166     }
1167     return &v;
1168     }
1169 jfenwick 2147
1170    
1171    
1172 jfenwick 2037 /*
1173     \brief Compute the value of the expression (unary operation) for the given sample.
1174     \return Vector which stores the value of the subexpression for the given sample.
1175     \param v A vector to store intermediate results.
1176     \param offset Index in v to begin storing results.
1177     \param sampleNo Sample number to evaluate.
1178     \param roffset (output parameter) the offset in the return vector where the result begins.
1179 jfenwick 1898
1180 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
1181     If the result is stored in v it should be stored at the offset given.
1182     Everything from offset to the end of v should be considered available for this method to use.
1183     */
1184     DataTypes::ValueType*
1185     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1186     {
1187     // we assume that any collapsing has been done before we get here
1188     // since we only have one argument we don't need to think about only
1189     // processing single points.
1190     if (m_readytype!='E')
1191     {
1192     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1193     }
1194     // since we can't write the result over the input, we need a result offset further along
1195     size_t subroffset=roffset+m_samplesize;
1196 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1197 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1198 jfenwick 2037 roffset=offset;
1199 jfenwick 2157 size_t loop=0;
1200     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1201     size_t step=getNoValues();
1202 jfenwick 2037 switch (m_op)
1203     {
1204     case SYM:
1205 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1206     {
1207     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1208     subroffset+=step;
1209     offset+=step;
1210     }
1211 jfenwick 2037 break;
1212     case NSYM:
1213 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1214     {
1215     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1216     subroffset+=step;
1217     offset+=step;
1218     }
1219 jfenwick 2037 break;
1220     default:
1221     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1222     }
1223     return &v;
1224     }
1225 jfenwick 1898
1226 jfenwick 2084 /*
1227     \brief Compute the value of the expression (unary operation) for the given sample.
1228     \return Vector which stores the value of the subexpression for the given sample.
1229     \param v A vector to store intermediate results.
1230     \param offset Index in v to begin storing results.
1231     \param sampleNo Sample number to evaluate.
1232     \param roffset (output parameter) the offset in the return vector where the result begins.
1233 jfenwick 1899
1234 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
1235     If the result is stored in v it should be stored at the offset given.
1236     Everything from offset to the end of v should be considered available for this method to use.
1237     */
1238     DataTypes::ValueType*
1239     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1240     {
1241     // we assume that any collapsing has been done before we get here
1242     // since we only have one argument we don't need to think about only
1243     // processing single points.
1244     if (m_readytype!='E')
1245     {
1246     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1247     }
1248     // since we can't write the result over the input, we need a result offset further along
1249 jfenwick 2157 size_t subroffset;
1250 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1251 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1252     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1253 jfenwick 2084 roffset=offset;
1254 jfenwick 2157 size_t loop=0;
1255     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1256 jfenwick 2166 size_t outstep=getNoValues();
1257     size_t instep=m_left->getNoValues();
1258     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1259 jfenwick 2084 switch (m_op)
1260     {
1261     case TRACE:
1262 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1263     {
1264 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
1265     if (zz==5800)
1266     {
1267     LAZYDEBUG(cerr << "point=" << zz<< endl;)
1268     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1269     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1270     LAZYDEBUG(cerr << subroffset << endl;)
1271     LAZYDEBUG(cerr << "output=" << offset << endl;)
1272     }
1273     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1274     if (zz==5800)
1275     {
1276     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1277     }
1278     subroffset+=instep;
1279     offset+=outstep;
1280 jfenwick 2157 }
1281 jfenwick 2084 break;
1282     case TRANS:
1283 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1284     {
1285     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1286 jfenwick 2166 subroffset+=instep;
1287     offset+=outstep;
1288 jfenwick 2157 }
1289 jfenwick 2084 break;
1290     default:
1291     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1292     }
1293     return &v;
1294     }
1295 jfenwick 2037
1296    
1297 jfenwick 2496 /*
1298     \brief Compute the value of the expression (unary operation with int params) for the given sample.
1299     \return Vector which stores the value of the subexpression for the given sample.
1300     \param v A vector to store intermediate results.
1301     \param offset Index in v to begin storing results.
1302     \param sampleNo Sample number to evaluate.
1303     \param roffset (output parameter) the offset in the return vector where the result begins.
1304    
1305     The return value will be an existing vector so do not deallocate it.
1306     If the result is stored in v it should be stored at the offset given.
1307     Everything from offset to the end of v should be considered available for this method to use.
1308     */
1309     DataTypes::ValueType*
1310     DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1311     {
1312     // we assume that any collapsing has been done before we get here
1313     // since we only have one argument we don't need to think about only
1314     // processing single points.
1315     if (m_readytype!='E')
1316     {
1317     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1318     }
1319     // since we can't write the result over the input, we need a result offset further along
1320     size_t subroffset;
1321 jfenwick 2521 const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1322 jfenwick 2496 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1323     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1324     roffset=offset;
1325     size_t loop=0;
1326     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1327     size_t outstep=getNoValues();
1328     size_t instep=m_left->getNoValues();
1329     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1330     switch (m_op)
1331     {
1332     case SWAP:
1333     for (loop=0;loop<numsteps;++loop)
1334     {
1335     DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1336     subroffset+=instep;
1337     offset+=outstep;
1338     }
1339     break;
1340     default:
1341     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342     }
1343     return &v;
1344     }
1345    
1346    
1347    
1348 phornby 1987 #define PROC_OP(TYPE,X) \
1349 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1350     {\
1351     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1352     { \
1353     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1354 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1355 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1356 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1357 jfenwick 2152 lroffset+=leftstep; \
1358     rroffset+=rightstep; \
1359     }\
1360     lroffset+=oleftstep;\
1361     rroffset+=orightstep;\
1362 jfenwick 1889 }
1363    
1364 jfenwick 1899 /*
1365     \brief Compute the value of the expression (binary operation) for the given sample.
1366     \return Vector which stores the value of the subexpression for the given sample.
1367     \param v A vector to store intermediate results.
1368     \param offset Index in v to begin storing results.
1369     \param sampleNo Sample number to evaluate.
1370     \param roffset (output parameter) the offset in the return vector where the result begins.
1371    
1372     The return value will be an existing vector so do not deallocate it.
1373     If the result is stored in v it should be stored at the offset given.
1374     Everything from offset to the end of v should be considered available for this method to use.
1375     */
1376     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1377     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1378     // If both children are expanded, then we can process them in a single operation (we treat
1379     // the whole sample as one big datapoint.
1380     // If one of the children is not expanded, then we need to treat each point in the sample
1381     // individually.
1382 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1383     // For example, 2+Vector.
1384     // In this case each double within the point is treated individually
1385 jfenwick 1898 DataTypes::ValueType*
1386 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1387 jfenwick 1889 {
1388 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1389 jfenwick 1889
1390 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1391     // first work out which of the children are expanded
1392 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1393     bool rightExp=(m_right->m_readytype=='E');
1394 jfenwick 2084 if (!leftExp && !rightExp)
1395     {
1396     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1397     }
1398     bool leftScalar=(m_left->getRank()==0);
1399     bool rightScalar=(m_right->getRank()==0);
1400 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1401 jfenwick 1908 {
1402 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1403     }
1404     size_t leftsize=m_left->getNoValues();
1405     size_t rightsize=m_right->getNoValues();
1406     size_t chunksize=1; // how many doubles will be processed in one go
1407     int leftstep=0; // how far should the left offset advance after each step
1408     int rightstep=0;
1409     int numsteps=0; // total number of steps for the inner loop
1410     int oleftstep=0; // the o variables refer to the outer loop
1411     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1412     int onumsteps=1;
1413    
1414     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1415     bool RES=(rightExp && rightScalar);
1416     bool LS=(!leftExp && leftScalar); // left is a single scalar
1417     bool RS=(!rightExp && rightScalar);
1418     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1419     bool RN=(!rightExp && !rightScalar);
1420     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1421     bool REN=(rightExp && !rightScalar);
1422    
1423     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1424     {
1425     chunksize=m_left->getNumDPPSample()*leftsize;
1426     leftstep=0;
1427     rightstep=0;
1428     numsteps=1;
1429     }
1430     else if (LES || RES)
1431     {
1432     chunksize=1;
1433     if (LES) // left is an expanded scalar
1434 jfenwick 2084 {
1435 jfenwick 2152 if (RS)
1436     {
1437     leftstep=1;
1438     rightstep=0;
1439     numsteps=m_left->getNumDPPSample();
1440     }
1441     else // RN or REN
1442     {
1443     leftstep=0;
1444     oleftstep=1;
1445     rightstep=1;
1446 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1447 jfenwick 2152 numsteps=rightsize;
1448     onumsteps=m_left->getNumDPPSample();
1449     }
1450 jfenwick 2084 }
1451 jfenwick 2152 else // right is an expanded scalar
1452     {
1453     if (LS)
1454     {
1455     rightstep=1;
1456     leftstep=0;
1457     numsteps=m_right->getNumDPPSample();
1458     }
1459     else
1460     {
1461     rightstep=0;
1462     orightstep=1;
1463     leftstep=1;
1464 jfenwick 2177 oleftstep=(LN ? -(int)leftsize : 0);
1465 jfenwick 2152 numsteps=leftsize;
1466     onumsteps=m_right->getNumDPPSample();
1467     }
1468     }
1469     }
1470     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1471     {
1472     if (LEN) // and Right will be a single value
1473     {
1474     chunksize=rightsize;
1475     leftstep=rightsize;
1476     rightstep=0;
1477     numsteps=m_left->getNumDPPSample();
1478     if (RS)
1479     {
1480     numsteps*=leftsize;
1481     }
1482     }
1483     else // REN
1484     {
1485     chunksize=leftsize;
1486     rightstep=leftsize;
1487     leftstep=0;
1488     numsteps=m_right->getNumDPPSample();
1489     if (LS)
1490     {
1491     numsteps*=rightsize;
1492     }
1493     }
1494     }
1495    
1496     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1497 jfenwick 1899 // Get the values of sub-expressions
1498 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1499 jfenwick 2157 // calcBufss for why we can't put offset as the 2nd param above
1500 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1501 jfenwick 1899 // the right child starts further along.
1502 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1503     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1504     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1505     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1506     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1507     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1508     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1509 jfenwick 2195
1510 jfenwick 2199
1511 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1512 jfenwick 1889 switch(m_op)
1513     {
1514     case ADD:
1515 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1516 jfenwick 1889 break;
1517 jfenwick 1899 case SUB:
1518 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1519 jfenwick 1899 break;
1520     case MUL:
1521 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1522 jfenwick 1899 break;
1523     case DIV:
1524 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1525 jfenwick 1899 break;
1526 jfenwick 1910 case POW:
1527 phornby 1994 PROC_OP(double (double,double),::pow);
1528 jfenwick 1910 break;
1529 jfenwick 1889 default:
1530 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1531 jfenwick 1889 }
1532 jfenwick 2195 roffset=offset;
1533 jfenwick 1898 return &v;
1534 jfenwick 1889 }
1535    
1536 jfenwick 1898
1537 jfenwick 2152
1538 jfenwick 2066 /*
1539     \brief Compute the value of the expression (tensor product) for the given sample.
1540     \return Vector which stores the value of the subexpression for the given sample.
1541     \param v A vector to store intermediate results.
1542     \param offset Index in v to begin storing results.
1543     \param sampleNo Sample number to evaluate.
1544     \param roffset (output parameter) the offset in the return vector where the result begins.
1545 jfenwick 1898
1546 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1547     If the result is stored in v it should be stored at the offset given.
1548     Everything from offset to the end of v should be considered available for this method to use.
1549     */
1550     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1551     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1552     // unlike the other resolve helpers, we must treat these datapoints separately.
1553     DataTypes::ValueType*
1554     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1555     {
1556 jfenwick 2195 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)
1557 jfenwick 2066
1558     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1559     // first work out which of the children are expanded
1560     bool leftExp=(m_left->m_readytype=='E');
1561     bool rightExp=(m_right->m_readytype=='E');
1562     int steps=getNumDPPSample();
1563 jfenwick 2195 /* int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1564     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1565     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1566     int rightStep=(rightExp?m_right->getNoValues() : 0);
1567    
1568     int resultStep=getNoValues();
1569 jfenwick 2066 // Get the values of sub-expressions (leave a gap of one sample for the result).
1570 jfenwick 2199 int gap=offset+m_samplesize;
1571 jfenwick 2195
1572     LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1573    
1574 jfenwick 2521 const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
1575 jfenwick 2199 gap+=m_left->getMaxSampleSize();
1576 jfenwick 2195
1577    
1578     LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1579    
1580    
1581 jfenwick 2521 const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);
1582 jfenwick 2195
1583     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1584     cout << getNoValues() << endl;)
1585     LAZYDEBUG(cerr << "Result of left=";)
1586     LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1587 jfenwick 2199
1588     for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1589 jfenwick 2195 {
1590 jfenwick 2199 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1591     if (i%4==0) cout << endl;
1592 jfenwick 2195 })
1593     LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1594 jfenwick 2199 LAZYDEBUG(
1595     for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1596 jfenwick 2195 {
1597 jfenwick 2199 cerr << "[" << setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1598     if (i%4==0) cout << endl;
1599 jfenwick 2195 }
1600     cerr << endl;
1601     )
1602     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1603 jfenwick 2153 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1604     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1605     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1606     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1607     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1608 jfenwick 2195 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1609    
1610 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1611     switch(m_op)
1612     {
1613     case PROD:
1614     for (int i=0;i<steps;++i,resultp+=resultStep)
1615     {
1616 jfenwick 2199
1617 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1618     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1619     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1620 jfenwick 2199
1621 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1622     const double *ptr_1 = &((*right)[rroffset]);
1623 jfenwick 2199
1624 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1625     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1626 jfenwick 2199
1627 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628 jfenwick 2199
1629 jfenwick 2195 LAZYDEBUG(cout << "Results--\n";
1630 jfenwick 2199 {
1631     DataVector dv(getNoValues());
1632 jfenwick 2195 for (int z=0;z<getNoValues();++z)
1633     {
1634 jfenwick 2199 cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1635     if (z%4==0) cout << endl;
1636     dv[z]=resultp[z];
1637 jfenwick 2195 }
1638 jfenwick 2199 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1639 jfenwick 2195 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1640 jfenwick 2199 }
1641 jfenwick 2195 )
1642 jfenwick 2066 lroffset+=leftStep;
1643     rroffset+=rightStep;
1644     }
1645     break;
1646     default:
1647     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1648     }
1649     roffset=offset;
1650     return &v;
1651     }
1652    
1653    
1654 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
1655 jfenwick 2066
1656 jfenwick 2500 // The result will be stored in m_samples
1657     // The return value is a pointer to the DataVector, offset is the offset within the return value
1658     const DataTypes::ValueType*
1659     DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1660     {
1661     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1662     // collapse so we have a 'E' node or an IDENTITY for some other type
1663     if (m_readytype!='E' && m_op!=IDENTITY)
1664     {
1665     collapse();
1666     }
1667     if (m_op==IDENTITY)
1668     {
1669     const ValueType& vec=m_id->getVectorRO();
1670     // if (m_readytype=='C')
1671     // {
1672     // roffset=0; // all samples read from the same position
1673     // return &(m_samples);
1674     // }
1675     roffset=m_id->getPointOffset(sampleNo, 0);
1676     return &(vec);
1677     }
1678     if (m_readytype!='E')
1679     {
1680     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1681     }
1682 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1683     {
1684     roffset=tid*m_samplesize;
1685     return &(m_samples); // sample is already resolved
1686     }
1687     m_sampleids[tid]=sampleNo;
1688 jfenwick 2500 switch (getOpgroup(m_op))
1689     {
1690     case G_UNARY:
1691     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1692     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1693     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1694     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1695     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1696     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1697 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1698 jfenwick 2500 default:
1699     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1700     }
1701     }
1702    
1703     const DataTypes::ValueType*
1704     DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1705     {
1706     // we assume that any collapsing has been done before we get here
1707     // since we only have one argument we don't need to think about only
1708     // processing single points.
1709     // we will also know we won't get identity nodes
1710     if (m_readytype!='E')
1711     {
1712     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1713     }
1714     if (m_op==IDENTITY)
1715     {
1716     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1717     }
1718     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1719     const double* left=&((*leftres)[roffset]);
1720     roffset=m_samplesize*tid;
1721     double* result=&(m_samples[roffset]);
1722     switch (m_op)
1723     {
1724     case SIN:
1725     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1726     break;
1727     case COS:
1728     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1729     break;
1730     case TAN:
1731     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1732     break;
1733     case ASIN:
1734     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1735     break;
1736     case ACOS:
1737     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1738     break;
1739     case ATAN:
1740     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1741     break;
1742     case SINH:
1743     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1744     break;
1745     case COSH:
1746     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1747     break;
1748     case TANH:
1749     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1750     break;
1751     case ERF:
1752     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1753     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1754     #else
1755     tensor_unary_operation(m_samplesize, left, result, ::erf);
1756     break;
1757     #endif
1758     case ASINH:
1759     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1760     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1761     #else
1762     tensor_unary_operation(m_samplesize, left, result, ::asinh);
1763     #endif
1764     break;
1765     case ACOSH:
1766     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1767     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1768     #else
1769     tensor_unary_operation(m_samplesize, left, result, ::acosh);
1770     #endif
1771     break;
1772     case ATANH:
1773     #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1774     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1775     #else
1776     tensor_unary_operation(m_samplesize, left, result, ::atanh);
1777     #endif
1778     break;
1779     case LOG10:
1780     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1781     break;
1782     case LOG:
1783     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1784     break;
1785     case SIGN:
1786     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1787     break;
1788     case ABS:
1789     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1790     break;
1791     case NEG:
1792     tensor_unary_operation(m_samplesize, left, result, negate<double>());
1793     break;
1794     case POS:
1795     // it doesn't mean anything for delayed.
1796     // it will just trigger a deep copy of the lazy object
1797     throw DataException("Programmer error - POS not supported for lazy data.");
1798     break;
1799     case EXP:
1800     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1801     break;
1802     case SQRT:
1803     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1804     break;
1805     case RECIP:
1806     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1807     break;
1808     case GZ:
1809     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1810     break;
1811     case LZ:
1812     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1813     break;
1814     case GEZ:
1815     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1816     break;
1817     case LEZ:
1818     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1819     break;
1820     // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1821     case NEZ:
1822     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1823     break;
1824     case EZ:
1825     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1826     break;
1827    
1828     default:
1829     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1830     }
1831     return &(m_samples);
1832     }
1833    
1834    
1835     const DataTypes::ValueType*
1836 jfenwick 2721 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1837     {
1838     // we assume that any collapsing has been done before we get here
1839     // since we only have one argument we don't need to think about only
1840     // processing single points.
1841     // we will also know we won't get identity nodes
1842     if (m_readytype!='E')
1843     {
1844     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1845     }
1846     if (m_op==IDENTITY)
1847     {
1848     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1849     }
1850     size_t loffset=0;
1851     const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1852    
1853     roffset=m_samplesize*tid;
1854 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1855     unsigned int psize=DataTypes::noValues(getShape());
1856 jfenwick 2721 double* result=&(m_samples[roffset]);
1857     switch (m_op)
1858     {
1859     case MINVAL:
1860     {
1861 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1862     {
1863     FMin op;
1864     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1865     loffset+=psize;
1866     result++;
1867     }
1868 jfenwick 2721 }
1869     break;
1870     case MAXVAL:
1871     {
1872 jfenwick 2734 for (unsigned int z=0;z<ndpps;++z)
1873     {
1874 jfenwick 2721 FMax op;
1875     *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1876 jfenwick 2734 loffset+=psize;
1877     result++;
1878     }
1879 jfenwick 2721 }
1880     break;
1881     default:
1882     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1883     }
1884     return &(m_samples);
1885     }
1886    
1887     const DataTypes::ValueType*
1888 jfenwick 2500 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1889     {
1890     // we assume that any collapsing has been done before we get here
1891     // since we only have one argument we don't need to think about only
1892     // processing single points.
1893     if (m_readytype!='E')
1894     {
1895     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1896     }
1897     if (m_op==IDENTITY)
1898     {
1899     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1900     }
1901     size_t subroffset;
1902     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1903     roffset=m_samplesize*tid;
1904     size_t loop=0;
1905     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1906     size_t step=getNoValues();
1907     size_t offset=roffset;
1908     switch (m_op)
1909     {
1910     case SYM:
1911     for (loop=0;loop<numsteps;++loop)
1912     {
1913     DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1914     subroffset+=step;
1915     offset+=step;
1916     }
1917     break;
1918     case NSYM:
1919     for (loop=0;loop<numsteps;++loop)
1920     {
1921     DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1922     subroffset+=step;
1923     offset+=step;
1924     }
1925     break;
1926     default:
1927     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1928     }
1929     return &m_samples;
1930     }
1931    
1932     const DataTypes::ValueType*
1933     DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1934     {
1935     // we assume that any collapsing has been done before we get here
1936     // since we only have one argument we don't need to think about only
1937     // processing single points.
1938     if (m_readytype!='E')
1939     {
1940     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1941     }
1942     if (m_op==IDENTITY)
1943     {
1944     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1945     }
1946     size_t subroffset;
1947     size_t offset;
1948     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1949     roffset=m_samplesize*tid;
1950     offset=roffset;
1951     size_t loop=0;
1952     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1953     size_t outstep=getNoValues();
1954     size_t instep=m_left->getNoValues();
1955     switch (m_op)
1956     {
1957     case TRACE:
1958     for (loop=0;loop<numsteps;++loop)
1959     {
1960     DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1961     subroffset+=instep;
1962     offset+=outstep;
1963     }
1964     break;
1965     case TRANS:
1966     for (loop=0;loop<numsteps;++loop)
1967     {
1968     DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1969     subroffset+=instep;
1970     offset+=outstep;
1971     }
1972     break;
1973     default:
1974     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1975     }
1976     return &m_samples;
1977     }
1978    
1979    
1980     const DataTypes::ValueType*
1981     DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1982     {
1983     if (m_readytype!='E')
1984     {
1985     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1986     }
1987     if (m_op==IDENTITY)
1988     {
1989     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1990     }
1991     size_t subroffset;
1992     size_t offset;
1993     const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1994     roffset=m_samplesize*tid;
1995     offset=roffset;
1996     size_t loop=0;
1997     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1998     size_t outstep=getNoValues();
1999     size_t instep=m_left->getNoValues();
2000     switch (m_op)
2001     {
2002     case SWAP:
2003     for (loop=0;loop<numsteps;++loop)
2004     {
2005     DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
2006     subroffset+=instep;
2007     offset+=outstep;
2008     }
2009     break;
2010     default:
2011     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
2012     }
2013     return &m_samples;
2014     }
2015    
2016    
2017    
2018     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2019     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2020     // If both children are expanded, then we can process them in a single operation (we treat
2021     // the whole sample as one big datapoint.
2022     // If one of the children is not expanded, then we need to treat each point in the sample
2023     // individually.
2024     // There is an additional complication when scalar operations are considered.
2025     // For example, 2+Vector.
2026     // In this case each double within the point is treated individually
2027     const DataTypes::ValueType*
2028     DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
2029     {
2030     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
2031    
2032     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2033     // first work out which of the children are expanded
2034     bool leftExp=(m_left->m_readytype=='E');
2035     bool rightExp=(m_right->m_readytype=='E');
2036     if (!leftExp && !rightExp)
2037     {
2038     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
2039     }
2040     bool leftScalar=(m_left->getRank()==0);
2041     bool rightScalar=(m_right->getRank()==0);
2042     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
2043     {
2044     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
2045     }
2046     size_t leftsize=m_left->getNoValues();
2047     size_t rightsize=m_right->getNoValues();
2048     size_t chunksize=1; // how many doubles will be processed in one go
2049     int leftstep=0; // how far should the left offset advance after each step
2050     int rightstep=0;
2051     int numsteps=0; // total number of steps for the inner loop
2052     int oleftstep=0; // the o variables refer to the outer loop
2053     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
2054     int onumsteps=1;
2055    
2056     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
2057     bool RES=(rightExp && rightScalar);
2058     bool LS=(!leftExp && leftScalar); // left is a single scalar
2059     bool RS=(!rightExp && rightScalar);
2060     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
2061     bool RN=(!rightExp && !rightScalar);
2062     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
2063     bool REN=(rightExp && !rightScalar);
2064    
2065     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
2066     {
2067     chunksize=m_left->getNumDPPSample()*leftsize;
2068     leftstep=0;
2069     rightstep=0;
2070     numsteps=1;
2071     }
2072     else if (LES || RES)
2073     {
2074     chunksize=1;
2075     if (LES) // left is an expanded scalar
2076     {
2077     if (RS)
2078     {
2079     leftstep=1;
2080     rightstep=0;
2081     numsteps=m_left->getNumDPPSample();
2082     }
2083     else // RN or REN
2084     {
2085     leftstep=0;
2086     oleftstep=1;
2087     rightstep=1;
2088     orightstep=(RN ? -(int)rightsize : 0);
2089     numsteps=rightsize;
2090     onumsteps=m_left->getNumDPPSample();
2091     }
2092     }
2093     else // right is an expanded scalar
2094     {
2095     if (LS)
2096     {
2097     rightstep=1;
2098     leftstep=0;
2099     numsteps=m_right->getNumDPPSample();
2100     }
2101     else
2102     {
2103     rightstep=0;
2104     orightstep=1;
2105     leftstep=1;
2106     oleftstep=(LN ? -(int)leftsize : 0);
2107     numsteps=leftsize;
2108     onumsteps=m_right->getNumDPPSample();
2109     }
2110     }
2111     }
2112     else // this leaves (LEN, RS), (LEN, RN) and their transposes
2113     {
2114     if (LEN) // and Right will be a single value
2115     {
2116     chunksize=rightsize;
2117     leftstep=rightsize;
2118     rightstep=0;
2119     numsteps=m_left->getNumDPPSample();
2120     if (RS)
2121     {
2122     numsteps*=leftsize;
2123     }
2124     }
2125     else // REN
2126     {
2127     chunksize=leftsize;
2128     rightstep=leftsize;
2129     leftstep=0;
2130     numsteps=m_right->getNumDPPSample();
2131     if (LS)
2132     {
2133     numsteps*=rightsize;
2134     }
2135     }
2136     }
2137    
2138     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
2139     // Get the values of sub-expressions
2140     const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
2141     const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2142     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2143     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2144     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2145     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2146     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2147     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2148     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
2149    
2150     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2151     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2152    
2153    
2154     roffset=m_samplesize*tid;
2155     double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we recieved
2156     switch(m_op)
2157     {
2158     case ADD:
2159     PROC_OP(NO_ARG,plus<double>());
2160     break;
2161     case SUB:
2162     PROC_OP(NO_ARG,minus<double>());
2163     break;
2164     case MUL:
2165     PROC_OP(NO_ARG,multiplies<double>());
2166     break;
2167     case DIV:
2168     PROC_OP(NO_ARG,divides<double>());
2169     break;
2170     case POW:
2171     PROC_OP(double (double,double),::pow);
2172     break;
2173     default:
2174     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2175     }
2176     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2177     return &m_samples;
2178     }
2179    
2180    
2181     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2182     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2183     // unlike the other resolve helpers, we must treat these datapoints separately.
2184     const DataTypes::ValueType*
2185     DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2186     {
2187     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2188    
2189     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2190     // first work out which of the children are expanded
2191     bool leftExp=(m_left->m_readytype=='E');
2192     bool rightExp=(m_right->m_readytype=='E');
2193     int steps=getNumDPPSample();
2194     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2195     int rightStep=(rightExp?m_right->getNoValues() : 0);
2196    
2197     int resultStep=getNoValues();
2198     roffset=m_samplesize*tid;
2199     size_t offset=roffset;
2200    
2201     const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2202    
2203     const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2204    
2205     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2206     cout << getNoValues() << endl;)
2207    
2208    
2209     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2210     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2211     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2212     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2213     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2214     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2215     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2216    
2217     double* resultp=&(m_samples[offset]); // results are stored at the vector offset we recieved
2218     switch(m_op)
2219     {
2220     case PROD:
2221     for (int i=0;i<steps;++i,resultp+=resultStep)
2222     {
2223     const double *ptr_0 = &((*left)[lroffset]);
2224     const double *ptr_1 = &((*right)[rroffset]);
2225    
2226     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2227     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2228    
2229     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2230    
2231     lroffset+=leftStep;
2232     rroffset+=rightStep;
2233     }
2234     break;
2235     default:
2236     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2237     }
2238     roffset=offset;
2239     return &m_samples;
2240     }
2241     #endif //LAZY_NODE_STORAGE
2242    
2243 jfenwick 1899 /*
2244     \brief Compute the value of the expression for the given sample.
2245     \return Vector which stores the value of the subexpression for the given sample.
2246     \param v A vector to store intermediate results.
2247     \param offset Index in v to begin storing results.
2248     \param sampleNo Sample number to evaluate.
2249     \param roffset (output parameter) the offset in the return vector where the result begins.
2250 jfenwick 1898
2251 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
2252     */
2253 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
2254     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
2255 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
2256     // Regardless, the storage should be assumed to be used, even if it isn't.
2257 jfenwick 1898
2258     // the roffset is the offset within the returned vector where the data begins
2259     const DataTypes::ValueType*
2260 jfenwick 2521 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2261 jfenwick 1879 {
2262 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2263 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
2264     if (m_readytype!='E' && m_op!=IDENTITY)
2265     {
2266     collapse();
2267     }
2268 jfenwick 1885 if (m_op==IDENTITY)
2269 jfenwick 1865 {
2270 jfenwick 2271 const ValueType& vec=m_id->getVectorRO();
2271 jfenwick 1889 if (m_readytype=='C')
2272     {
2273 jfenwick 1898 roffset=0;
2274 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2275 jfenwick 1898 return &(vec);
2276 jfenwick 1889 }
2277 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
2278 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
2279 jfenwick 1898 return &(vec);
2280 jfenwick 1865 }
2281 jfenwick 1889 if (m_readytype!='E')
2282     {
2283     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
2284     }
2285     switch (getOpgroup(m_op))
2286     {
2287 jfenwick 2147 case G_UNARY:
2288     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2289 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2290 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2291 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2292 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2293 jfenwick 2496 case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2294 jfenwick 2721 case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
2295 jfenwick 1889 default:
2296     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2297     }
2298 jfenwick 2152
2299 jfenwick 1889 }
2300    
2301 jfenwick 2271 const DataTypes::ValueType*
2302     DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2303     {
2304     #ifdef _OPENMP
2305     int tid=omp_get_thread_num();
2306     #else
2307     int tid=0;
2308     #endif
2309 jfenwick 2521 #ifdef LAZY_NODE_STORAGE
2310     return resolveNodeSample(tid, sampleNo, roffset);
2311     #else
2312     return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2313     #endif
2314 jfenwick 2271 }
2315    
2316    
2317 jfenwick 2497 // This needs to do the work of the identity constructor
2318 jfenwick 2177 void
2319     DataLazy::resolveToIdentity()
2320     {
2321     if (m_op==IDENTITY)
2322     return;
2323 jfenwick 2500 #ifndef LAZY_NODE_STORAGE
2324 jfenwick 2497 DataReady_ptr p=resolveVectorWorker();
2325 jfenwick 2500 #else
2326     DataReady_ptr p=resolveNodeWorker();
2327     #endif
2328 jfenwick 2177 makeIdentity(p);
2329     }
2330 jfenwick 1889
2331 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
2332     {
2333     m_op=IDENTITY;
2334     m_axis_offset=0;
2335     m_transpose=0;
2336     m_SL=m_SM=m_SR=0;
2337     m_children=m_height=0;
2338     m_id=p;
2339     if(p->isConstant()) {m_readytype='C';}
2340     else if(p->isExpanded()) {m_readytype='E';}
2341     else if (p->isTagged()) {m_readytype='T';}
2342     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2343     m_buffsRequired=1;
2344     m_samplesize=p->getNumDPPSample()*p->getNoValues();
2345     m_maxsamplesize=m_samplesize;
2346     m_left.reset();
2347     m_right.reset();
2348     }
2349    
2350 jfenwick 2497
2351     DataReady_ptr
2352     DataLazy::resolve()
2353     {
2354     resolveToIdentity();
2355     return m_id;
2356     }
2357    
2358 jfenwick 2500 #ifdef LAZY_NODE_STORAGE
2359    
2360     // This version of resolve uses storage in each node to hold results
2361     DataReady_ptr
2362     DataLazy::resolveNodeWorker()
2363     {
2364     if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2365     {
2366     collapse();
2367     }
2368     if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2369