/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Annotation of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2171 - (hide annotations)
Wed Dec 17 08:21:29 2008 UTC (10 years, 6 months ago) by phornby
File size: 51182 byte(s)
Unitary minus applied to unsigned returns unsigned error fixed
lines 1124 & 1142. Probably not what was intended......
1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 jfenwick 1899 /*
42     How does DataLazy work?
43     ~~~~~~~~~~~~~~~~~~~~~~~
44    
45     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
46     denoted left and right.
47    
48     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
49     This means that all "internal" nodes in the structure are instances of DataLazy.
50    
51     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
52     Note that IDENITY is not considered a unary operation.
53    
54     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
55     It must however form a DAG (directed acyclic graph).
56     I will refer to individual DataLazy objects with the structure as nodes.
57    
58     Each node also stores:
59     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
60     evaluated.
61     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
62     evaluate the expression.
63     - m_samplesize ~ the number of doubles stored in a sample.
64    
65     When a new node is created, the above values are computed based on the values in the child nodes.
66     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
67    
68     The resolve method, which produces a DataReady from a DataLazy, does the following:
69     1) Create a DataReady to hold the new result.
70     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
71     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
72    
73     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
74    
75     resolveSample returns a Vector* and an offset within that vector where the result is stored.
76     Normally, this would be v, but for identity nodes their internal vector is returned instead.
77    
78     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
79    
80     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
81     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
82 jfenwick 2037
83 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
84 jfenwick 2037 1) Add to the ES_optype.
85     2) determine what opgroup your operation belongs to (X)
86     3) add a string for the op to the end of ES_opstrings
87     4) increase ES_opcount
88     5) add an entry (X) to opgroups
89     6) add an entry to the switch in collapseToReady
90     7) add an entry to resolveX
91 jfenwick 1899 */
92    
93    
94 jfenwick 1865 using namespace std;
95 jfenwick 1868 using namespace boost;
96 jfenwick 1865
97     namespace escript
98     {
99    
100     namespace
101     {
102    
103 jfenwick 1886 enum ES_opgroup
104     {
105     G_UNKNOWN,
106     G_IDENTITY,
107 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
108 jfenwick 2037 G_UNARY, // pointwise operations with one argument
109 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
110 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
111 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
112 jfenwick 2066 G_TENSORPROD // general tensor product
113 jfenwick 1886 };
114    
115    
116    
117    
118 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
119     "sin","cos","tan",
120 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
121     "asinh","acosh","atanh",
122 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
123 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
124 jfenwick 2066 "symmetric","nonsymmetric",
125 jfenwick 2084 "prod",
126 jfenwick 2147 "transpose", "trace"};
127     int ES_opcount=40;
128 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
129     G_UNARY,G_UNARY,G_UNARY, //10
130     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
131     G_UNARY,G_UNARY,G_UNARY, // 20
132 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
133     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
134 jfenwick 2066 G_NP1OUT,G_NP1OUT,
135 jfenwick 2084 G_TENSORPROD,
136     G_NP1OUT_P, G_NP1OUT_P};
137 jfenwick 1886 inline
138     ES_opgroup
139     getOpgroup(ES_optype op)
140     {
141     return opgroups[op];
142     }
143    
144 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
145     FunctionSpace
146     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
147     {
148     // perhaps this should call interpolate and throw or something?
149     // maybe we need an interpolate node -
150     // that way, if interpolate is required in any other op we can just throw a
151     // programming error exception.
152 jfenwick 1879
153 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
154     FunctionSpace r=right->getFunctionSpace();
155     if (l!=r)
156     {
157     if (r.probeInterpolation(l))
158     {
159     return l;
160     }
161     if (l.probeInterpolation(r))
162     {
163     return r;
164     }
165     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
166     }
167     return l;
168 jfenwick 1865 }
169    
170     // return the shape of the result of "left op right"
171 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
172 jfenwick 1865 DataTypes::ShapeType
173     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
174     {
175 jfenwick 1879 if (left->getShape()!=right->getShape())
176     {
177 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
178 jfenwick 1908 {
179     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
180     }
181     if (left->getRank()==0) // we need to allow scalar * anything
182     {
183     return right->getShape();
184     }
185     if (right->getRank()==0)
186     {
187     return left->getShape();
188     }
189     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
190 jfenwick 1879 }
191     return left->getShape();
192 jfenwick 1865 }
193    
194 jfenwick 2084 // return the shape for "op left"
195    
196     DataTypes::ShapeType
197     resultShape(DataAbstract_ptr left, ES_optype op)
198     {
199     switch(op)
200     {
201     case TRANS:
202     return left->getShape();
203     break;
204     case TRACE:
205     return DataTypes::scalarShape;
206     break;
207     default:
208     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
209     }
210     }
211    
212 jfenwick 2066 // determine the output shape for the general tensor product operation
213     // the additional parameters return information required later for the product
214     // the majority of this code is copy pasted from C_General_Tensor_Product
215     DataTypes::ShapeType
216     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
217 jfenwick 1865 {
218 jfenwick 2066
219     // Get rank and shape of inputs
220     int rank0 = left->getRank();
221     int rank1 = right->getRank();
222     const DataTypes::ShapeType& shape0 = left->getShape();
223     const DataTypes::ShapeType& shape1 = right->getShape();
224    
225     // Prepare for the loops of the product and verify compatibility of shapes
226     int start0=0, start1=0;
227     if (transpose == 0) {}
228     else if (transpose == 1) { start0 = axis_offset; }
229     else if (transpose == 2) { start1 = rank1-axis_offset; }
230     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
231    
232 jfenwick 2085 if (rank0<axis_offset)
233     {
234     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
235     }
236 jfenwick 2066
237     // Adjust the shapes for transpose
238     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
239     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
240     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
241     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
242    
243     // Prepare for the loops of the product
244     SL=1, SM=1, SR=1;
245     for (int i=0; i<rank0-axis_offset; i++) {
246     SL *= tmpShape0[i];
247     }
248     for (int i=rank0-axis_offset; i<rank0; i++) {
249     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
250     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
251     }
252     SM *= tmpShape0[i];
253     }
254     for (int i=axis_offset; i<rank1; i++) {
255     SR *= tmpShape1[i];
256     }
257    
258     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
259     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
260     { // block to limit the scope of out_index
261     int out_index=0;
262     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
263     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
264     }
265 jfenwick 2086
266     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
267     {
268     ostringstream os;
269     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
270     throw DataException(os.str());
271     }
272    
273 jfenwick 2066 return shape2;
274 jfenwick 1865 }
275    
276 jfenwick 2066
277     // determine the number of points in the result of "left op right"
278     // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
279     // size_t
280     // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
281     // {
282     // switch (getOpgroup(op))
283     // {
284     // case G_BINARY: return left->getLength();
285     // case G_UNARY: return left->getLength();
286     // case G_NP1OUT: return left->getLength();
287     // default:
288     // throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
289     // }
290     // }
291    
292 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
293 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294 jfenwick 2066 // The same goes for G_TENSORPROD
295 jfenwick 2157 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296     // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297     // multiple times
298 jfenwick 1879 int
299     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300     {
301 jfenwick 1886 switch(getOpgroup(op))
302 jfenwick 1879 {
303 jfenwick 1886 case G_IDENTITY: return 1;
304 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305 jfenwick 2147 case G_UNARY:
306     case G_UNARY_P: return max(left->getBuffsRequired(),1);
307 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
308 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
309 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
310 jfenwick 1879 default:
311     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
312     }
313     }
314    
315 jfenwick 1899
316 jfenwick 1865 } // end anonymous namespace
317    
318    
319 jfenwick 1899
320     // Return a string representing the operation
321 jfenwick 1865 const std::string&
322     opToString(ES_optype op)
323     {
324     if (op<0 || op>=ES_opcount)
325     {
326     op=UNKNOWNOP;
327     }
328     return ES_opstrings[op];
329     }
330    
331    
332     DataLazy::DataLazy(DataAbstract_ptr p)
333     : parent(p->getFunctionSpace(),p->getShape()),
334 jfenwick 2066 m_op(IDENTITY),
335     m_axis_offset(0),
336     m_transpose(0),
337     m_SL(0), m_SM(0), m_SR(0)
338 jfenwick 1865 {
339 jfenwick 1879 if (p->isLazy())
340     {
341     // I don't want identity of Lazy.
342     // Question: Why would that be so bad?
343     // Answer: We assume that the child of ID is something we can call getVector on
344     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
345     }
346     else
347     {
348     m_id=dynamic_pointer_cast<DataReady>(p);
349 jfenwick 1889 if(p->isConstant()) {m_readytype='C';}
350     else if(p->isExpanded()) {m_readytype='E';}
351     else if (p->isTagged()) {m_readytype='T';}
352     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
353 jfenwick 1879 }
354 jfenwick 1886 m_buffsRequired=1;
355 jfenwick 1879 m_samplesize=getNumDPPSample()*getNoValues();
356 jfenwick 2066 m_maxsamplesize=m_samplesize;
357 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
358 jfenwick 1865 }
359    
360 jfenwick 1899
361    
362 jfenwick 1901
363 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
364     : parent(left->getFunctionSpace(),left->getShape()),
365 jfenwick 2066 m_op(op),
366     m_axis_offset(0),
367     m_transpose(0),
368     m_SL(0), m_SM(0), m_SR(0)
369 jfenwick 1886 {
370 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
371 jfenwick 1886 {
372     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
373     }
374 jfenwick 2066
375 jfenwick 1886 DataLazy_ptr lleft;
376     if (!left->isLazy())
377     {
378     lleft=DataLazy_ptr(new DataLazy(left));
379     }
380     else
381     {
382     lleft=dynamic_pointer_cast<DataLazy>(left);
383     }
384 jfenwick 1889 m_readytype=lleft->m_readytype;
385 jfenwick 1886 m_left=lleft;
386 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
387 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
388 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
389 jfenwick 1886 }
390    
391    
392 jfenwick 1943 // In this constructor we need to consider interpolation
393 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
394     : parent(resultFS(left,right,op), resultShape(left,right,op)),
395 jfenwick 2066 m_op(op),
396     m_SL(0), m_SM(0), m_SR(0)
397 jfenwick 1879 {
398 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
399 jfenwick 1886 {
400 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
401 jfenwick 1886 }
402 jfenwick 1943
403     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
404     {
405     FunctionSpace fs=getFunctionSpace();
406     Data ltemp(left);
407     Data tmp(ltemp,fs);
408     left=tmp.borrowDataPtr();
409     }
410 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
411 jfenwick 1943 {
412     Data tmp(Data(right),getFunctionSpace());
413     right=tmp.borrowDataPtr();
414     }
415     left->operandCheck(*right);
416    
417 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
418 jfenwick 1879 {
419     m_left=dynamic_pointer_cast<DataLazy>(left);
420     }
421     else
422     {
423     m_left=DataLazy_ptr(new DataLazy(left));
424     }
425     if (right->isLazy())
426     {
427     m_right=dynamic_pointer_cast<DataLazy>(right);
428     }
429     else
430     {
431     m_right=DataLazy_ptr(new DataLazy(right));
432     }
433 jfenwick 1889 char lt=m_left->m_readytype;
434     char rt=m_right->m_readytype;
435     if (lt=='E' || rt=='E')
436     {
437     m_readytype='E';
438     }
439     else if (lt=='T' || rt=='T')
440     {
441     m_readytype='T';
442     }
443     else
444     {
445     m_readytype='C';
446     }
447 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
448     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
449 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
450 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
451 jfenwick 1879 }
452    
453 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
454     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
455     m_op(op),
456     m_axis_offset(axis_offset),
457     m_transpose(transpose)
458     {
459     if ((getOpgroup(op)!=G_TENSORPROD))
460     {
461     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
462     }
463     if ((transpose>2) || (transpose<0))
464     {
465     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
466     }
467     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
468     {
469     FunctionSpace fs=getFunctionSpace();
470     Data ltemp(left);
471     Data tmp(ltemp,fs);
472     left=tmp.borrowDataPtr();
473     }
474     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
475     {
476     Data tmp(Data(right),getFunctionSpace());
477     right=tmp.borrowDataPtr();
478     }
479     left->operandCheck(*right);
480 jfenwick 1879
481 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
482     {
483     m_left=dynamic_pointer_cast<DataLazy>(left);
484     }
485     else
486     {
487     m_left=DataLazy_ptr(new DataLazy(left));
488     }
489     if (right->isLazy())
490     {
491     m_right=dynamic_pointer_cast<DataLazy>(right);
492     }
493     else
494     {
495     m_right=DataLazy_ptr(new DataLazy(right));
496     }
497     char lt=m_left->m_readytype;
498     char rt=m_right->m_readytype;
499     if (lt=='E' || rt=='E')
500     {
501     m_readytype='E';
502     }
503     else if (lt=='T' || rt=='T')
504     {
505     m_readytype='T';
506     }
507     else
508     {
509     m_readytype='C';
510     }
511     m_samplesize=getNumDPPSample()*getNoValues();
512     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
513     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
514 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
515 jfenwick 2066 }
516    
517    
518 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
519     : parent(left->getFunctionSpace(), resultShape(left,op)),
520     m_op(op),
521     m_axis_offset(axis_offset),
522 jfenwick 2147 m_transpose(0),
523     m_tol(0)
524 jfenwick 2084 {
525     if ((getOpgroup(op)!=G_NP1OUT_P))
526     {
527     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
528     }
529     DataLazy_ptr lleft;
530     if (!left->isLazy())
531     {
532     lleft=DataLazy_ptr(new DataLazy(left));
533     }
534     else
535     {
536     lleft=dynamic_pointer_cast<DataLazy>(left);
537     }
538     m_readytype=lleft->m_readytype;
539     m_left=lleft;
540     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
541     m_samplesize=getNumDPPSample()*getNoValues();
542     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
543 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
544 jfenwick 2084 }
545    
546 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
547     : parent(left->getFunctionSpace(), left->getShape()),
548     m_op(op),
549     m_axis_offset(0),
550     m_transpose(0),
551     m_tol(tol)
552     {
553     if ((getOpgroup(op)!=G_UNARY_P))
554     {
555     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
556     }
557     DataLazy_ptr lleft;
558     if (!left->isLazy())
559     {
560     lleft=DataLazy_ptr(new DataLazy(left));
561     }
562     else
563     {
564     lleft=dynamic_pointer_cast<DataLazy>(left);
565     }
566     m_readytype=lleft->m_readytype;
567     m_left=lleft;
568     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
569     m_samplesize=getNumDPPSample()*getNoValues();
570     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
571     LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
572     }
573 jfenwick 2084
574 jfenwick 1865 DataLazy::~DataLazy()
575     {
576     }
577    
578 jfenwick 1879
579     int
580     DataLazy::getBuffsRequired() const
581 jfenwick 1865 {
582 jfenwick 1879 return m_buffsRequired;
583     }
584    
585 jfenwick 1884
586 jfenwick 2066 size_t
587     DataLazy::getMaxSampleSize() const
588     {
589     return m_maxsamplesize;
590     }
591    
592 jfenwick 1899 /*
593     \brief Evaluates the expression using methods on Data.
594     This does the work for the collapse method.
595     For reasons of efficiency do not call this method on DataExpanded nodes.
596     */
597 jfenwick 1889 DataReady_ptr
598     DataLazy::collapseToReady()
599     {
600     if (m_readytype=='E')
601     { // this is more an efficiency concern than anything else
602     throw DataException("Programmer Error - do not use collapse on Expanded data.");
603     }
604     if (m_op==IDENTITY)
605     {
606     return m_id;
607     }
608     DataReady_ptr pleft=m_left->collapseToReady();
609     Data left(pleft);
610     Data right;
611 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
612 jfenwick 1889 {
613     right=Data(m_right->collapseToReady());
614     }
615     Data result;
616     switch(m_op)
617     {
618     case ADD:
619     result=left+right;
620     break;
621     case SUB:
622     result=left-right;
623     break;
624     case MUL:
625     result=left*right;
626     break;
627     case DIV:
628     result=left/right;
629     break;
630     case SIN:
631     result=left.sin();
632     break;
633     case COS:
634     result=left.cos();
635     break;
636     case TAN:
637     result=left.tan();
638     break;
639     case ASIN:
640     result=left.asin();
641     break;
642     case ACOS:
643     result=left.acos();
644     break;
645     case ATAN:
646     result=left.atan();
647     break;
648     case SINH:
649     result=left.sinh();
650     break;
651     case COSH:
652     result=left.cosh();
653     break;
654     case TANH:
655     result=left.tanh();
656     break;
657     case ERF:
658     result=left.erf();
659     break;
660     case ASINH:
661     result=left.asinh();
662     break;
663     case ACOSH:
664     result=left.acosh();
665     break;
666     case ATANH:
667     result=left.atanh();
668     break;
669     case LOG10:
670     result=left.log10();
671     break;
672     case LOG:
673     result=left.log();
674     break;
675     case SIGN:
676     result=left.sign();
677     break;
678     case ABS:
679     result=left.abs();
680     break;
681     case NEG:
682     result=left.neg();
683     break;
684     case POS:
685     // it doesn't mean anything for delayed.
686     // it will just trigger a deep copy of the lazy object
687     throw DataException("Programmer error - POS not supported for lazy data.");
688     break;
689     case EXP:
690     result=left.exp();
691     break;
692     case SQRT:
693     result=left.sqrt();
694     break;
695     case RECIP:
696     result=left.oneOver();
697     break;
698     case GZ:
699     result=left.wherePositive();
700     break;
701     case LZ:
702     result=left.whereNegative();
703     break;
704     case GEZ:
705     result=left.whereNonNegative();
706     break;
707     case LEZ:
708     result=left.whereNonPositive();
709     break;
710 jfenwick 2147 case NEZ:
711     result=left.whereNonZero(m_tol);
712     break;
713     case EZ:
714     result=left.whereZero(m_tol);
715     break;
716 jfenwick 2037 case SYM:
717     result=left.symmetric();
718     break;
719     case NSYM:
720     result=left.nonsymmetric();
721     break;
722 jfenwick 2066 case PROD:
723     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
724     break;
725 jfenwick 2084 case TRANS:
726     result=left.transpose(m_axis_offset);
727     break;
728     case TRACE:
729     result=left.trace(m_axis_offset);
730     break;
731 jfenwick 1889 default:
732 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
733 jfenwick 1889 }
734     return result.borrowReadyPtr();
735     }
736    
737 jfenwick 1899 /*
738     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
739     This method uses the original methods on the Data class to evaluate the expressions.
740     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
741     the purpose of using DataLazy in the first place).
742     */
743 jfenwick 1889 void
744     DataLazy::collapse()
745     {
746     if (m_op==IDENTITY)
747     {
748     return;
749     }
750     if (m_readytype=='E')
751     { // this is more an efficiency concern than anything else
752     throw DataException("Programmer Error - do not use collapse on Expanded data.");
753     }
754     m_id=collapseToReady();
755     m_op=IDENTITY;
756     }
757    
758 jfenwick 1899 /*
759 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
760 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
761     \param v A vector to store intermediate results.
762     \param offset Index in v to begin storing results.
763     \param sampleNo Sample number to evaluate.
764     \param roffset (output parameter) the offset in the return vector where the result begins.
765    
766     The return value will be an existing vector so do not deallocate it.
767     If the result is stored in v it should be stored at the offset given.
768     Everything from offset to the end of v should be considered available for this method to use.
769     */
770 jfenwick 1898 DataTypes::ValueType*
771     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
772 jfenwick 1889 {
773     // we assume that any collapsing has been done before we get here
774     // since we only have one argument we don't need to think about only
775     // processing single points.
776     if (m_readytype!='E')
777     {
778     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
779     }
780 jfenwick 1898 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
781     const double* left=&((*vleft)[roffset]);
782 jfenwick 1889 double* result=&(v[offset]);
783 jfenwick 1898 roffset=offset;
784 jfenwick 1889 switch (m_op)
785     {
786     case SIN:
787 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
788 jfenwick 1889 break;
789     case COS:
790 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
791 jfenwick 1889 break;
792     case TAN:
793 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
794 jfenwick 1889 break;
795     case ASIN:
796 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
797 jfenwick 1889 break;
798     case ACOS:
799 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
800 jfenwick 1889 break;
801     case ATAN:
802 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
803 jfenwick 1889 break;
804     case SINH:
805 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
806 jfenwick 1889 break;
807     case COSH:
808 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
809 jfenwick 1889 break;
810     case TANH:
811 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
812 jfenwick 1889 break;
813     case ERF:
814 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
816     #else
817     tensor_unary_operation(m_samplesize, left, result, ::erf);
818     break;
819     #endif
820     case ASINH:
821 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
823     #else
824     tensor_unary_operation(m_samplesize, left, result, ::asinh);
825     #endif
826     break;
827     case ACOSH:
828 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
830     #else
831     tensor_unary_operation(m_samplesize, left, result, ::acosh);
832     #endif
833     break;
834     case ATANH:
835 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
836 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
837     #else
838     tensor_unary_operation(m_samplesize, left, result, ::atanh);
839     #endif
840     break;
841     case LOG10:
842 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
843 jfenwick 1889 break;
844     case LOG:
845 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
846 jfenwick 1889 break;
847     case SIGN:
848     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
849     break;
850     case ABS:
851 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
852 jfenwick 1889 break;
853     case NEG:
854     tensor_unary_operation(m_samplesize, left, result, negate<double>());
855     break;
856     case POS:
857     // it doesn't mean anything for delayed.
858     // it will just trigger a deep copy of the lazy object
859     throw DataException("Programmer error - POS not supported for lazy data.");
860     break;
861     case EXP:
862 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
863 jfenwick 1889 break;
864     case SQRT:
865 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
866 jfenwick 1889 break;
867     case RECIP:
868     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
869     break;
870     case GZ:
871     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
872     break;
873     case LZ:
874     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
875     break;
876     case GEZ:
877     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
878     break;
879     case LEZ:
880     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
881     break;
882 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
883     case NEZ:
884     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
885     break;
886     case EZ:
887     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
888     break;
889 jfenwick 1889
890     default:
891     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
892     }
893 jfenwick 1898 return &v;
894 jfenwick 1889 }
895    
896    
897 jfenwick 2147
898    
899    
900    
901 jfenwick 2037 /*
902     \brief Compute the value of the expression (unary operation) for the given sample.
903     \return Vector which stores the value of the subexpression for the given sample.
904     \param v A vector to store intermediate results.
905     \param offset Index in v to begin storing results.
906     \param sampleNo Sample number to evaluate.
907     \param roffset (output parameter) the offset in the return vector where the result begins.
908 jfenwick 1898
909 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
910     If the result is stored in v it should be stored at the offset given.
911     Everything from offset to the end of v should be considered available for this method to use.
912     */
913     DataTypes::ValueType*
914     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
915     {
916     // we assume that any collapsing has been done before we get here
917     // since we only have one argument we don't need to think about only
918     // processing single points.
919     if (m_readytype!='E')
920     {
921     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
922     }
923     // since we can't write the result over the input, we need a result offset further along
924     size_t subroffset=roffset+m_samplesize;
925 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927 jfenwick 2037 roffset=offset;
928 jfenwick 2157 size_t loop=0;
929     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930     size_t step=getNoValues();
931 jfenwick 2037 switch (m_op)
932     {
933     case SYM:
934 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
935     {
936     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937     subroffset+=step;
938     offset+=step;
939     }
940 jfenwick 2037 break;
941     case NSYM:
942 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
943     {
944     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945     subroffset+=step;
946     offset+=step;
947     }
948 jfenwick 2037 break;
949     default:
950     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
951     }
952     return &v;
953     }
954 jfenwick 1898
955 jfenwick 2084 /*
956     \brief Compute the value of the expression (unary operation) for the given sample.
957     \return Vector which stores the value of the subexpression for the given sample.
958     \param v A vector to store intermediate results.
959     \param offset Index in v to begin storing results.
960     \param sampleNo Sample number to evaluate.
961     \param roffset (output parameter) the offset in the return vector where the result begins.
962 jfenwick 1899
963 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
964     If the result is stored in v it should be stored at the offset given.
965     Everything from offset to the end of v should be considered available for this method to use.
966     */
967     DataTypes::ValueType*
968     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969     {
970     // we assume that any collapsing has been done before we get here
971     // since we only have one argument we don't need to think about only
972     // processing single points.
973     if (m_readytype!='E')
974     {
975     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976     }
977     // since we can't write the result over the input, we need a result offset further along
978 jfenwick 2157 size_t subroffset;
979     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
981     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
982 jfenwick 2084 roffset=offset;
983 jfenwick 2157 size_t loop=0;
984     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
985 jfenwick 2166 size_t outstep=getNoValues();
986     size_t instep=m_left->getNoValues();
987     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
988 jfenwick 2084 switch (m_op)
989     {
990     case TRACE:
991 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
992     {
993 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
994     if (zz==5800)
995     {
996     LAZYDEBUG(cerr << "point=" << zz<< endl;)
997     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
998     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
999     LAZYDEBUG(cerr << subroffset << endl;)
1000     LAZYDEBUG(cerr << "output=" << offset << endl;)
1001     }
1002     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1003     if (zz==5800)
1004     {
1005     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1006     }
1007     subroffset+=instep;
1008     offset+=outstep;
1009 jfenwick 2157 }
1010 jfenwick 2084 break;
1011     case TRANS:
1012 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1013     {
1014     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1015 jfenwick 2166 subroffset+=instep;
1016     offset+=outstep;
1017 jfenwick 2157 }
1018 jfenwick 2084 break;
1019     default:
1020     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1021     }
1022     return &v;
1023     }
1024 jfenwick 2037
1025    
1026 phornby 1987 #define PROC_OP(TYPE,X) \
1027 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1028     {\
1029     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1030     { \
1031     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1032 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1033 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1034 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1035 jfenwick 2152 lroffset+=leftstep; \
1036     rroffset+=rightstep; \
1037     }\
1038     lroffset+=oleftstep;\
1039     rroffset+=orightstep;\
1040 jfenwick 1889 }
1041    
1042 jfenwick 1899 /*
1043     \brief Compute the value of the expression (binary operation) for the given sample.
1044     \return Vector which stores the value of the subexpression for the given sample.
1045     \param v A vector to store intermediate results.
1046     \param offset Index in v to begin storing results.
1047     \param sampleNo Sample number to evaluate.
1048     \param roffset (output parameter) the offset in the return vector where the result begins.
1049    
1050     The return value will be an existing vector so do not deallocate it.
1051     If the result is stored in v it should be stored at the offset given.
1052     Everything from offset to the end of v should be considered available for this method to use.
1053     */
1054     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1055     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1056     // If both children are expanded, then we can process them in a single operation (we treat
1057     // the whole sample as one big datapoint.
1058     // If one of the children is not expanded, then we need to treat each point in the sample
1059     // individually.
1060 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1061     // For example, 2+Vector.
1062     // In this case each double within the point is treated individually
1063 jfenwick 1898 DataTypes::ValueType*
1064 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1065 jfenwick 1889 {
1066 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1067 jfenwick 1889
1068 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1069     // first work out which of the children are expanded
1070 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1071     bool rightExp=(m_right->m_readytype=='E');
1072 jfenwick 2084 if (!leftExp && !rightExp)
1073     {
1074     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1075     }
1076     bool leftScalar=(m_left->getRank()==0);
1077     bool rightScalar=(m_right->getRank()==0);
1078 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1079 jfenwick 1908 {
1080 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1081     }
1082     size_t leftsize=m_left->getNoValues();
1083     size_t rightsize=m_right->getNoValues();
1084     size_t chunksize=1; // how many doubles will be processed in one go
1085     int leftstep=0; // how far should the left offset advance after each step
1086     int rightstep=0;
1087     int numsteps=0; // total number of steps for the inner loop
1088     int oleftstep=0; // the o variables refer to the outer loop
1089     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1090     int onumsteps=1;
1091    
1092     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1093     bool RES=(rightExp && rightScalar);
1094     bool LS=(!leftExp && leftScalar); // left is a single scalar
1095     bool RS=(!rightExp && rightScalar);
1096     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1097     bool RN=(!rightExp && !rightScalar);
1098     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1099     bool REN=(rightExp && !rightScalar);
1100    
1101     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1102     {
1103     chunksize=m_left->getNumDPPSample()*leftsize;
1104     leftstep=0;
1105     rightstep=0;
1106     numsteps=1;
1107     }
1108     else if (LES || RES)
1109     {
1110     chunksize=1;
1111     if (LES) // left is an expanded scalar
1112 jfenwick 2084 {
1113 jfenwick 2152 if (RS)
1114     {
1115     leftstep=1;
1116     rightstep=0;
1117     numsteps=m_left->getNumDPPSample();
1118     }
1119     else // RN or REN
1120     {
1121     leftstep=0;
1122     oleftstep=1;
1123     rightstep=1;
1124 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1125 jfenwick 2152 numsteps=rightsize;
1126     onumsteps=m_left->getNumDPPSample();
1127     }
1128 jfenwick 2084 }
1129 jfenwick 2152 else // right is an expanded scalar
1130     {
1131     if (LS)
1132     {
1133     rightstep=1;
1134     leftstep=0;
1135     numsteps=m_right->getNumDPPSample();
1136     }
1137     else
1138     {
1139     rightstep=0;
1140     orightstep=1;
1141     leftstep=1;
1142 phornby 2171 oleftstep=(RN ? -(int)leftsize : 0);
1143 jfenwick 2152 numsteps=leftsize;
1144     onumsteps=m_right->getNumDPPSample();
1145     }
1146     }
1147     }
1148     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1149     {
1150     if (LEN) // and Right will be a single value
1151     {
1152     chunksize=rightsize;
1153     leftstep=rightsize;
1154     rightstep=0;
1155     numsteps=m_left->getNumDPPSample();
1156     if (RS)
1157     {
1158     numsteps*=leftsize;
1159     }
1160     }
1161     else // REN
1162     {
1163     chunksize=leftsize;
1164     rightstep=leftsize;
1165     leftstep=0;
1166     numsteps=m_right->getNumDPPSample();
1167     if (LS)
1168     {
1169     numsteps*=rightsize;
1170     }
1171     }
1172     }
1173    
1174     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1175 jfenwick 1899 // Get the values of sub-expressions
1176 jfenwick 2157 const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1177     // calcBufss for why we can't put offset as the 2nd param above
1178     const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1179 jfenwick 1899 // the right child starts further along.
1180 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1181     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1182     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1183     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1184     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1185     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1186     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1187 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1188 jfenwick 1889 switch(m_op)
1189     {
1190     case ADD:
1191 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1192 jfenwick 1889 break;
1193 jfenwick 1899 case SUB:
1194 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1195 jfenwick 1899 break;
1196     case MUL:
1197 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1198 jfenwick 1899 break;
1199     case DIV:
1200 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1201 jfenwick 1899 break;
1202 jfenwick 1910 case POW:
1203 phornby 1994 PROC_OP(double (double,double),::pow);
1204 jfenwick 1910 break;
1205 jfenwick 1889 default:
1206 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1207 jfenwick 1889 }
1208 jfenwick 1899 roffset=offset;
1209 jfenwick 1898 return &v;
1210 jfenwick 1889 }
1211    
1212 jfenwick 1898
1213 jfenwick 2152
1214 jfenwick 2066 /*
1215     \brief Compute the value of the expression (tensor product) for the given sample.
1216     \return Vector which stores the value of the subexpression for the given sample.
1217     \param v A vector to store intermediate results.
1218     \param offset Index in v to begin storing results.
1219     \param sampleNo Sample number to evaluate.
1220     \param roffset (output parameter) the offset in the return vector where the result begins.
1221 jfenwick 1898
1222 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1223     If the result is stored in v it should be stored at the offset given.
1224     Everything from offset to the end of v should be considered available for this method to use.
1225     */
1226     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1227     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1228     // unlike the other resolve helpers, we must treat these datapoints separately.
1229     DataTypes::ValueType*
1230     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1231     {
1232 jfenwick 2092 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1233 jfenwick 2066
1234     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1235     // first work out which of the children are expanded
1236     bool leftExp=(m_left->m_readytype=='E');
1237     bool rightExp=(m_right->m_readytype=='E');
1238     int steps=getNumDPPSample();
1239     int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1240     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1241     int resultStep=max(leftStep,rightStep); // only one (at most) should be !=0
1242     // Get the values of sub-expressions (leave a gap of one sample for the result).
1243 jfenwick 2153 int gap=offset+m_left->getMaxSampleSize(); // actually only needs to be m_left->m_samplesize
1244     const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1245     gap+=m_right->getMaxSampleSize();
1246     const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1247     LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1248     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1249     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1250     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1251     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1252     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1253 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1254     switch(m_op)
1255     {
1256     case PROD:
1257     for (int i=0;i<steps;++i,resultp+=resultStep)
1258     {
1259 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1260     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1261     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1262 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1263     const double *ptr_1 = &((*right)[rroffset]);
1264 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1265     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1266 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1267     lroffset+=leftStep;
1268     rroffset+=rightStep;
1269     }
1270     break;
1271     default:
1272     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1273     }
1274     roffset=offset;
1275     return &v;
1276     }
1277    
1278    
1279    
1280 jfenwick 1899 /*
1281     \brief Compute the value of the expression for the given sample.
1282     \return Vector which stores the value of the subexpression for the given sample.
1283     \param v A vector to store intermediate results.
1284     \param offset Index in v to begin storing results.
1285     \param sampleNo Sample number to evaluate.
1286     \param roffset (output parameter) the offset in the return vector where the result begins.
1287 jfenwick 1898
1288 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
1289     */
1290 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
1291     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1292 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
1293     // Regardless, the storage should be assumed to be used, even if it isn't.
1294 jfenwick 1898
1295     // the roffset is the offset within the returned vector where the data begins
1296     const DataTypes::ValueType*
1297     DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1298 jfenwick 1879 {
1299 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1300 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
1301     if (m_readytype!='E' && m_op!=IDENTITY)
1302     {
1303     collapse();
1304     }
1305 jfenwick 1885 if (m_op==IDENTITY)
1306 jfenwick 1865 {
1307 jfenwick 1879 const ValueType& vec=m_id->getVector();
1308 jfenwick 1889 if (m_readytype=='C')
1309     {
1310 jfenwick 1898 roffset=0;
1311 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1312 jfenwick 1898 return &(vec);
1313 jfenwick 1889 }
1314 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
1315 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1316 jfenwick 1898 return &(vec);
1317 jfenwick 1865 }
1318 jfenwick 1889 if (m_readytype!='E')
1319     {
1320     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1321     }
1322     switch (getOpgroup(m_op))
1323     {
1324 jfenwick 2147 case G_UNARY:
1325     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1326 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1327 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1328 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1329 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1330 jfenwick 1889 default:
1331     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1332     }
1333 jfenwick 2152
1334 jfenwick 1889 }
1335    
1336    
1337 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
1338     // Each sample is evaluated independently and copied into the result DataExpanded.
1339 jfenwick 1879 DataReady_ptr
1340     DataLazy::resolve()
1341     {
1342    
1343 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1344     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1345 jfenwick 1879
1346 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1347 jfenwick 1889 {
1348     collapse();
1349     }
1350 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1351 jfenwick 1889 {
1352     return m_id;
1353     }
1354     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1355 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1356 jfenwick 1899 // storage to evaluate its expression
1357 jfenwick 1885 int numthreads=1;
1358     #ifdef _OPENMP
1359 jfenwick 1889 numthreads=getNumberOfThreads();
1360 jfenwick 1885 #endif
1361 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
1362 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1363 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1364 jfenwick 1879 ValueType& resvec=result->getVector();
1365     DataReady_ptr resptr=DataReady_ptr(result);
1366     int sample;
1367 jfenwick 1898 size_t outoffset; // offset in the output data
1368 jfenwick 1885 int totalsamples=getNumSamples();
1369 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
1370     size_t resoffset=0; // where in the vector to find the answer
1371 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1372 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1373 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
1374 jfenwick 1879 {
1375 jfenwick 2157 if (sample==0) {ENABLEDEBUG}
1376 jfenwick 2166
1377     // if (sample==5800/getNumDPPSample()) {ENABLEDEBUG}
1378 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
1379 jfenwick 1885 #ifdef _OPENMP
1380 jfenwick 1898 res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1381 jfenwick 1885 #else
1382 jfenwick 1899 res=resolveSample(v,0,sample,resoffset); // res would normally be v, but not if its a single IDENTITY op.
1383 jfenwick 1885 #endif
1384 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1385 jfenwick 2157 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1386 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
1387 jfenwick 2157 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1388 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
1389 jfenwick 1879 {
1390 jfenwick 2166 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
1391 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
1392 jfenwick 1879 }
1393 jfenwick 2157 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1394 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
1395 jfenwick 2157 DISABLEDEBUG
1396 jfenwick 1879 }
1397     return resptr;
1398     }
1399    
1400 jfenwick 1865 std::string
1401     DataLazy::toString() const
1402     {
1403 jfenwick 1886 ostringstream oss;
1404     oss << "Lazy Data:";
1405     intoString(oss);
1406     return oss.str();
1407 jfenwick 1865 }
1408    
1409 jfenwick 1899
1410 jfenwick 1886 void
1411     DataLazy::intoString(ostringstream& oss) const
1412     {
1413     switch (getOpgroup(m_op))
1414     {
1415 jfenwick 1889 case G_IDENTITY:
1416     if (m_id->isExpanded())
1417     {
1418     oss << "E";
1419     }
1420     else if (m_id->isTagged())
1421     {
1422     oss << "T";
1423     }
1424     else if (m_id->isConstant())
1425     {
1426     oss << "C";
1427     }
1428     else
1429     {
1430     oss << "?";
1431     }
1432 jfenwick 1886 oss << '@' << m_id.get();
1433     break;
1434     case G_BINARY:
1435     oss << '(';
1436     m_left->intoString(oss);
1437     oss << ' ' << opToString(m_op) << ' ';
1438     m_right->intoString(oss);
1439     oss << ')';
1440     break;
1441     case G_UNARY:
1442 jfenwick 2147 case G_UNARY_P:
1443 jfenwick 2037 case G_NP1OUT:
1444 jfenwick 2084 case G_NP1OUT_P:
1445 jfenwick 1886 oss << opToString(m_op) << '(';
1446     m_left->intoString(oss);
1447     oss << ')';
1448     break;
1449 jfenwick 2066 case G_TENSORPROD:
1450     oss << opToString(m_op) << '(';
1451     m_left->intoString(oss);
1452     oss << ", ";
1453     m_right->intoString(oss);
1454     oss << ')';
1455     break;
1456 jfenwick 1886 default:
1457     oss << "UNKNOWN";
1458     }
1459     }
1460    
1461 jfenwick 1865 DataAbstract*
1462     DataLazy::deepCopy()
1463     {
1464 jfenwick 1935 switch (getOpgroup(m_op))
1465 jfenwick 1865 {
1466 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1467     case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1468     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1469 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1470     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1471 jfenwick 1935 default:
1472     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1473 jfenwick 1865 }
1474     }
1475    
1476    
1477 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1478     // Instances of DataReady can look at the size of their vectors.
1479     // For lazy though, it could be the size the data would be if it were resolved;
1480     // or it could be some function of the lengths of the DataReady instances which
1481     // form part of the expression.
1482     // Rather than have people making assumptions, I have disabled the method.
1483 jfenwick 1865 DataTypes::ValueType::size_type
1484     DataLazy::getLength() const
1485     {
1486 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1487 jfenwick 1865 }
1488    
1489    
1490     DataAbstract*
1491     DataLazy::getSlice(const DataTypes::RegionType& region) const
1492     {
1493 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1494 jfenwick 1865 }
1495    
1496 jfenwick 1935
1497     // To do this we need to rely on our child nodes
1498 jfenwick 1865 DataTypes::ValueType::size_type
1499     DataLazy::getPointOffset(int sampleNo,
1500 jfenwick 1935 int dataPointNo)
1501     {
1502     if (m_op==IDENTITY)
1503     {
1504     return m_id->getPointOffset(sampleNo,dataPointNo);
1505     }
1506     if (m_readytype!='E')
1507     {
1508     collapse();
1509     return m_id->getPointOffset(sampleNo,dataPointNo);
1510     }
1511     // at this point we do not have an identity node and the expression will be Expanded
1512     // so we only need to know which child to ask
1513     if (m_left->m_readytype=='E')
1514     {
1515     return m_left->getPointOffset(sampleNo,dataPointNo);
1516     }
1517     else
1518     {
1519     return m_right->getPointOffset(sampleNo,dataPointNo);
1520     }
1521     }
1522    
1523     // To do this we need to rely on our child nodes
1524     DataTypes::ValueType::size_type
1525     DataLazy::getPointOffset(int sampleNo,
1526 jfenwick 1865 int dataPointNo) const
1527     {
1528 jfenwick 1935 if (m_op==IDENTITY)
1529     {
1530     return m_id->getPointOffset(sampleNo,dataPointNo);
1531     }
1532     if (m_readytype=='E')
1533     {
1534     // at this point we do not have an identity node and the expression will be Expanded
1535     // so we only need to know which child to ask
1536     if (m_left->m_readytype=='E')
1537     {
1538     return m_left->getPointOffset(sampleNo,dataPointNo);
1539     }
1540     else
1541     {
1542     return m_right->getPointOffset(sampleNo,dataPointNo);
1543     }
1544     }
1545     if (m_readytype=='C')
1546     {
1547     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1548     }
1549     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1550 jfenwick 1865 }
1551    
1552 jfenwick 1901 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1553     // to zero, all the tags from all the DataTags would be in the result.
1554     // However since they all have the same value (0) whether they are there or not should not matter.
1555     // So I have decided that for all types this method will create a constant 0.
1556     // It can be promoted up as required.
1557     // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1558     // but we can deal with that if it arrises.
1559     void
1560     DataLazy::setToZero()
1561     {
1562     DataTypes::ValueType v(getNoValues(),0);
1563     m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1564     m_op=IDENTITY;
1565     m_right.reset();
1566     m_left.reset();
1567     m_readytype='C';
1568     m_buffsRequired=1;
1569     }
1570    
1571 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26