/[escript]/trunk/escript/src/DataLazy.cpp

Annotation of /trunk/escript/src/DataLazy.cpp



Revision 2152
Thu Dec 11 04:26:42 2008 UTC by jfenwick
File size: 47508 byte(s)
Fixed some errors related to lazy processing of scalars.
AutoLazy passes more unit tests before failing.
1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2092 // #define LAZYDEBUG(X) X;
32     #define LAZYDEBUG(X)
33    
34 jfenwick 1899 /*
35     How does DataLazy work?
36     ~~~~~~~~~~~~~~~~~~~~~~~
37    
38     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
39     denoted left and right.
40    
41     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
42     This means that all "internal" nodes in the structure are instances of DataLazy.
43    
44     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
45     Note that IDENTITY is not considered a unary operation.
46    
47     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
48     It must however form a DAG (directed acyclic graph).
49     I will refer to individual DataLazy objects within the structure as nodes.
50    
51     Each node also stores:
52     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression were
53     evaluated.
54     - m_buffsRequired ~ the largest number of samples which would need to be kept simultaneously in order to
55     evaluate the expression.
56     - m_samplesize ~ the number of doubles stored in a sample.
57    
58     When a new node is created, the above values are computed based on the values in the child nodes.
59     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
60    
61     The resolve method, which produces a DataReady from a DataLazy, does the following:
62     1) Create a DataReady to hold the new result.
63     2) Allocate a vector (v) big enough to hold m_buffsRequired samples.
64     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
65    
66     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
67    
68     resolveSample returns a Vector* and an offset within that vector where the result is stored.
69     Normally, this would be v, but for identity nodes their internal vector is returned instead.
70    
71     The convention I use is that the resolve methods should store their results starting at the offset they are passed.
72    
73     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
74     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
75 jfenwick 2037
76 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
77 jfenwick 2037 1) Add to the ES_optype enum.
78     2) Determine which opgroup your operation belongs to (call it X).
79     3) Add a string for the op to the end of ES_opstrings.
80     4) Increase ES_opcount.
81     5) Add an entry (X) to opgroups.
82     6) Add an entry to the switch in collapseToReady.
83     7) Add an entry to the appropriate resolve method (resolveX).
84 jfenwick 1899 */
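/*
 Illustrative sketch (added for exposition, not part of the original source):
 if a is itself a lazy node, then c = a + a produces an ADD node whose two children
 both point at the same node, so the structure is a DAG rather than a tree:

     ADD
    /   \
    \   /
      a        (a single shared DataLazy node)

 See calcBuffs below for how many sample buffers such a structure needs during resolve().
*/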
85    
86    
87 jfenwick 1865 using namespace std;
88 jfenwick 1868 using namespace boost;
89 jfenwick 1865
90     namespace escript
91     {
92    
93     namespace
94     {
95    
96 jfenwick 1886 enum ES_opgroup
97     {
98     G_UNKNOWN,
99     G_IDENTITY,
100 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
101 jfenwick 2037 G_UNARY, // pointwise operations with one argument
102 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
103 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
104 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
105 jfenwick 2066 G_TENSORPROD // general tensor product
106 jfenwick 1886 };
107    
108    
109    
110    
111 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
112     "sin","cos","tan",
113 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
114     "asinh","acosh","atanh",
115 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
116 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
117 jfenwick 2066 "symmetric","nonsymmetric",
118 jfenwick 2084 "prod",
119 jfenwick 2147 "transpose", "trace"};
120     int ES_opcount=40;
121 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
122     G_UNARY,G_UNARY,G_UNARY, //10
123     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
124     G_UNARY,G_UNARY,G_UNARY, // 20
125 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
126     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
127 jfenwick 2066 G_NP1OUT,G_NP1OUT,
128 jfenwick 2084 G_TENSORPROD,
129     G_NP1OUT_P, G_NP1OUT_P};
130 jfenwick 1886 inline
131     ES_opgroup
132     getOpgroup(ES_optype op)
133     {
134     return opgroups[op];
135     }
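// Note added for exposition: ES_opstrings and opgroups are indexed by ES_optype, so the
// enum, the string table and the group table must stay aligned (and ES_opcount must match
// their length). For example, assuming SIN has the enum value matching its position above,
// getOpgroup(SIN) returns G_UNARY and opToString(SIN) returns "sin".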
136    
137 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
138     FunctionSpace
139     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140     {
141     // perhaps this should call interpolate and throw or something?
142     // maybe we need an interpolate node -
143     // that way, if interpolate is required in any other op we can just throw a
144     // programming error exception.
145 jfenwick 1879
146 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
147     FunctionSpace r=right->getFunctionSpace();
148     if (l!=r)
149     {
150     if (r.probeInterpolation(l))
151     {
152     return l;
153     }
154     if (l.probeInterpolation(r))
155     {
156     return r;
157     }
158     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
159     }
160     return l;
161 jfenwick 1865 }
162    
163     // return the shape of the result of "left op right"
164 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
165 jfenwick 1865 DataTypes::ShapeType
166     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
167     {
168 jfenwick 1879 if (left->getShape()!=right->getShape())
169     {
170 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
171 jfenwick 1908 {
172     throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
173     }
174     if (left->getRank()==0) // we need to allow scalar * anything
175     {
176     return right->getShape();
177     }
178     if (right->getRank()==0)
179     {
180     return left->getShape();
181     }
182     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
183 jfenwick 1879 }
184     return left->getShape();
185 jfenwick 1865 }
186    
187 jfenwick 2084 // return the shape for "op left"
188    
189     DataTypes::ShapeType
190     resultShape(DataAbstract_ptr left, ES_optype op)
191     {
192     switch(op)
193     {
194     case TRANS:
195     return left->getShape();
196     break;
197     case TRACE:
198     return DataTypes::scalarShape;
199     break;
200     default:
201     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
202     }
203     }
204    
205 jfenwick 2066 // determine the output shape for the general tensor product operation
206     // the additional parameters return information required later for the product
207     // the majority of this code is copy pasted from C_General_Tensor_Product
208     DataTypes::ShapeType
209     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
210 jfenwick 1865 {
211 jfenwick 2066
212     // Get rank and shape of inputs
213     int rank0 = left->getRank();
214     int rank1 = right->getRank();
215     const DataTypes::ShapeType& shape0 = left->getShape();
216     const DataTypes::ShapeType& shape1 = right->getShape();
217    
218     // Prepare for the loops of the product and verify compatibility of shapes
219     int start0=0, start1=0;
220     if (transpose == 0) {}
221     else if (transpose == 1) { start0 = axis_offset; }
222     else if (transpose == 2) { start1 = rank1-axis_offset; }
223     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
224    
225 jfenwick 2085 if (rank0<axis_offset)
226     {
227     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
228     }
229 jfenwick 2066
230     // Adjust the shapes for transpose
231     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
232     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
233     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
234     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
235    
236     // Prepare for the loops of the product
237     SL=1, SM=1, SR=1;
238     for (int i=0; i<rank0-axis_offset; i++) {
239     SL *= tmpShape0[i];
240     }
241     for (int i=rank0-axis_offset; i<rank0; i++) {
242     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
243     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
244     }
245     SM *= tmpShape0[i];
246     }
247     for (int i=axis_offset; i<rank1; i++) {
248     SR *= tmpShape1[i];
249     }
250    
251     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
252     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
253     { // block to limit the scope of out_index
254     int out_index=0;
255     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
256     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
257     }
258 jfenwick 2086
259     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
260     {
261     ostringstream os;
262     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
263     throw DataException(os.str());
264     }
265    
266 jfenwick 2066 return shape2;
267 jfenwick 1865 }
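// Worked example (added for exposition): for left shape (2,3), right shape (3,4),
// axis_offset=1 and transpose=0, the loops above give SL=2, SM=3, SR=4 and a result
// shape of (2,4) - i.e. an ordinary matrix-matrix product.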
268    
269 jfenwick 2066
270     // determine the number of points in the result of "left op right"
271     // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
272     // size_t
273     // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
274     // {
275     // switch (getOpgroup(op))
276     // {
277     // case G_BINARY: return left->getLength();
278     // case G_UNARY: return left->getLength();
279     // case G_NP1OUT: return left->getLength();
280     // default:
281     // throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
282     // }
283     // }
284    
285 jfenwick 1899 // determine the number of samples required to evaluate an expression combining left and right
286 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
287 jfenwick 2066 // The same goes for G_TENSORPROD
288 jfenwick 1879 int
289     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
290     {
291 jfenwick 1886 switch(getOpgroup(op))
292 jfenwick 1879 {
293 jfenwick 1886 case G_IDENTITY: return 1;
294     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);
295 jfenwick 2147 case G_UNARY:
296     case G_UNARY_P: return max(left->getBuffsRequired(),1);
297 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
298 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
299 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
300 jfenwick 1879 default:
301     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
302     }
303     }
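// Worked example (added for exposition): for (a+b)*c with ready leaves, each IDENTITY needs
// 1 buffer, the ADD needs max(1, 1+1) = 2 and the MUL needs max(2, 1+1) = 2; a G_TENSORPROD
// node with the same children would need 1 + max(2, 1+1) = 3.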
304    
305 jfenwick 1899
306 jfenwick 1865 } // end anonymous namespace
307    
308    
309 jfenwick 1899
310     // Return a string representing the operation
311 jfenwick 1865 const std::string&
312     opToString(ES_optype op)
313     {
314     if (op<0 || op>=ES_opcount)
315     {
316     op=UNKNOWNOP;
317     }
318     return ES_opstrings[op];
319     }
320    
321    
322     DataLazy::DataLazy(DataAbstract_ptr p)
323     : parent(p->getFunctionSpace(),p->getShape()),
324 jfenwick 2066 m_op(IDENTITY),
325     m_axis_offset(0),
326     m_transpose(0),
327     m_SL(0), m_SM(0), m_SR(0)
328 jfenwick 1865 {
329 jfenwick 1879 if (p->isLazy())
330     {
331     // I don't want identity of Lazy.
332     // Question: Why would that be so bad?
333     // Answer: We assume that the child of ID is something we can call getVector on
334     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
335     }
336     else
337     {
338     m_id=dynamic_pointer_cast<DataReady>(p);
339 jfenwick 1889 if(p->isConstant()) {m_readytype='C';}
340     else if(p->isExpanded()) {m_readytype='E';}
341     else if (p->isTagged()) {m_readytype='T';}
342     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
343 jfenwick 1879 }
344 jfenwick 1886 m_buffsRequired=1;
345 jfenwick 1879 m_samplesize=getNumDPPSample()*getNoValues();
346 jfenwick 2066 m_maxsamplesize=m_samplesize;
347 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
348 jfenwick 1865 }
349    
350 jfenwick 1899
351    
352 jfenwick 1901
353 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
354     : parent(left->getFunctionSpace(),left->getShape()),
355 jfenwick 2066 m_op(op),
356     m_axis_offset(0),
357     m_transpose(0),
358     m_SL(0), m_SM(0), m_SR(0)
359 jfenwick 1886 {
360 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
361 jfenwick 1886 {
362     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
363     }
364 jfenwick 2066
365 jfenwick 1886 DataLazy_ptr lleft;
366     if (!left->isLazy())
367     {
368     lleft=DataLazy_ptr(new DataLazy(left));
369     }
370     else
371     {
372     lleft=dynamic_pointer_cast<DataLazy>(left);
373     }
374 jfenwick 1889 m_readytype=lleft->m_readytype;
375 jfenwick 1886 m_left=lleft;
376 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
377 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
378 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
379 jfenwick 1886 }
380    
381    
382 jfenwick 1943 // In this constructor we need to consider interpolation
383 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
384     : parent(resultFS(left,right,op), resultShape(left,right,op)),
385 jfenwick 2066 m_op(op),
386     m_SL(0), m_SM(0), m_SR(0)
387 jfenwick 1879 {
388 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
389 jfenwick 1886 {
390 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
391 jfenwick 1886 }
392 jfenwick 1943
393     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
394     {
395     FunctionSpace fs=getFunctionSpace();
396     Data ltemp(left);
397     Data tmp(ltemp,fs);
398     left=tmp.borrowDataPtr();
399     }
400 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
401 jfenwick 1943 {
402     Data tmp(Data(right),getFunctionSpace());
403     right=tmp.borrowDataPtr();
404     }
405     left->operandCheck(*right);
406    
407 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
408 jfenwick 1879 {
409     m_left=dynamic_pointer_cast<DataLazy>(left);
410     }
411     else
412     {
413     m_left=DataLazy_ptr(new DataLazy(left));
414     }
415     if (right->isLazy())
416     {
417     m_right=dynamic_pointer_cast<DataLazy>(right);
418     }
419     else
420     {
421     m_right=DataLazy_ptr(new DataLazy(right));
422     }
423 jfenwick 1889 char lt=m_left->m_readytype;
424     char rt=m_right->m_readytype;
425     if (lt=='E' || rt=='E')
426     {
427     m_readytype='E';
428     }
429     else if (lt=='T' || rt=='T')
430     {
431     m_readytype='T';
432     }
433     else
434     {
435     m_readytype='C';
436     }
437 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
438     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
439 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
440 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
441 jfenwick 1879 }
442    
443 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
444     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
445     m_op(op),
446     m_axis_offset(axis_offset),
447     m_transpose(transpose)
448     {
449     if ((getOpgroup(op)!=G_TENSORPROD))
450     {
451     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
452     }
453     if ((transpose>2) || (transpose<0))
454     {
455     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
456     }
457     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
458     {
459     FunctionSpace fs=getFunctionSpace();
460     Data ltemp(left);
461     Data tmp(ltemp,fs);
462     left=tmp.borrowDataPtr();
463     }
464     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
465     {
466     Data tmp(Data(right),getFunctionSpace());
467     right=tmp.borrowDataPtr();
468     }
469     left->operandCheck(*right);
470 jfenwick 1879
471 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
472     {
473     m_left=dynamic_pointer_cast<DataLazy>(left);
474     }
475     else
476     {
477     m_left=DataLazy_ptr(new DataLazy(left));
478     }
479     if (right->isLazy())
480     {
481     m_right=dynamic_pointer_cast<DataLazy>(right);
482     }
483     else
484     {
485     m_right=DataLazy_ptr(new DataLazy(right));
486     }
487     char lt=m_left->m_readytype;
488     char rt=m_right->m_readytype;
489     if (lt=='E' || rt=='E')
490     {
491     m_readytype='E';
492     }
493     else if (lt=='T' || rt=='T')
494     {
495     m_readytype='T';
496     }
497     else
498     {
499     m_readytype='C';
500     }
501     m_samplesize=getNumDPPSample()*getNoValues();
502     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
503     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
504 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
505 jfenwick 2066 }
506    
507    
508 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
509     : parent(left->getFunctionSpace(), resultShape(left,op)),
510     m_op(op),
511     m_axis_offset(axis_offset),
512 jfenwick 2147 m_transpose(0),
513     m_tol(0)
514 jfenwick 2084 {
515     if ((getOpgroup(op)!=G_NP1OUT_P))
516     {
517     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
518     }
519     DataLazy_ptr lleft;
520     if (!left->isLazy())
521     {
522     lleft=DataLazy_ptr(new DataLazy(left));
523     }
524     else
525     {
526     lleft=dynamic_pointer_cast<DataLazy>(left);
527     }
528     m_readytype=lleft->m_readytype;
529     m_left=lleft;
530     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
531     m_samplesize=getNumDPPSample()*getNoValues();
532     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
533 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
534 jfenwick 2084 }
535    
536 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
537     : parent(left->getFunctionSpace(), left->getShape()),
538     m_op(op),
539     m_axis_offset(0),
540     m_transpose(0),
541     m_tol(tol)
542     {
543     if ((getOpgroup(op)!=G_UNARY_P))
544     {
545     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
546     }
547     DataLazy_ptr lleft;
548     if (!left->isLazy())
549     {
550     lleft=DataLazy_ptr(new DataLazy(left));
551     }
552     else
553     {
554     lleft=dynamic_pointer_cast<DataLazy>(left);
555     }
556     m_readytype=lleft->m_readytype;
557     m_left=lleft;
558     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
559     m_samplesize=getNumDPPSample()*getNoValues();
560     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
561     LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
562     }
563 jfenwick 2084
564 jfenwick 1865 DataLazy::~DataLazy()
565     {
566     }
567    
568 jfenwick 1879
569     int
570     DataLazy::getBuffsRequired() const
571 jfenwick 1865 {
572 jfenwick 1879 return m_buffsRequired;
573     }
574    
575 jfenwick 1884
576 jfenwick 2066 size_t
577     DataLazy::getMaxSampleSize() const
578     {
579     return m_maxsamplesize;
580     }
581    
582 jfenwick 1899 /*
583     \brief Evaluates the expression using methods on Data.
584     This does the work for the collapse method.
585     For reasons of efficiency do not call this method on DataExpanded nodes.
586     */
587 jfenwick 1889 DataReady_ptr
588     DataLazy::collapseToReady()
589     {
590     if (m_readytype=='E')
591     { // this is more an efficiency concern than anything else
592     throw DataException("Programmer Error - do not use collapse on Expanded data.");
593     }
594     if (m_op==IDENTITY)
595     {
596     return m_id;
597     }
598     DataReady_ptr pleft=m_left->collapseToReady();
599     Data left(pleft);
600     Data right;
601 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
602 jfenwick 1889 {
603     right=Data(m_right->collapseToReady());
604     }
605     Data result;
606     switch(m_op)
607     {
608     case ADD:
609     result=left+right;
610     break;
611     case SUB:
612     result=left-right;
613     break;
614     case MUL:
615     result=left*right;
616     break;
617     case DIV:
618     result=left/right;
619     break;
620     case SIN:
621     result=left.sin();
622     break;
623     case COS:
624     result=left.cos();
625     break;
626     case TAN:
627     result=left.tan();
628     break;
629     case ASIN:
630     result=left.asin();
631     break;
632     case ACOS:
633     result=left.acos();
634     break;
635     case ATAN:
636     result=left.atan();
637     break;
638     case SINH:
639     result=left.sinh();
640     break;
641     case COSH:
642     result=left.cosh();
643     break;
644     case TANH:
645     result=left.tanh();
646     break;
647     case ERF:
648     result=left.erf();
649     break;
650     case ASINH:
651     result=left.asinh();
652     break;
653     case ACOSH:
654     result=left.acosh();
655     break;
656     case ATANH:
657     result=left.atanh();
658     break;
659     case LOG10:
660     result=left.log10();
661     break;
662     case LOG:
663     result=left.log();
664     break;
665     case SIGN:
666     result=left.sign();
667     break;
668     case ABS:
669     result=left.abs();
670     break;
671     case NEG:
672     result=left.neg();
673     break;
674     case POS:
675     // it doesn't mean anything for delayed.
676     // it will just trigger a deep copy of the lazy object
677     throw DataException("Programmer error - POS not supported for lazy data.");
678     break;
679     case EXP:
680     result=left.exp();
681     break;
682     case SQRT:
683     result=left.sqrt();
684     break;
685     case RECIP:
686     result=left.oneOver();
687     break;
688     case GZ:
689     result=left.wherePositive();
690     break;
691     case LZ:
692     result=left.whereNegative();
693     break;
694     case GEZ:
695     result=left.whereNonNegative();
696     break;
697     case LEZ:
698     result=left.whereNonPositive();
699     break;
700 jfenwick 2147 case NEZ:
701     result=left.whereNonZero(m_tol);
702     break;
703     case EZ:
704     result=left.whereZero(m_tol);
705     break;
706 jfenwick 2037 case SYM:
707     result=left.symmetric();
708     break;
709     case NSYM:
710     result=left.nonsymmetric();
711     break;
712 jfenwick 2066 case PROD:
713     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
714     break;
715 jfenwick 2084 case TRANS:
716     result=left.transpose(m_axis_offset);
717     break;
718     case TRACE:
719     result=left.trace(m_axis_offset);
720     break;
721 jfenwick 1889 default:
722 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
723 jfenwick 1889 }
724     return result.borrowReadyPtr();
725     }
726    
727 jfenwick 1899 /*
728     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
729     This method uses the original methods on the Data class to evaluate the expressions.
730     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
731     the purpose of using DataLazy in the first place).
732     */
733 jfenwick 1889 void
734     DataLazy::collapse()
735     {
736     if (m_op==IDENTITY)
737     {
738     return;
739     }
740     if (m_readytype=='E')
741     { // this is more an efficiency concern than anything else
742     throw DataException("Programmer Error - do not use collapse on Expanded data.");
743     }
744     m_id=collapseToReady();
745     m_op=IDENTITY;
746     }
747    
748 jfenwick 1899 /*
749 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
750 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
751     \param v A vector to store intermediate results.
752     \param offset Index in v to begin storing results.
753     \param sampleNo Sample number to evaluate.
754     \param roffset (output parameter) the offset in the return vector where the result begins.
755    
756     The return value will be an existing vector so do not deallocate it.
757     If the result is stored in v it should be stored at the offset given.
758     Everything from offset to the end of v should be considered available for this method to use.
759     */
760 jfenwick 1898 DataTypes::ValueType*
761     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
762 jfenwick 1889 {
763     // we assume that any collapsing has been done before we get here
764     // since we only have one argument we don't need to think about only
765     // processing single points.
766     if (m_readytype!='E')
767     {
768     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
769     }
770 jfenwick 1898 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
771     const double* left=&((*vleft)[roffset]);
772 jfenwick 1889 double* result=&(v[offset]);
773 jfenwick 1898 roffset=offset;
774 jfenwick 1889 switch (m_op)
775     {
776     case SIN:
777 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
778 jfenwick 1889 break;
779     case COS:
780 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
781 jfenwick 1889 break;
782     case TAN:
783 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
784 jfenwick 1889 break;
785     case ASIN:
786 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
787 jfenwick 1889 break;
788     case ACOS:
789 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
790 jfenwick 1889 break;
791     case ATAN:
792 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
793 jfenwick 1889 break;
794     case SINH:
795 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
796 jfenwick 1889 break;
797     case COSH:
798 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
799 jfenwick 1889 break;
800     case TANH:
801 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
802 jfenwick 1889 break;
803     case ERF:
804 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
805 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
806     #else
807     tensor_unary_operation(m_samplesize, left, result, ::erf);
808     break;
809     #endif
810     case ASINH:
811 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
812 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
813     #else
814     tensor_unary_operation(m_samplesize, left, result, ::asinh);
815     #endif
816     break;
817     case ACOSH:
818 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
819 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
820     #else
821     tensor_unary_operation(m_samplesize, left, result, ::acosh);
822     #endif
823     break;
824     case ATANH:
825 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
826 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
827     #else
828     tensor_unary_operation(m_samplesize, left, result, ::atanh);
829     #endif
830     break;
831     case LOG10:
832 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
833 jfenwick 1889 break;
834     case LOG:
835 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
836 jfenwick 1889 break;
837     case SIGN:
838     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
839     break;
840     case ABS:
841 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
842 jfenwick 1889 break;
843     case NEG:
844     tensor_unary_operation(m_samplesize, left, result, negate<double>());
845     break;
846     case POS:
847     // it doesn't mean anything for delayed.
848     // it will just trigger a deep copy of the lazy object
849     throw DataException("Programmer error - POS not supported for lazy data.");
850     break;
851     case EXP:
852 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
853 jfenwick 1889 break;
854     case SQRT:
855 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
856 jfenwick 1889 break;
857     case RECIP:
858     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
859     break;
860     case GZ:
861     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
862     break;
863     case LZ:
864     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
865     break;
866     case GEZ:
867     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
868     break;
869     case LEZ:
870     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
871     break;
872     // These are actually G_UNARY_P but I don't see a compelling reason to treat them differently
873     case NEZ:
874     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
875     break;
876     case EZ:
877     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
878     break;
879 jfenwick 1889
880     default:
881     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
882     }
883 jfenwick 1898 return &v;
884 jfenwick 1889 }
885    
886    
887 jfenwick 2147
888    
889    
890    
891 jfenwick 2037 /*
892     \brief Compute the value of the expression (unary non-pointwise operation) for the given sample.
893     \return Vector which stores the value of the subexpression for the given sample.
894     \param v A vector to store intermediate results.
895     \param offset Index in v to begin storing results.
896     \param sampleNo Sample number to evaluate.
897     \param roffset (output parameter) the offset in the return vector where the result begins.
898 jfenwick 1898
899 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
900     If the result is stored in v it should be stored at the offset given.
901     Everything from offset to the end of v should be considered available for this method to use.
902     */
903     DataTypes::ValueType*
904     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
905     {
906     // we assume that any collapsing has been done before we get here
907     // since we only have one argument we don't need to think about only
908     // processing single points.
909     if (m_readytype!='E')
910     {
911     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
912     }
913     // since we can't write the result over the input, we need a result offset further along
914     size_t subroffset=roffset+m_samplesize;
915     const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
916     roffset=offset;
917     switch (m_op)
918     {
919     case SYM:
920     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
921     break;
922     case NSYM:
923     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
924     break;
925     default:
926     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
927     }
928     return &v;
929     }
930 jfenwick 1898
931 jfenwick 2084 /*
932     \brief Compute the value of the expression (unary non-pointwise operation requiring a parameter) for the given sample.
933     \return Vector which stores the value of the subexpression for the given sample.
934     \param v A vector to store intermediate results.
935     \param offset Index in v to begin storing results.
936     \param sampleNo Sample number to evaluate.
937     \param roffset (output parameter) the offset in the return vector where the result begins.
938 jfenwick 1899
939 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
940     If the result is stored in v it should be stored at the offset given.
941     Everything from offset to the end of v should be considered available for this method to use.
942     */
943     DataTypes::ValueType*
944     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
945     {
946     // we assume that any collapsing has been done before we get here
947     // since we only have one argument we don't need to think about only
948     // processing single points.
949     if (m_readytype!='E')
950     {
951     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
952     }
953     // since we can't write the result over the input, we need a result offset further along
954     size_t subroffset=roffset+m_samplesize;
955     const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
956     roffset=offset;
957     switch (m_op)
958     {
959     case TRACE:
960     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
961     break;
962     case TRANS:
963     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
964     break;
965     default:
966     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
967     }
968     return &v;
969     }
970 jfenwick 2037
971    
972 phornby 1987 #define PROC_OP(TYPE,X) \
973 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
974     {\
975     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
976     { \
977     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
978     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
979     lroffset+=leftstep; \
980     rroffset+=rightstep; \
981     }\
982     lroffset+=oleftstep;\
983     rroffset+=orightstep;\
984 jfenwick 1889 }
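// Note added for exposition: PROC_OP runs an outer loop of onumsteps iterations around an
// inner loop of numsteps iterations; each inner iteration applies the binary functor to a
// contiguous chunk of chunksize doubles and then advances the read offsets by
// leftstep/rightstep, while oleftstep/orightstep adjust them between outer iterations.
// resolveBinary below chooses these counters based on which operands are expanded/scalar.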
985    
986 jfenwick 1899 /*
987     \brief Compute the value of the expression (binary operation) for the given sample.
988     \return Vector which stores the value of the subexpression for the given sample.
989     \param v A vector to store intermediate results.
990     \param offset Index in v to begin storing results.
991     \param sampleNo Sample number to evaluate.
992     \param roffset (output parameter) the offset in the return vector where the result begins.
993    
994     The return value will be an existing vector so do not deallocate it.
995     If the result is stored in v it should be stored at the offset given.
996     Everything from offset to the end of v should be considered available for this method to use.
997     */
998     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
999     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1000     // If both children are expanded, then we can process them in a single operation (we treat
1001     // the whole sample as one big datapoint).
1002     // If one of the children is not expanded, then we need to treat each point in the sample
1003     // individually.
1004 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1005     // For example, 2+Vector.
1006     // In this case each double within the point is treated individually
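// Worked example (added for exposition): for a non-expanded scalar on the left and an
// expanded shape-(3,) operand on the right with 4 datapoints per sample, the setup below
// takes the REN branch: chunksize=1, leftstep=0, rightstep=1, numsteps=4*3=12, onumsteps=1,
// so the single scalar value is combined with each of the 12 doubles in the sample in turn.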
1007 jfenwick 1898 DataTypes::ValueType*
1008 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1009 jfenwick 1889 {
1010 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1011 jfenwick 1889
1012 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1013     // first work out which of the children are expanded
1014 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1015     bool rightExp=(m_right->m_readytype=='E');
1016 jfenwick 2084 if (!leftExp && !rightExp)
1017     {
1018     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1019     }
1020     bool leftScalar=(m_left->getRank()==0);
1021     bool rightScalar=(m_right->getRank()==0);
1022 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1023 jfenwick 1908 {
1024 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1025     }
1026     size_t leftsize=m_left->getNoValues();
1027     size_t rightsize=m_right->getNoValues();
1028     size_t chunksize=1; // how many doubles will be processed in one go
1029     int leftstep=0; // how far should the left offset advance after each step
1030     int rightstep=0;
1031     int numsteps=0; // total number of steps for the inner loop
1032     int oleftstep=0; // the o variables refer to the outer loop
1033     int orightstep=0; // The outer loop is only required in cases where there is an expanded scalar
1034     int onumsteps=1;
1035    
1036     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1037     bool RES=(rightExp && rightScalar);
1038     bool LS=(!leftExp && leftScalar); // left is a single scalar
1039     bool RS=(!rightExp && rightScalar);
1040     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1041     bool RN=(!rightExp && !rightScalar);
1042     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1043     bool REN=(rightExp && !rightScalar);
1044    
1045     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1046     {
1047     chunksize=m_left->getNumDPPSample()*leftsize;
1048     leftstep=0;
1049     rightstep=0;
1050     numsteps=1;
1051     }
1052     else if (LES || RES)
1053     {
1054     chunksize=1;
1055     if (LES) // left is an expanded scalar
1056 jfenwick 2084 {
1057 jfenwick 2152 if (RS)
1058     {
1059     leftstep=1;
1060     rightstep=0;
1061     numsteps=m_left->getNumDPPSample();
1062     }
1063     else // RN or REN
1064     {
1065     leftstep=0;
1066     oleftstep=1;
1067     rightstep=1;
1068     orightstep=(RN?-rightsize:0);
1069     numsteps=rightsize;
1070     onumsteps=m_left->getNumDPPSample();
1071     }
1072 jfenwick 2084 }
1073 jfenwick 2152 else // right is an expanded scalar
1074     {
1075     if (LS)
1076     {
1077     rightstep=1;
1078     leftstep=0;
1079     numsteps=m_right->getNumDPPSample();
1080     }
1081     else
1082     {
1083     rightstep=0;
1084     orightstep=1;
1085     leftstep=1;
1086     oleftstep=(LN?-leftsize:0);
1087     numsteps=leftsize;
1088     onumsteps=m_right->getNumDPPSample();
1089     }
1090     }
1091     }
1092     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1093     {
1094     if (LEN) // and Right will be a single value
1095     {
1096     chunksize=rightsize;
1097     leftstep=rightsize;
1098     rightstep=0;
1099     numsteps=m_left->getNumDPPSample();
1100     if (RS)
1101     {
1102     numsteps*=leftsize;
1103     }
1104     }
1105     else // REN
1106     {
1107     chunksize=leftsize;
1108     rightstep=leftsize;
1109     leftstep=0;
1110     numsteps=m_right->getNumDPPSample();
1111     if (LS)
1112     {
1113     numsteps*=rightsize;
1114     }
1115     }
1116     }
1117    
1118     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1119 jfenwick 1899 // Get the values of sub-expressions
1120 jfenwick 1898 const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);
1121 jfenwick 1899 const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note
1122     // the right child starts further along.
1123 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1124     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1125     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1126     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1127     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1128     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1129     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1130 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we received
1131 jfenwick 1889 switch(m_op)
1132     {
1133     case ADD:
1134 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1135 jfenwick 1889 break;
1136 jfenwick 1899 case SUB:
1137 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1138 jfenwick 1899 break;
1139     case MUL:
1140 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1141 jfenwick 1899 break;
1142     case DIV:
1143 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1144 jfenwick 1899 break;
1145 jfenwick 1910 case POW:
1146 phornby 1994 PROC_OP(double (double,double),::pow);
1147 jfenwick 1910 break;
1148 jfenwick 1889 default:
1149 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1150 jfenwick 1889 }
1151 jfenwick 1899 roffset=offset;
1152 jfenwick 1898 return &v;
1153 jfenwick 1889 }
1154    
1155 jfenwick 1898
1156 jfenwick 2152
1157 jfenwick 2066 /*
1158     \brief Compute the value of the expression (tensor product) for the given sample.
1159     \return Vector which stores the value of the subexpression for the given sample.
1160     \param v A vector to store intermediate results.
1161     \param offset Index in v to begin storing results.
1162     \param sampleNo Sample number to evaluate.
1163     \param roffset (output parameter) the offset in the return vector where the result begins.
1164 jfenwick 1898
1165 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1166     If the result is stored in v it should be stored at the offset given.
1167     Everything from offset to the end of v should be considered available for this method to use.
1168     */
1169     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1170     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1171     // unlike the other resolve helpers, we must treat these datapoints separately.
1172     DataTypes::ValueType*
1173     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1174     {
1175 jfenwick 2092 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1176 jfenwick 2066
1177     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1178     // first work out which of the children are expanded
1179     bool leftExp=(m_left->m_readytype=='E');
1180     bool rightExp=(m_right->m_readytype=='E');
1181     int steps=getNumDPPSample();
1182     int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1183     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1184     int resultStep=max(leftStep,rightStep); // only one (at most) should be !=0
1185     // Get the values of sub-expressions (leave a gap of one sample for the result).
1186     const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);
1187     const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);
1188     double* resultp=&(v[offset]); // results are stored at the vector offset we received
1189     switch(m_op)
1190     {
1191     case PROD:
1192     for (int i=0;i<steps;++i,resultp+=resultStep)
1193     {
1194     const double *ptr_0 = &((*left)[lroffset]);
1195     const double *ptr_1 = &((*right)[rroffset]);
1196     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1197     lroffset+=leftStep;
1198     rroffset+=rightStep;
1199     }
1200     break;
1201     default:
1202     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1203     }
1204     roffset=offset;
1205     return &v;
1206     }
1207    
1208    
1209    
1210 jfenwick 1899 /*
1211     \brief Compute the value of the expression for the given sample.
1212     \return Vector which stores the value of the subexpression for the given sample.
1213     \param v A vector to store intermediate results.
1214     \param offset Index in v to begin storing results.
1215     \param sampleNo Sample number to evaluate.
1216     \param roffset (output parameter) the offset in the return vector where the result begins.
1217 jfenwick 1898
1218 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
1219     */
1220 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
1221     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1222 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
1223     // Regardless, the storage should be assumed to be used, even if it isn't.
1224 jfenwick 1898
1225     // the roffset is the offset within the returned vector where the data begins
1226     const DataTypes::ValueType*
1227     DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1228 jfenwick 1879 {
1229 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1230 jfenwick 1889 // collapse so we have an 'E' node or an IDENTITY for some other type
1231     if (m_readytype!='E' && m_op!=IDENTITY)
1232     {
1233     collapse();
1234     }
1235 jfenwick 1885 if (m_op==IDENTITY)
1236 jfenwick 1865 {
1237 jfenwick 1879 const ValueType& vec=m_id->getVector();
1238 jfenwick 1889 if (m_readytype=='C')
1239     {
1240 jfenwick 1898 roffset=0;
1241 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1242 jfenwick 1898 return &(vec);
1243 jfenwick 1889 }
1244 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
1245 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1246 jfenwick 1898 return &(vec);
1247 jfenwick 1865 }
1248 jfenwick 1889 if (m_readytype!='E')
1249     {
1250     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1251     }
1252     switch (getOpgroup(m_op))
1253     {
1254 jfenwick 2147 case G_UNARY:
1255     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1256 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1257 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1258 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1259 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1260 jfenwick 1889 default:
1261     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1262     }
1263 jfenwick 2152
1264 jfenwick 1889 }
1265    
1266    
1267 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
1268     // Each sample is evaluated independently and copied into the result DataExpanded.
1269 jfenwick 1879 DataReady_ptr
1270     DataLazy::resolve()
1271     {
1272    
1273 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1274     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1275 jfenwick 1879
1276 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1277 jfenwick 1889 {
1278     collapse();
1279     }
1280 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1281 jfenwick 1889 {
1282     return m_id;
1283     }
1284     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1285 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1286 jfenwick 1899 // storage to evaluate its expression
1287 jfenwick 1885 int numthreads=1;
1288     #ifdef _OPENMP
1289 jfenwick 1889 numthreads=getNumberOfThreads();
1290 jfenwick 1885 #endif
1291 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
1292 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1293 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1294 jfenwick 1879 ValueType& resvec=result->getVector();
1295     DataReady_ptr resptr=DataReady_ptr(result);
1296     int sample;
1297 jfenwick 1898 size_t outoffset; // offset in the output data
1298 jfenwick 1885 int totalsamples=getNumSamples();
1299 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
1300     size_t resoffset=0; // where in the vector to find the answer
1301 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1302 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1303 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
1304 jfenwick 1879 {
1305 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
1306 jfenwick 1885 #ifdef _OPENMP
1307 jfenwick 1898 res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1308 jfenwick 1885 #else
1309 jfenwick 1899 res=resolveSample(v,0,sample,resoffset); // res would normally be v, but not if it's a single IDENTITY op.
1310 jfenwick 1885 #endif
1311 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1312 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
1313 jfenwick 2092 LAZYDEBUG(cerr << "offset=" << outoffset << endl;)
1314 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
1315 jfenwick 1879 {
1316 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
1317 jfenwick 1879 }
1318 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
1319 jfenwick 1879 }
1320     return resptr;
1321     }
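// Sketch of the scratch-buffer layout used by resolve() above (added for exposition):
// with m_buffsRequired=2, m_maxsamplesize=6 and 2 OpenMP threads, v holds 2*2*6=24 doubles;
// thread t passes the offset t*12 to resolveSample so its intermediates stay in
// v[t*12 .. t*12+11], and each finished sample is then copied into the result's own vector.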
1322    
1323 jfenwick 1865 std::string
1324     DataLazy::toString() const
1325     {
1326 jfenwick 1886 ostringstream oss;
1327     oss << "Lazy Data:";
1328     intoString(oss);
1329     return oss.str();
1330 jfenwick 1865 }
1331    
1332 jfenwick 1899
1333 jfenwick 1886 void
1334     DataLazy::intoString(ostringstream& oss) const
1335     {
1336     switch (getOpgroup(m_op))
1337     {
1338 jfenwick 1889 case G_IDENTITY:
1339     if (m_id->isExpanded())
1340     {
1341     oss << "E";
1342     }
1343     else if (m_id->isTagged())
1344     {
1345     oss << "T";
1346     }
1347     else if (m_id->isConstant())
1348     {
1349     oss << "C";
1350     }
1351     else
1352     {
1353     oss << "?";
1354     }
1355 jfenwick 1886 oss << '@' << m_id.get();
1356     break;
1357     case G_BINARY:
1358     oss << '(';
1359     m_left->intoString(oss);
1360     oss << ' ' << opToString(m_op) << ' ';
1361     m_right->intoString(oss);
1362     oss << ')';
1363     break;
1364     case G_UNARY:
1365 jfenwick 2147 case G_UNARY_P:
1366 jfenwick 2037 case G_NP1OUT:
1367 jfenwick 2084 case G_NP1OUT_P:
1368 jfenwick 1886 oss << opToString(m_op) << '(';
1369     m_left->intoString(oss);
1370     oss << ')';
1371     break;
1372 jfenwick 2066 case G_TENSORPROD:
1373     oss << opToString(m_op) << '(';
1374     m_left->intoString(oss);
1375     oss << ", ";
1376     m_right->intoString(oss);
1377     oss << ')';
1378     break;
1379 jfenwick 1886 default:
1380     oss << "UNKNOWN";
1381     }
1382     }
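// Example (added for exposition): a lazy product of an expanded leaf and the sine of a
// constant leaf would print roughly as
//     Lazy Data:(E@0x7f3a10 * sin(C@0x7f3a90))
// where the letter gives the leaf's storage type and the hex value is the address of the
// underlying DataReady (the addresses shown here are made up).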
1383    
1384 jfenwick 1865 DataAbstract*
1385     DataLazy::deepCopy()
1386     {
1387 jfenwick 1935 switch (getOpgroup(m_op))
1388 jfenwick 1865 {
1389 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1390     case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1391     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1392 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1393     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1394 jfenwick 1935 default:
1395     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1396 jfenwick 1865 }
1397     }
1398    
1399    
1400 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1401     // Instances of DataReady can look at the size of their vectors.
1402     // For lazy though, it could be the size the data would be if it were resolved;
1403     // or it could be some function of the lengths of the DataReady instances which
1404     // form part of the expression.
1405     // Rather than have people making assumptions, I have disabled the method.
1406 jfenwick 1865 DataTypes::ValueType::size_type
1407     DataLazy::getLength() const
1408     {
1409 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1410 jfenwick 1865 }
1411    
1412    
1413     DataAbstract*
1414     DataLazy::getSlice(const DataTypes::RegionType& region) const
1415     {
1416 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1417 jfenwick 1865 }
1418    
1419 jfenwick 1935
1420     // To do this we need to rely on our child nodes
1421 jfenwick 1865 DataTypes::ValueType::size_type
1422     DataLazy::getPointOffset(int sampleNo,
1423 jfenwick 1935 int dataPointNo)
1424     {
1425     if (m_op==IDENTITY)
1426     {
1427     return m_id->getPointOffset(sampleNo,dataPointNo);
1428     }
1429     if (m_readytype!='E')
1430     {
1431     collapse();
1432     return m_id->getPointOffset(sampleNo,dataPointNo);
1433     }
1434     // at this point we do not have an identity node and the expression will be Expanded
1435     // so we only need to know which child to ask
1436     if (m_left->m_readytype=='E')
1437     {
1438     return m_left->getPointOffset(sampleNo,dataPointNo);
1439     }
1440     else
1441     {
1442     return m_right->getPointOffset(sampleNo,dataPointNo);
1443     }
1444     }
1445    
1446     // To do this we need to rely on our child nodes
1447     DataTypes::ValueType::size_type
1448     DataLazy::getPointOffset(int sampleNo,
1449 jfenwick 1865 int dataPointNo) const
1450     {
1451 jfenwick 1935 if (m_op==IDENTITY)
1452     {
1453     return m_id->getPointOffset(sampleNo,dataPointNo);
1454     }
1455     if (m_readytype=='E')
1456     {
1457     // at this point we do not have an identity node and the expression will be Expanded
1458     // so we only need to know which child to ask
1459     if (m_left->m_readytype=='E')
1460     {
1461     return m_left->getPointOffset(sampleNo,dataPointNo);
1462     }
1463     else
1464     {
1465     return m_right->getPointOffset(sampleNo,dataPointNo);
1466     }
1467     }
1468     if (m_readytype=='C')
1469     {
1470     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1471     }
1472     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1473 jfenwick 1865 }
1474    
1475 jfenwick 1901 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1476     // to zero, all the tags from all the DataTagged arguments would be in the result.
1477     // However since they all have the same value (0) whether they are there or not should not matter.
1478     // So I have decided that for all types this method will create a constant 0.
1479     // It can be promoted up as required.
1480     // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1481     // but we can deal with that if it arises.
1482     void
1483     DataLazy::setToZero()
1484     {
1485     DataTypes::ValueType v(getNoValues(),0);
1486     m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1487     m_op=IDENTITY;
1488     m_right.reset();
1489     m_left.reset();
1490     m_readytype='C';
1491     m_buffsRequired=1;
1492     }
1493    
1494 jfenwick 1865 } // end namespace
