/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2177 - (hide annotations)
Wed Dec 17 23:51:23 2008 UTC (10 years, 3 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 51708 byte(s)
used unused variable.
Added support for limiting the size of lazy expression trees.
500,000 seems a bit high
1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 jfenwick 2177 #define SIZELIMIT
42     // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
43    
44    
45 jfenwick 1899 /*
46     How does DataLazy work?
47     ~~~~~~~~~~~~~~~~~~~~~~~
48    
49     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
50     denoted left and right.
51    
52     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
53     This means that all "internal" nodes in the structure are instances of DataLazy.
54    
55     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
56     Note that IDENITY is not considered a unary operation.
57    
58     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
59     It must however form a DAG (directed acyclic graph).
60     I will refer to individual DataLazy objects with the structure as nodes.
61    
62     Each node also stores:
63     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
64     evaluated.
65     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
66     evaluate the expression.
67     - m_samplesize ~ the number of doubles stored in a sample.
68    
69     When a new node is created, the above values are computed based on the values in the child nodes.
70     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
71    
72     The resolve method, which produces a DataReady from a DataLazy, does the following:
73     1) Create a DataReady to hold the new result.
74     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
75     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
76    
77     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
78    
79     resolveSample returns a Vector* and an offset within that vector where the result is stored.
80     Normally, this would be v, but for identity nodes their internal vector is returned instead.
81    
82     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
83    
84     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
85     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
86 jfenwick 2037
87 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
88 jfenwick 2037 1) Add to the ES_optype.
89     2) determine what opgroup your operation belongs to (X)
90     3) add a string for the op to the end of ES_opstrings
91     4) increase ES_opcount
92     5) add an entry (X) to opgroups
93     6) add an entry to the switch in collapseToReady
94     7) add an entry to resolveX
95 jfenwick 1899 */
96    
97    
98 jfenwick 1865 using namespace std;
99 jfenwick 1868 using namespace boost;
100 jfenwick 1865
101     namespace escript
102     {
103    
104     namespace
105     {
106    
107 jfenwick 1886 enum ES_opgroup
108     {
109     G_UNKNOWN,
110     G_IDENTITY,
111 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
112 jfenwick 2037 G_UNARY, // pointwise operations with one argument
113 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
114 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
115 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
116 jfenwick 2066 G_TENSORPROD // general tensor product
117 jfenwick 1886 };
118    
119    
120    
121    
122 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
123     "sin","cos","tan",
124 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
125     "asinh","acosh","atanh",
126 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
127 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
128 jfenwick 2066 "symmetric","nonsymmetric",
129 jfenwick 2084 "prod",
130 jfenwick 2147 "transpose", "trace"};
131     int ES_opcount=40;
132 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
133     G_UNARY,G_UNARY,G_UNARY, //10
134     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
135     G_UNARY,G_UNARY,G_UNARY, // 20
136 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
137     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
138 jfenwick 2066 G_NP1OUT,G_NP1OUT,
139 jfenwick 2084 G_TENSORPROD,
140     G_NP1OUT_P, G_NP1OUT_P};
141 jfenwick 1886 inline
142     ES_opgroup
143     getOpgroup(ES_optype op)
144     {
145     return opgroups[op];
146     }
147    
148 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
149     FunctionSpace
150     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
151     {
152     // perhaps this should call interpolate and throw or something?
153     // maybe we need an interpolate node -
154     // that way, if interpolate is required in any other op we can just throw a
155     // programming error exception.
156 jfenwick 1879
157 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
158     FunctionSpace r=right->getFunctionSpace();
159     if (l!=r)
160     {
161     if (r.probeInterpolation(l))
162     {
163     return l;
164     }
165     if (l.probeInterpolation(r))
166     {
167     return r;
168     }
169     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
170     }
171     return l;
172 jfenwick 1865 }
173    
174     // return the shape of the result of "left op right"
175 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
176 jfenwick 1865 DataTypes::ShapeType
177     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
178     {
179 jfenwick 1879 if (left->getShape()!=right->getShape())
180     {
181 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
182 jfenwick 1908 {
183     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
184     }
185     if (left->getRank()==0) // we need to allow scalar * anything
186     {
187     return right->getShape();
188     }
189     if (right->getRank()==0)
190     {
191     return left->getShape();
192     }
193     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
194 jfenwick 1879 }
195     return left->getShape();
196 jfenwick 1865 }
197    
198 jfenwick 2084 // return the shape for "op left"
199    
200     DataTypes::ShapeType
201     resultShape(DataAbstract_ptr left, ES_optype op)
202     {
203     switch(op)
204     {
205     case TRANS:
206     return left->getShape();
207     break;
208     case TRACE:
209     return DataTypes::scalarShape;
210     break;
211     default:
212     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
213     }
214     }
215    
216 jfenwick 2066 // determine the output shape for the general tensor product operation
217     // the additional parameters return information required later for the product
218     // the majority of this code is copy pasted from C_General_Tensor_Product
219     DataTypes::ShapeType
220     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
221 jfenwick 1865 {
222 jfenwick 2066
223     // Get rank and shape of inputs
224     int rank0 = left->getRank();
225     int rank1 = right->getRank();
226     const DataTypes::ShapeType& shape0 = left->getShape();
227     const DataTypes::ShapeType& shape1 = right->getShape();
228    
229     // Prepare for the loops of the product and verify compatibility of shapes
230     int start0=0, start1=0;
231     if (transpose == 0) {}
232     else if (transpose == 1) { start0 = axis_offset; }
233     else if (transpose == 2) { start1 = rank1-axis_offset; }
234     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
235    
236 jfenwick 2085 if (rank0<axis_offset)
237     {
238     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
239     }
240 jfenwick 2066
241     // Adjust the shapes for transpose
242     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
243     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
244     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
245     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
246    
247     // Prepare for the loops of the product
248     SL=1, SM=1, SR=1;
249     for (int i=0; i<rank0-axis_offset; i++) {
250     SL *= tmpShape0[i];
251     }
252     for (int i=rank0-axis_offset; i<rank0; i++) {
253     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
254     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
255     }
256     SM *= tmpShape0[i];
257     }
258     for (int i=axis_offset; i<rank1; i++) {
259     SR *= tmpShape1[i];
260     }
261    
262     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
263     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
264     { // block to limit the scope of out_index
265     int out_index=0;
266     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
267     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
268     }
269 jfenwick 2086
270     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
271     {
272     ostringstream os;
273     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
274     throw DataException(os.str());
275     }
276    
277 jfenwick 2066 return shape2;
278 jfenwick 1865 }
279    
280 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
281 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
282 jfenwick 2066 // The same goes for G_TENSORPROD
283 jfenwick 2157 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
284     // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
285     // multiple times
286 jfenwick 1879 int
287     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
288     {
289 jfenwick 1886 switch(getOpgroup(op))
290 jfenwick 1879 {
291 jfenwick 1886 case G_IDENTITY: return 1;
292 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
293 jfenwick 2147 case G_UNARY:
294     case G_UNARY_P: return max(left->getBuffsRequired(),1);
295 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
296 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
297 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
298 jfenwick 1879 default:
299     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
300     }
301     }
302    
303 jfenwick 1899
304 jfenwick 1865 } // end anonymous namespace
305    
306    
307 jfenwick 1899
308     // Return a string representing the operation
309 jfenwick 1865 const std::string&
310     opToString(ES_optype op)
311     {
312     if (op<0 || op>=ES_opcount)
313     {
314     op=UNKNOWNOP;
315     }
316     return ES_opstrings[op];
317     }
318    
319    
320     DataLazy::DataLazy(DataAbstract_ptr p)
321 jfenwick 2177 : parent(p->getFunctionSpace(),p->getShape())
322 jfenwick 1865 {
323 jfenwick 1879 if (p->isLazy())
324     {
325     // I don't want identity of Lazy.
326     // Question: Why would that be so bad?
327     // Answer: We assume that the child of ID is something we can call getVector on
328     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
329     }
330     else
331     {
332 jfenwick 2177 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
333     makeIdentity(dr);
334 jfenwick 1879 }
335 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
336 jfenwick 1865 }
337    
338 jfenwick 1899
339    
340 jfenwick 1901
341 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
342     : parent(left->getFunctionSpace(),left->getShape()),
343 jfenwick 2066 m_op(op),
344     m_axis_offset(0),
345     m_transpose(0),
346     m_SL(0), m_SM(0), m_SR(0)
347 jfenwick 1886 {
348 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
349 jfenwick 1886 {
350     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
351     }
352 jfenwick 2066
353 jfenwick 1886 DataLazy_ptr lleft;
354     if (!left->isLazy())
355     {
356     lleft=DataLazy_ptr(new DataLazy(left));
357     }
358     else
359     {
360     lleft=dynamic_pointer_cast<DataLazy>(left);
361     }
362 jfenwick 1889 m_readytype=lleft->m_readytype;
363 jfenwick 1886 m_left=lleft;
364 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
365 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
366 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
367 jfenwick 2177 m_children=m_left->m_children+1;
368     m_height=m_left->m_height+1;
369     SIZELIMIT
370 jfenwick 1886 }
371    
372    
373 jfenwick 1943 // In this constructor we need to consider interpolation
374 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
375     : parent(resultFS(left,right,op), resultShape(left,right,op)),
376 jfenwick 2066 m_op(op),
377     m_SL(0), m_SM(0), m_SR(0)
378 jfenwick 1879 {
379 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
380 jfenwick 1886 {
381 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
382 jfenwick 1886 }
383 jfenwick 1943
384     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
385     {
386     FunctionSpace fs=getFunctionSpace();
387     Data ltemp(left);
388     Data tmp(ltemp,fs);
389     left=tmp.borrowDataPtr();
390     }
391 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
392 jfenwick 1943 {
393     Data tmp(Data(right),getFunctionSpace());
394     right=tmp.borrowDataPtr();
395     }
396     left->operandCheck(*right);
397    
398 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
399 jfenwick 1879 {
400     m_left=dynamic_pointer_cast<DataLazy>(left);
401     }
402     else
403     {
404     m_left=DataLazy_ptr(new DataLazy(left));
405     }
406     if (right->isLazy())
407     {
408     m_right=dynamic_pointer_cast<DataLazy>(right);
409     }
410     else
411     {
412     m_right=DataLazy_ptr(new DataLazy(right));
413     }
414 jfenwick 1889 char lt=m_left->m_readytype;
415     char rt=m_right->m_readytype;
416     if (lt=='E' || rt=='E')
417     {
418     m_readytype='E';
419     }
420     else if (lt=='T' || rt=='T')
421     {
422     m_readytype='T';
423     }
424     else
425     {
426     m_readytype='C';
427     }
428 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
429     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
430 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
431 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
432     m_height=max(m_left->m_height,m_right->m_height)+1;
433     SIZELIMIT
434 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
435 jfenwick 1879 }
436    
437 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
438     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
439     m_op(op),
440     m_axis_offset(axis_offset),
441     m_transpose(transpose)
442     {
443     if ((getOpgroup(op)!=G_TENSORPROD))
444     {
445     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
446     }
447     if ((transpose>2) || (transpose<0))
448     {
449     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
450     }
451     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
452     {
453     FunctionSpace fs=getFunctionSpace();
454     Data ltemp(left);
455     Data tmp(ltemp,fs);
456     left=tmp.borrowDataPtr();
457     }
458     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
459     {
460     Data tmp(Data(right),getFunctionSpace());
461     right=tmp.borrowDataPtr();
462     }
463     left->operandCheck(*right);
464 jfenwick 1879
465 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
466     {
467     m_left=dynamic_pointer_cast<DataLazy>(left);
468     }
469     else
470     {
471     m_left=DataLazy_ptr(new DataLazy(left));
472     }
473     if (right->isLazy())
474     {
475     m_right=dynamic_pointer_cast<DataLazy>(right);
476     }
477     else
478     {
479     m_right=DataLazy_ptr(new DataLazy(right));
480     }
481     char lt=m_left->m_readytype;
482     char rt=m_right->m_readytype;
483     if (lt=='E' || rt=='E')
484     {
485     m_readytype='E';
486     }
487     else if (lt=='T' || rt=='T')
488     {
489     m_readytype='T';
490     }
491     else
492     {
493     m_readytype='C';
494     }
495     m_samplesize=getNumDPPSample()*getNoValues();
496     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
497     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
498 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
499     m_height=max(m_left->m_height,m_right->m_height)+1;
500     SIZELIMIT
501 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
502 jfenwick 2066 }
503    
504    
505 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
506     : parent(left->getFunctionSpace(), resultShape(left,op)),
507     m_op(op),
508     m_axis_offset(axis_offset),
509 jfenwick 2147 m_transpose(0),
510     m_tol(0)
511 jfenwick 2084 {
512     if ((getOpgroup(op)!=G_NP1OUT_P))
513     {
514     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
515     }
516     DataLazy_ptr lleft;
517     if (!left->isLazy())
518     {
519     lleft=DataLazy_ptr(new DataLazy(left));
520     }
521     else
522     {
523     lleft=dynamic_pointer_cast<DataLazy>(left);
524     }
525     m_readytype=lleft->m_readytype;
526     m_left=lleft;
527     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
528     m_samplesize=getNumDPPSample()*getNoValues();
529     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
530 jfenwick 2177 m_children=m_left->m_children+1;
531     m_height=m_left->m_height+1;
532     SIZELIMIT
533 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
534 jfenwick 2084 }
535    
536 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
537     : parent(left->getFunctionSpace(), left->getShape()),
538     m_op(op),
539     m_axis_offset(0),
540     m_transpose(0),
541     m_tol(tol)
542     {
543     if ((getOpgroup(op)!=G_UNARY_P))
544     {
545     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
546     }
547     DataLazy_ptr lleft;
548     if (!left->isLazy())
549     {
550     lleft=DataLazy_ptr(new DataLazy(left));
551     }
552     else
553     {
554     lleft=dynamic_pointer_cast<DataLazy>(left);
555     }
556     m_readytype=lleft->m_readytype;
557     m_left=lleft;
558     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
559     m_samplesize=getNumDPPSample()*getNoValues();
560     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
561 jfenwick 2177 m_children=m_left->m_children+1;
562     m_height=m_left->m_height+1;
563     SIZELIMIT
564 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
565     }
566 jfenwick 2084
567 jfenwick 1865 DataLazy::~DataLazy()
568     {
569     }
570    
571 jfenwick 1879
572     int
573     DataLazy::getBuffsRequired() const
574 jfenwick 1865 {
575 jfenwick 1879 return m_buffsRequired;
576     }
577    
578 jfenwick 1884
579 jfenwick 2066 size_t
580     DataLazy::getMaxSampleSize() const
581     {
582     return m_maxsamplesize;
583     }
584    
585 jfenwick 1899 /*
586     \brief Evaluates the expression using methods on Data.
587     This does the work for the collapse method.
588     For reasons of efficiency do not call this method on DataExpanded nodes.
589     */
590 jfenwick 1889 DataReady_ptr
591     DataLazy::collapseToReady()
592     {
593     if (m_readytype=='E')
594     { // this is more an efficiency concern than anything else
595     throw DataException("Programmer Error - do not use collapse on Expanded data.");
596     }
597     if (m_op==IDENTITY)
598     {
599     return m_id;
600     }
601     DataReady_ptr pleft=m_left->collapseToReady();
602     Data left(pleft);
603     Data right;
604 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
605 jfenwick 1889 {
606     right=Data(m_right->collapseToReady());
607     }
608     Data result;
609     switch(m_op)
610     {
611     case ADD:
612     result=left+right;
613     break;
614     case SUB:
615     result=left-right;
616     break;
617     case MUL:
618     result=left*right;
619     break;
620     case DIV:
621     result=left/right;
622     break;
623     case SIN:
624     result=left.sin();
625     break;
626     case COS:
627     result=left.cos();
628     break;
629     case TAN:
630     result=left.tan();
631     break;
632     case ASIN:
633     result=left.asin();
634     break;
635     case ACOS:
636     result=left.acos();
637     break;
638     case ATAN:
639     result=left.atan();
640     break;
641     case SINH:
642     result=left.sinh();
643     break;
644     case COSH:
645     result=left.cosh();
646     break;
647     case TANH:
648     result=left.tanh();
649     break;
650     case ERF:
651     result=left.erf();
652     break;
653     case ASINH:
654     result=left.asinh();
655     break;
656     case ACOSH:
657     result=left.acosh();
658     break;
659     case ATANH:
660     result=left.atanh();
661     break;
662     case LOG10:
663     result=left.log10();
664     break;
665     case LOG:
666     result=left.log();
667     break;
668     case SIGN:
669     result=left.sign();
670     break;
671     case ABS:
672     result=left.abs();
673     break;
674     case NEG:
675     result=left.neg();
676     break;
677     case POS:
678     // it doesn't mean anything for delayed.
679     // it will just trigger a deep copy of the lazy object
680     throw DataException("Programmer error - POS not supported for lazy data.");
681     break;
682     case EXP:
683     result=left.exp();
684     break;
685     case SQRT:
686     result=left.sqrt();
687     break;
688     case RECIP:
689     result=left.oneOver();
690     break;
691     case GZ:
692     result=left.wherePositive();
693     break;
694     case LZ:
695     result=left.whereNegative();
696     break;
697     case GEZ:
698     result=left.whereNonNegative();
699     break;
700     case LEZ:
701     result=left.whereNonPositive();
702     break;
703 jfenwick 2147 case NEZ:
704     result=left.whereNonZero(m_tol);
705     break;
706     case EZ:
707     result=left.whereZero(m_tol);
708     break;
709 jfenwick 2037 case SYM:
710     result=left.symmetric();
711     break;
712     case NSYM:
713     result=left.nonsymmetric();
714     break;
715 jfenwick 2066 case PROD:
716     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
717     break;
718 jfenwick 2084 case TRANS:
719     result=left.transpose(m_axis_offset);
720     break;
721     case TRACE:
722     result=left.trace(m_axis_offset);
723     break;
724 jfenwick 1889 default:
725 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
726 jfenwick 1889 }
727     return result.borrowReadyPtr();
728     }
729    
730 jfenwick 1899 /*
731     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
732     This method uses the original methods on the Data class to evaluate the expressions.
733     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
734     the purpose of using DataLazy in the first place).
735     */
736 jfenwick 1889 void
737     DataLazy::collapse()
738     {
739     if (m_op==IDENTITY)
740     {
741     return;
742     }
743     if (m_readytype=='E')
744     { // this is more an efficiency concern than anything else
745     throw DataException("Programmer Error - do not use collapse on Expanded data.");
746     }
747     m_id=collapseToReady();
748     m_op=IDENTITY;
749     }
750    
751 jfenwick 1899 /*
752 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
753 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
754     \param v A vector to store intermediate results.
755     \param offset Index in v to begin storing results.
756     \param sampleNo Sample number to evaluate.
757     \param roffset (output parameter) the offset in the return vector where the result begins.
758    
759     The return value will be an existing vector so do not deallocate it.
760     If the result is stored in v it should be stored at the offset given.
761     Everything from offset to the end of v should be considered available for this method to use.
762     */
763 jfenwick 1898 DataTypes::ValueType*
764     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
765 jfenwick 1889 {
766     // we assume that any collapsing has been done before we get here
767     // since we only have one argument we don't need to think about only
768     // processing single points.
769     if (m_readytype!='E')
770     {
771     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
772     }
773 jfenwick 1898 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
774     const double* left=&((*vleft)[roffset]);
775 jfenwick 1889 double* result=&(v[offset]);
776 jfenwick 1898 roffset=offset;
777 jfenwick 1889 switch (m_op)
778     {
779     case SIN:
780 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
781 jfenwick 1889 break;
782     case COS:
783 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
784 jfenwick 1889 break;
785     case TAN:
786 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
787 jfenwick 1889 break;
788     case ASIN:
789 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
790 jfenwick 1889 break;
791     case ACOS:
792 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
793 jfenwick 1889 break;
794     case ATAN:
795 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
796 jfenwick 1889 break;
797     case SINH:
798 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
799 jfenwick 1889 break;
800     case COSH:
801 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
802 jfenwick 1889 break;
803     case TANH:
804 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
805 jfenwick 1889 break;
806     case ERF:
807 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
808 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
809     #else
810     tensor_unary_operation(m_samplesize, left, result, ::erf);
811     break;
812     #endif
813     case ASINH:
814 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
816     #else
817     tensor_unary_operation(m_samplesize, left, result, ::asinh);
818     #endif
819     break;
820     case ACOSH:
821 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
823     #else
824     tensor_unary_operation(m_samplesize, left, result, ::acosh);
825     #endif
826     break;
827     case ATANH:
828 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
830     #else
831     tensor_unary_operation(m_samplesize, left, result, ::atanh);
832     #endif
833     break;
834     case LOG10:
835 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
836 jfenwick 1889 break;
837     case LOG:
838 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
839 jfenwick 1889 break;
840     case SIGN:
841     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
842     break;
843     case ABS:
844 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
845 jfenwick 1889 break;
846     case NEG:
847     tensor_unary_operation(m_samplesize, left, result, negate<double>());
848     break;
849     case POS:
850     // it doesn't mean anything for delayed.
851     // it will just trigger a deep copy of the lazy object
852     throw DataException("Programmer error - POS not supported for lazy data.");
853     break;
854     case EXP:
855 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
856 jfenwick 1889 break;
857     case SQRT:
858 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
859 jfenwick 1889 break;
860     case RECIP:
861     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
862     break;
863     case GZ:
864     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
865     break;
866     case LZ:
867     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
868     break;
869     case GEZ:
870     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
871     break;
872     case LEZ:
873     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
874     break;
875 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
876     case NEZ:
877     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
878     break;
879     case EZ:
880     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
881     break;
882 jfenwick 1889
883     default:
884     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
885     }
886 jfenwick 1898 return &v;
887 jfenwick 1889 }
888    
889    
890 jfenwick 2147
891    
892    
893    
894 jfenwick 2037 /*
895     \brief Compute the value of the expression (unary operation) for the given sample.
896     \return Vector which stores the value of the subexpression for the given sample.
897     \param v A vector to store intermediate results.
898     \param offset Index in v to begin storing results.
899     \param sampleNo Sample number to evaluate.
900     \param roffset (output parameter) the offset in the return vector where the result begins.
901 jfenwick 1898
902 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
903     If the result is stored in v it should be stored at the offset given.
904     Everything from offset to the end of v should be considered available for this method to use.
905     */
906     DataTypes::ValueType*
907     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
908     {
909     // we assume that any collapsing has been done before we get here
910     // since we only have one argument we don't need to think about only
911     // processing single points.
912     if (m_readytype!='E')
913     {
914     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
915     }
916     // since we can't write the result over the input, we need a result offset further along
917     size_t subroffset=roffset+m_samplesize;
918 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
919     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
920 jfenwick 2037 roffset=offset;
921 jfenwick 2157 size_t loop=0;
922     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
923     size_t step=getNoValues();
924 jfenwick 2037 switch (m_op)
925     {
926     case SYM:
927 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
928     {
929     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
930     subroffset+=step;
931     offset+=step;
932     }
933 jfenwick 2037 break;
934     case NSYM:
935 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
936     {
937     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
938     subroffset+=step;
939     offset+=step;
940     }
941 jfenwick 2037 break;
942     default:
943     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
944     }
945     return &v;
946     }
947 jfenwick 1898
948 jfenwick 2084 /*
949     \brief Compute the value of the expression (unary operation) for the given sample.
950     \return Vector which stores the value of the subexpression for the given sample.
951     \param v A vector to store intermediate results.
952     \param offset Index in v to begin storing results.
953     \param sampleNo Sample number to evaluate.
954     \param roffset (output parameter) the offset in the return vector where the result begins.
955 jfenwick 1899
956 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
957     If the result is stored in v it should be stored at the offset given.
958     Everything from offset to the end of v should be considered available for this method to use.
959     */
960     DataTypes::ValueType*
961     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
962     {
963     // we assume that any collapsing has been done before we get here
964     // since we only have one argument we don't need to think about only
965     // processing single points.
966     if (m_readytype!='E')
967     {
968     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
969     }
970     // since we can't write the result over the input, we need a result offset further along
971 jfenwick 2157 size_t subroffset;
972     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
973 jfenwick 2166 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
974     LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
975 jfenwick 2084 roffset=offset;
976 jfenwick 2157 size_t loop=0;
977     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
978 jfenwick 2166 size_t outstep=getNoValues();
979     size_t instep=m_left->getNoValues();
980     LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
981 jfenwick 2084 switch (m_op)
982     {
983     case TRACE:
984 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
985     {
986 jfenwick 2166 size_t zz=sampleNo*getNumDPPSample()+loop;
987     if (zz==5800)
988     {
989     LAZYDEBUG(cerr << "point=" << zz<< endl;)
990     LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
991     LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
992     LAZYDEBUG(cerr << subroffset << endl;)
993     LAZYDEBUG(cerr << "output=" << offset << endl;)
994     }
995     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
996     if (zz==5800)
997     {
998     LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
999     }
1000     subroffset+=instep;
1001     offset+=outstep;
1002 jfenwick 2157 }
1003 jfenwick 2084 break;
1004     case TRANS:
1005 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
1006     {
1007     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1008 jfenwick 2166 subroffset+=instep;
1009     offset+=outstep;
1010 jfenwick 2157 }
1011 jfenwick 2084 break;
1012     default:
1013     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1014     }
1015     return &v;
1016     }
1017 jfenwick 2037
1018    
1019 phornby 1987 #define PROC_OP(TYPE,X) \
1020 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1021     {\
1022     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1023     { \
1024     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1025 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1026 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1027 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1028 jfenwick 2152 lroffset+=leftstep; \
1029     rroffset+=rightstep; \
1030     }\
1031     lroffset+=oleftstep;\
1032     rroffset+=orightstep;\
1033 jfenwick 1889 }
1034    
1035 jfenwick 1899 /*
1036     \brief Compute the value of the expression (binary operation) for the given sample.
1037     \return Vector which stores the value of the subexpression for the given sample.
1038     \param v A vector to store intermediate results.
1039     \param offset Index in v to begin storing results.
1040     \param sampleNo Sample number to evaluate.
1041     \param roffset (output parameter) the offset in the return vector where the result begins.
1042    
1043     The return value will be an existing vector so do not deallocate it.
1044     If the result is stored in v it should be stored at the offset given.
1045     Everything from offset to the end of v should be considered available for this method to use.
1046     */
1047     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1048     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1049     // If both children are expanded, then we can process them in a single operation (we treat
1050     // the whole sample as one big datapoint.
1051     // If one of the children is not expanded, then we need to treat each point in the sample
1052     // individually.
1053 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1054     // For example, 2+Vector.
1055     // In this case each double within the point is treated individually
1056 jfenwick 1898 DataTypes::ValueType*
1057 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1058 jfenwick 1889 {
1059 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1060 jfenwick 1889
1061 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1062     // first work out which of the children are expanded
1063 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1064     bool rightExp=(m_right->m_readytype=='E');
1065 jfenwick 2084 if (!leftExp && !rightExp)
1066     {
1067     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1068     }
1069     bool leftScalar=(m_left->getRank()==0);
1070     bool rightScalar=(m_right->getRank()==0);
1071 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1072 jfenwick 1908 {
1073 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1074     }
1075     size_t leftsize=m_left->getNoValues();
1076     size_t rightsize=m_right->getNoValues();
1077     size_t chunksize=1; // how many doubles will be processed in one go
1078     int leftstep=0; // how far should the left offset advance after each step
1079     int rightstep=0;
1080     int numsteps=0; // total number of steps for the inner loop
1081     int oleftstep=0; // the o variables refer to the outer loop
1082     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1083     int onumsteps=1;
1084    
1085     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1086     bool RES=(rightExp && rightScalar);
1087     bool LS=(!leftExp && leftScalar); // left is a single scalar
1088     bool RS=(!rightExp && rightScalar);
1089     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1090     bool RN=(!rightExp && !rightScalar);
1091     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1092     bool REN=(rightExp && !rightScalar);
1093    
1094     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1095     {
1096     chunksize=m_left->getNumDPPSample()*leftsize;
1097     leftstep=0;
1098     rightstep=0;
1099     numsteps=1;
1100     }
1101     else if (LES || RES)
1102     {
1103     chunksize=1;
1104     if (LES) // left is an expanded scalar
1105 jfenwick 2084 {
1106 jfenwick 2152 if (RS)
1107     {
1108     leftstep=1;
1109     rightstep=0;
1110     numsteps=m_left->getNumDPPSample();
1111     }
1112     else // RN or REN
1113     {
1114     leftstep=0;
1115     oleftstep=1;
1116     rightstep=1;
1117 phornby 2171 orightstep=(RN ? -(int)rightsize : 0);
1118 jfenwick 2152 numsteps=rightsize;
1119     onumsteps=m_left->getNumDPPSample();
1120     }
1121 jfenwick 2084 }
1122 jfenwick 2152 else // right is an expanded scalar
1123     {
1124     if (LS)
1125     {
1126     rightstep=1;
1127     leftstep=0;
1128     numsteps=m_right->getNumDPPSample();
1129     }
1130     else
1131     {
1132     rightstep=0;
1133     orightstep=1;
1134     leftstep=1;
1135 jfenwick 2177 oleftstep=(LN ? -(int)leftsize : 0);
1136 jfenwick 2152 numsteps=leftsize;
1137     onumsteps=m_right->getNumDPPSample();
1138     }
1139     }
1140     }
1141     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1142     {
1143     if (LEN) // and Right will be a single value
1144     {
1145     chunksize=rightsize;
1146     leftstep=rightsize;
1147     rightstep=0;
1148     numsteps=m_left->getNumDPPSample();
1149     if (RS)
1150     {
1151     numsteps*=leftsize;
1152     }
1153     }
1154     else // REN
1155     {
1156     chunksize=leftsize;
1157     rightstep=leftsize;
1158     leftstep=0;
1159     numsteps=m_right->getNumDPPSample();
1160     if (LS)
1161     {
1162     numsteps*=rightsize;
1163     }
1164     }
1165     }
1166    
1167     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1168 jfenwick 1899 // Get the values of sub-expressions
1169 jfenwick 2157 const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1170     // calcBufss for why we can't put offset as the 2nd param above
1171     const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1172 jfenwick 1899 // the right child starts further along.
1173 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1174     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1175     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1176     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1177     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1178     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1179     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1180 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1181 jfenwick 1889 switch(m_op)
1182     {
1183     case ADD:
1184 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1185 jfenwick 1889 break;
1186 jfenwick 1899 case SUB:
1187 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1188 jfenwick 1899 break;
1189     case MUL:
1190 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1191 jfenwick 1899 break;
1192     case DIV:
1193 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1194 jfenwick 1899 break;
1195 jfenwick 1910 case POW:
1196 phornby 1994 PROC_OP(double (double,double),::pow);
1197 jfenwick 1910 break;
1198 jfenwick 1889 default:
1199 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1200 jfenwick 1889 }
1201 jfenwick 1899 roffset=offset;
1202 jfenwick 1898 return &v;
1203 jfenwick 1889 }
1204    
1205 jfenwick 1898
1206 jfenwick 2152
1207 jfenwick 2066 /*
1208     \brief Compute the value of the expression (tensor product) for the given sample.
1209     \return Vector which stores the value of the subexpression for the given sample.
1210     \param v A vector to store intermediate results.
1211     \param offset Index in v to begin storing results.
1212     \param sampleNo Sample number to evaluate.
1213     \param roffset (output parameter) the offset in the return vector where the result begins.
1214 jfenwick 1898
1215 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1216     If the result is stored in v it should be stored at the offset given.
1217     Everything from offset to the end of v should be considered available for this method to use.
1218     */
1219     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1220     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1221     // unlike the other resolve helpers, we must treat these datapoints separately.
1222     DataTypes::ValueType*
1223     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1224     {
1225 jfenwick 2092 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1226 jfenwick 2066
1227     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1228     // first work out which of the children are expanded
1229     bool leftExp=(m_left->m_readytype=='E');
1230     bool rightExp=(m_right->m_readytype=='E');
1231     int steps=getNumDPPSample();
1232     int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1233     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1234     int resultStep=max(leftStep,rightStep); // only one (at most) should be !=0
1235     // Get the values of sub-expressions (leave a gap of one sample for the result).
1236 jfenwick 2153 int gap=offset+m_left->getMaxSampleSize(); // actually only needs to be m_left->m_samplesize
1237     const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1238     gap+=m_right->getMaxSampleSize();
1239     const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1240     LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1241     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1242     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1243     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1244     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1245     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1246 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1247     switch(m_op)
1248     {
1249     case PROD:
1250     for (int i=0;i<steps;++i,resultp+=resultStep)
1251     {
1252 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1253     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1254     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1255 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1256     const double *ptr_1 = &((*right)[rroffset]);
1257 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1258     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1259 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1260     lroffset+=leftStep;
1261     rroffset+=rightStep;
1262     }
1263     break;
1264     default:
1265     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1266     }
1267     roffset=offset;
1268     return &v;
1269     }
1270    
1271    
1272    
1273 jfenwick 1899 /*
1274     \brief Compute the value of the expression for the given sample.
1275     \return Vector which stores the value of the subexpression for the given sample.
1276     \param v A vector to store intermediate results.
1277     \param offset Index in v to begin storing results.
1278     \param sampleNo Sample number to evaluate.
1279     \param roffset (output parameter) the offset in the return vector where the result begins.
1280 jfenwick 1898
1281 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
1282     */
1283 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
1284     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1285 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
1286     // Regardless, the storage should be assumed to be used, even if it isn't.
1287 jfenwick 1898
1288     // the roffset is the offset within the returned vector where the data begins
1289     const DataTypes::ValueType*
1290     DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1291 jfenwick 1879 {
1292 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1293 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
1294     if (m_readytype!='E' && m_op!=IDENTITY)
1295     {
1296     collapse();
1297     }
1298 jfenwick 1885 if (m_op==IDENTITY)
1299 jfenwick 1865 {
1300 jfenwick 1879 const ValueType& vec=m_id->getVector();
1301 jfenwick 1889 if (m_readytype=='C')
1302     {
1303 jfenwick 1898 roffset=0;
1304 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1305 jfenwick 1898 return &(vec);
1306 jfenwick 1889 }
1307 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
1308 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1309 jfenwick 1898 return &(vec);
1310 jfenwick 1865 }
1311 jfenwick 1889 if (m_readytype!='E')
1312     {
1313     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1314     }
1315     switch (getOpgroup(m_op))
1316     {
1317 jfenwick 2147 case G_UNARY:
1318     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1319 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1320 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1321 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1322 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1323 jfenwick 1889 default:
1324     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1325     }
1326 jfenwick 2152
1327 jfenwick 1889 }
1328    
1329 jfenwick 2177 // This needs to do the work of the idenity constructor
1330     void
1331     DataLazy::resolveToIdentity()
1332     {
1333     if (m_op==IDENTITY)
1334     return;
1335     DataReady_ptr p=resolve();
1336     makeIdentity(p);
1337     }
1338 jfenwick 1889
1339 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1340     {
1341     m_op=IDENTITY;
1342     m_axis_offset=0;
1343     m_transpose=0;
1344     m_SL=m_SM=m_SR=0;
1345     m_children=m_height=0;
1346     m_id=p;
1347     if(p->isConstant()) {m_readytype='C';}
1348     else if(p->isExpanded()) {m_readytype='E';}
1349     else if (p->isTagged()) {m_readytype='T';}
1350     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1351     m_buffsRequired=1;
1352     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1353     m_maxsamplesize=m_samplesize;
1354     m_left.reset();
1355     m_right.reset();
1356     }
1357    
1358 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
1359     // Each sample is evaluated independently and copied into the result DataExpanded.
1360 jfenwick 1879 DataReady_ptr
1361     DataLazy::resolve()
1362     {
1363    
1364 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1365     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1366 jfenwick 1879
1367 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1368 jfenwick 1889 {
1369     collapse();
1370     }
1371 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1372 jfenwick 1889 {
1373     return m_id;
1374     }
1375     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1376 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1377 jfenwick 1899 // storage to evaluate its expression
1378 jfenwick 1885 int numthreads=1;
1379     #ifdef _OPENMP
1380 jfenwick 1889 numthreads=getNumberOfThreads();
1381 jfenwick 1885 #endif
1382 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
1383 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1384 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1385 jfenwick 1879 ValueType& resvec=result->getVector();
1386     DataReady_ptr resptr=DataReady_ptr(result);
1387     int sample;
1388 jfenwick 1898 size_t outoffset; // offset in the output data
1389 jfenwick 1885 int totalsamples=getNumSamples();
1390 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
1391     size_t resoffset=0; // where in the vector to find the answer
1392 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1393 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1394 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
1395 jfenwick 1879 {
1396 jfenwick 2157 if (sample==0) {ENABLEDEBUG}
1397 jfenwick 2166
1398     // if (sample==5800/getNumDPPSample()) {ENABLEDEBUG}
1399 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
1400 jfenwick 1885 #ifdef _OPENMP
1401 jfenwick 1898 res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1402 jfenwick 1885 #else
1403 jfenwick 1899 res=resolveSample(v,0,sample,resoffset); // res would normally be v, but not if its a single IDENTITY op.
1404 jfenwick 1885 #endif
1405 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1406 jfenwick 2157 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1407 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
1408 jfenwick 2157 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1409 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
1410 jfenwick 1879 {
1411 jfenwick 2166 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
1412 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
1413 jfenwick 1879 }
1414 jfenwick 2157 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1415 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
1416 jfenwick 2157 DISABLEDEBUG
1417 jfenwick 1879 }
1418     return resptr;
1419     }
1420    
1421 jfenwick 1865 std::string
1422     DataLazy::toString() const
1423     {
1424 jfenwick 1886 ostringstream oss;
1425     oss << "Lazy Data:";
1426     intoString(oss);
1427     return oss.str();
1428 jfenwick 1865 }
1429    
1430 jfenwick 1899
1431 jfenwick 1886 void
1432     DataLazy::intoString(ostringstream& oss) const
1433     {
1434 jfenwick 2177 oss << "[" << m_children <<";"<<m_height <<"]";
1435 jfenwick 1886 switch (getOpgroup(m_op))
1436     {
1437 jfenwick 1889 case G_IDENTITY:
1438     if (m_id->isExpanded())
1439     {
1440     oss << "E";
1441     }
1442     else if (m_id->isTagged())
1443     {
1444     oss << "T";
1445     }
1446     else if (m_id->isConstant())
1447     {
1448     oss << "C";
1449     }
1450     else
1451     {
1452     oss << "?";
1453     }
1454 jfenwick 1886 oss << '@' << m_id.get();
1455     break;
1456     case G_BINARY:
1457     oss << '(';
1458     m_left->intoString(oss);
1459     oss << ' ' << opToString(m_op) << ' ';
1460     m_right->intoString(oss);
1461     oss << ')';
1462     break;
1463     case G_UNARY:
1464 jfenwick 2147 case G_UNARY_P:
1465 jfenwick 2037 case G_NP1OUT:
1466 jfenwick 2084 case G_NP1OUT_P:
1467 jfenwick 1886 oss << opToString(m_op) << '(';
1468     m_left->intoString(oss);
1469     oss << ')';
1470     break;
1471 jfenwick 2066 case G_TENSORPROD:
1472     oss << opToString(m_op) << '(';
1473     m_left->intoString(oss);
1474     oss << ", ";
1475     m_right->intoString(oss);
1476     oss << ')';
1477     break;
1478 jfenwick 1886 default:
1479     oss << "UNKNOWN";
1480     }
1481     }
1482    
1483 jfenwick 1865 DataAbstract*
1484     DataLazy::deepCopy()
1485     {
1486 jfenwick 1935 switch (getOpgroup(m_op))
1487 jfenwick 1865 {
1488 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1489     case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1490     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1491 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1492     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1493 jfenwick 1935 default:
1494     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1495 jfenwick 1865 }
1496     }
1497    
1498    
1499 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1500     // Instances of DataReady can look at the size of their vectors.
1501     // For lazy though, it could be the size the data would be if it were resolved;
1502     // or it could be some function of the lengths of the DataReady instances which
1503     // form part of the expression.
1504     // Rather than have people making assumptions, I have disabled the method.
1505 jfenwick 1865 DataTypes::ValueType::size_type
1506     DataLazy::getLength() const
1507     {
1508 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1509 jfenwick 1865 }
1510    
1511    
1512     DataAbstract*
1513     DataLazy::getSlice(const DataTypes::RegionType& region) const
1514     {
1515 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1516 jfenwick 1865 }
1517    
1518 jfenwick 1935
1519     // To do this we need to rely on our child nodes
1520 jfenwick 1865 DataTypes::ValueType::size_type
1521     DataLazy::getPointOffset(int sampleNo,
1522 jfenwick 1935 int dataPointNo)
1523     {
1524     if (m_op==IDENTITY)
1525     {
1526     return m_id->getPointOffset(sampleNo,dataPointNo);
1527     }
1528     if (m_readytype!='E')
1529     {
1530     collapse();
1531     return m_id->getPointOffset(sampleNo,dataPointNo);
1532     }
1533     // at this point we do not have an identity node and the expression will be Expanded
1534     // so we only need to know which child to ask
1535     if (m_left->m_readytype=='E')
1536     {
1537     return m_left->getPointOffset(sampleNo,dataPointNo);
1538     }
1539     else
1540     {
1541     return m_right->getPointOffset(sampleNo,dataPointNo);
1542     }
1543     }
1544    
1545     // To do this we need to rely on our child nodes
1546     DataTypes::ValueType::size_type
1547     DataLazy::getPointOffset(int sampleNo,
1548 jfenwick 1865 int dataPointNo) const
1549     {
1550 jfenwick 1935 if (m_op==IDENTITY)
1551     {
1552     return m_id->getPointOffset(sampleNo,dataPointNo);
1553     }
1554     if (m_readytype=='E')
1555     {
1556     // at this point we do not have an identity node and the expression will be Expanded
1557     // so we only need to know which child to ask
1558     if (m_left->m_readytype=='E')
1559     {
1560     return m_left->getPointOffset(sampleNo,dataPointNo);
1561     }
1562     else
1563     {
1564     return m_right->getPointOffset(sampleNo,dataPointNo);
1565     }
1566     }
1567     if (m_readytype=='C')
1568     {
1569     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1570     }
1571     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1572 jfenwick 1865 }
1573    
1574 jfenwick 1901 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1575     // to zero, all the tags from all the DataTags would be in the result.
1576     // However since they all have the same value (0) whether they are there or not should not matter.
1577     // So I have decided that for all types this method will create a constant 0.
1578     // It can be promoted up as required.
1579     // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1580     // but we can deal with that if it arrises.
1581     void
1582     DataLazy::setToZero()
1583     {
1584     DataTypes::ValueType v(getNoValues(),0);
1585     m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1586     m_op=IDENTITY;
1587     m_right.reset();
1588     m_left.reset();
1589     m_readytype='C';
1590     m_buffsRequired=1;
1591     }
1592    
1593 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26