/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2157 - (hide annotations)
Mon Dec 15 06:05:58 2008 UTC (10 years, 4 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 50286 byte(s)
Some lazy fixes to the symmetric operation.
1 jfenwick 1865
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     #include "DataLazy.h"
16     #ifdef USE_NETCDF
17     #include <netcdfcpp.h>
18     #endif
19     #ifdef PASO_MPI
20     #include <mpi.h>
21     #endif
22 jfenwick 1885 #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25 jfenwick 1865 #include "FunctionSpace.h"
26     #include "DataTypes.h"
27 jfenwick 1868 #include "Data.h"
28 jfenwick 1886 #include "UnaryFuncs.h" // for escript::fsign
29 jfenwick 1950 #include "Utils.h"
30 jfenwick 1865
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 jfenwick 1899 /*
42     How does DataLazy work?
43     ~~~~~~~~~~~~~~~~~~~~~~~
44    
45     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
46     denoted left and right.
47    
48     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
49     This means that all "internal" nodes in the structure are instances of DataLazy.
50    
51     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
52     Note that IDENITY is not considered a unary operation.
53    
54     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
55     It must however form a DAG (directed acyclic graph).
56     I will refer to individual DataLazy objects with the structure as nodes.
57    
58     Each node also stores:
59     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
60     evaluated.
61     - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
62     evaluate the expression.
63     - m_samplesize ~ the number of doubles stored in a sample.
64    
65     When a new node is created, the above values are computed based on the values in the child nodes.
66     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
67    
68     The resolve method, which produces a DataReady from a DataLazy, does the following:
69     1) Create a DataReady to hold the new result.
70     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
71     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
72    
73     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
74    
75     resolveSample returns a Vector* and an offset within that vector where the result is stored.
76     Normally, this would be v, but for identity nodes their internal vector is returned instead.
77    
78     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
79    
80     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
81     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
82 jfenwick 2037
83 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
84 jfenwick 2037 1) Add to the ES_optype.
85     2) determine what opgroup your operation belongs to (X)
86     3) add a string for the op to the end of ES_opstrings
87     4) increase ES_opcount
88     5) add an entry (X) to opgroups
89     6) add an entry to the switch in collapseToReady
90     7) add an entry to resolveX
91 jfenwick 1899 */
92    
93    
94 jfenwick 1865 using namespace std;
95 jfenwick 1868 using namespace boost;
96 jfenwick 1865
97     namespace escript
98     {
99    
100     namespace
101     {
102    
103 jfenwick 1886 enum ES_opgroup
104     {
105     G_UNKNOWN,
106     G_IDENTITY,
107 jfenwick 1899 G_BINARY, // pointwise operations with two arguments
108 jfenwick 2037 G_UNARY, // pointwise operations with one argument
109 jfenwick 2147 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
110 jfenwick 2066 G_NP1OUT, // non-pointwise op with one output
111 jfenwick 2084 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
112 jfenwick 2066 G_TENSORPROD // general tensor product
113 jfenwick 1886 };
114    
115    
116    
117    
118 jfenwick 1910 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
119     "sin","cos","tan",
120 jfenwick 1886 "asin","acos","atan","sinh","cosh","tanh","erf",
121     "asinh","acosh","atanh",
122 jfenwick 1888 "log10","log","sign","abs","neg","pos","exp","sqrt",
123 jfenwick 2147 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
124 jfenwick 2066 "symmetric","nonsymmetric",
125 jfenwick 2084 "prod",
126 jfenwick 2147 "transpose", "trace"};
127     int ES_opcount=40;
128 jfenwick 1910 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
129     G_UNARY,G_UNARY,G_UNARY, //10
130     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
131     G_UNARY,G_UNARY,G_UNARY, // 20
132 jfenwick 2147 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
133     G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
134 jfenwick 2066 G_NP1OUT,G_NP1OUT,
135 jfenwick 2084 G_TENSORPROD,
136     G_NP1OUT_P, G_NP1OUT_P};
137 jfenwick 1886 inline
138     ES_opgroup
139     getOpgroup(ES_optype op)
140     {
141     return opgroups[op];
142     }
143    
144 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
145     FunctionSpace
146     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
147     {
148     // perhaps this should call interpolate and throw or something?
149     // maybe we need an interpolate node -
150     // that way, if interpolate is required in any other op we can just throw a
151     // programming error exception.
152 jfenwick 1879
153 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
154     FunctionSpace r=right->getFunctionSpace();
155     if (l!=r)
156     {
157     if (r.probeInterpolation(l))
158     {
159     return l;
160     }
161     if (l.probeInterpolation(r))
162     {
163     return r;
164     }
165     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
166     }
167     return l;
168 jfenwick 1865 }
169    
170     // return the shape of the result of "left op right"
171 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
172 jfenwick 1865 DataTypes::ShapeType
173     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
174     {
175 jfenwick 1879 if (left->getShape()!=right->getShape())
176     {
177 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
178 jfenwick 1908 {
179     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
180     }
181     if (left->getRank()==0) // we need to allow scalar * anything
182     {
183     return right->getShape();
184     }
185     if (right->getRank()==0)
186     {
187     return left->getShape();
188     }
189     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
190 jfenwick 1879 }
191     return left->getShape();
192 jfenwick 1865 }
193    
194 jfenwick 2084 // return the shape for "op left"
195    
196     DataTypes::ShapeType
197     resultShape(DataAbstract_ptr left, ES_optype op)
198     {
199     switch(op)
200     {
201     case TRANS:
202     return left->getShape();
203     break;
204     case TRACE:
205     return DataTypes::scalarShape;
206     break;
207     default:
208     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
209     }
210     }
211    
212 jfenwick 2066 // determine the output shape for the general tensor product operation
213     // the additional parameters return information required later for the product
214     // the majority of this code is copy pasted from C_General_Tensor_Product
215     DataTypes::ShapeType
216     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
217 jfenwick 1865 {
218 jfenwick 2066
219     // Get rank and shape of inputs
220     int rank0 = left->getRank();
221     int rank1 = right->getRank();
222     const DataTypes::ShapeType& shape0 = left->getShape();
223     const DataTypes::ShapeType& shape1 = right->getShape();
224    
225     // Prepare for the loops of the product and verify compatibility of shapes
226     int start0=0, start1=0;
227     if (transpose == 0) {}
228     else if (transpose == 1) { start0 = axis_offset; }
229     else if (transpose == 2) { start1 = rank1-axis_offset; }
230     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
231    
232 jfenwick 2085 if (rank0<axis_offset)
233     {
234     throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
235     }
236 jfenwick 2066
237     // Adjust the shapes for transpose
238     DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
239     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
240     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
241     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
242    
243     // Prepare for the loops of the product
244     SL=1, SM=1, SR=1;
245     for (int i=0; i<rank0-axis_offset; i++) {
246     SL *= tmpShape0[i];
247     }
248     for (int i=rank0-axis_offset; i<rank0; i++) {
249     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
250     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
251     }
252     SM *= tmpShape0[i];
253     }
254     for (int i=axis_offset; i<rank1; i++) {
255     SR *= tmpShape1[i];
256     }
257    
258     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
259     DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
260     { // block to limit the scope of out_index
261     int out_index=0;
262     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
263     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
264     }
265 jfenwick 2086
266     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
267     {
268     ostringstream os;
269     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
270     throw DataException(os.str());
271     }
272    
273 jfenwick 2066 return shape2;
274 jfenwick 1865 }
275    
276 jfenwick 2066
277     // determine the number of points in the result of "left op right"
278     // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
279     // size_t
280     // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
281     // {
282     // switch (getOpgroup(op))
283     // {
284     // case G_BINARY: return left->getLength();
285     // case G_UNARY: return left->getLength();
286     // case G_NP1OUT: return left->getLength();
287     // default:
288     // throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
289     // }
290     // }
291    
292 jfenwick 1899 // determine the number of samples requires to evaluate an expression combining left and right
293 jfenwick 2037 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294 jfenwick 2066 // The same goes for G_TENSORPROD
295 jfenwick 2157 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296     // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297     // multiple times
298 jfenwick 1879 int
299     calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300     {
301 jfenwick 1886 switch(getOpgroup(op))
302 jfenwick 1879 {
303 jfenwick 1886 case G_IDENTITY: return 1;
304 jfenwick 2157 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305 jfenwick 2147 case G_UNARY:
306     case G_UNARY_P: return max(left->getBuffsRequired(),1);
307 jfenwick 2037 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
308 jfenwick 2084 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
309 jfenwick 2066 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
310 jfenwick 1879 default:
311     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
312     }
313     }
314    
315 jfenwick 1899
316 jfenwick 1865 } // end anonymous namespace
317    
318    
319 jfenwick 1899
320     // Return a string representing the operation
321 jfenwick 1865 const std::string&
322     opToString(ES_optype op)
323     {
324     if (op<0 || op>=ES_opcount)
325     {
326     op=UNKNOWNOP;
327     }
328     return ES_opstrings[op];
329     }
330    
331    
332     DataLazy::DataLazy(DataAbstract_ptr p)
333     : parent(p->getFunctionSpace(),p->getShape()),
334 jfenwick 2066 m_op(IDENTITY),
335     m_axis_offset(0),
336     m_transpose(0),
337     m_SL(0), m_SM(0), m_SR(0)
338 jfenwick 1865 {
339 jfenwick 1879 if (p->isLazy())
340     {
341     // I don't want identity of Lazy.
342     // Question: Why would that be so bad?
343     // Answer: We assume that the child of ID is something we can call getVector on
344     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
345     }
346     else
347     {
348     m_id=dynamic_pointer_cast<DataReady>(p);
349 jfenwick 1889 if(p->isConstant()) {m_readytype='C';}
350     else if(p->isExpanded()) {m_readytype='E';}
351     else if (p->isTagged()) {m_readytype='T';}
352     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
353 jfenwick 1879 }
354 jfenwick 1886 m_buffsRequired=1;
355 jfenwick 1879 m_samplesize=getNumDPPSample()*getNoValues();
356 jfenwick 2066 m_maxsamplesize=m_samplesize;
357 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
358 jfenwick 1865 }
359    
360 jfenwick 1899
361    
362 jfenwick 1901
363 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
364     : parent(left->getFunctionSpace(),left->getShape()),
365 jfenwick 2066 m_op(op),
366     m_axis_offset(0),
367     m_transpose(0),
368     m_SL(0), m_SM(0), m_SR(0)
369 jfenwick 1886 {
370 jfenwick 2037 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
371 jfenwick 1886 {
372     throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
373     }
374 jfenwick 2066
375 jfenwick 1886 DataLazy_ptr lleft;
376     if (!left->isLazy())
377     {
378     lleft=DataLazy_ptr(new DataLazy(left));
379     }
380     else
381     {
382     lleft=dynamic_pointer_cast<DataLazy>(left);
383     }
384 jfenwick 1889 m_readytype=lleft->m_readytype;
385 jfenwick 1886 m_left=lleft;
386 jfenwick 2037 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
387 jfenwick 1886 m_samplesize=getNumDPPSample()*getNoValues();
388 jfenwick 2066 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
389 jfenwick 1886 }
390    
391    
392 jfenwick 1943 // In this constructor we need to consider interpolation
393 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
394     : parent(resultFS(left,right,op), resultShape(left,right,op)),
395 jfenwick 2066 m_op(op),
396     m_SL(0), m_SM(0), m_SR(0)
397 jfenwick 1879 {
398 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
399 jfenwick 1886 {
400 jfenwick 1889 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
401 jfenwick 1886 }
402 jfenwick 1943
403     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
404     {
405     FunctionSpace fs=getFunctionSpace();
406     Data ltemp(left);
407     Data tmp(ltemp,fs);
408     left=tmp.borrowDataPtr();
409     }
410 jfenwick 2066 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
411 jfenwick 1943 {
412     Data tmp(Data(right),getFunctionSpace());
413     right=tmp.borrowDataPtr();
414     }
415     left->operandCheck(*right);
416    
417 jfenwick 1899 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
418 jfenwick 1879 {
419     m_left=dynamic_pointer_cast<DataLazy>(left);
420     }
421     else
422     {
423     m_left=DataLazy_ptr(new DataLazy(left));
424     }
425     if (right->isLazy())
426     {
427     m_right=dynamic_pointer_cast<DataLazy>(right);
428     }
429     else
430     {
431     m_right=DataLazy_ptr(new DataLazy(right));
432     }
433 jfenwick 1889 char lt=m_left->m_readytype;
434     char rt=m_right->m_readytype;
435     if (lt=='E' || rt=='E')
436     {
437     m_readytype='E';
438     }
439     else if (lt=='T' || rt=='T')
440     {
441     m_readytype='T';
442     }
443     else
444     {
445     m_readytype='C';
446     }
447 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
448     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
449 jfenwick 1879 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
450 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
451 jfenwick 1879 }
452    
453 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
454     : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
455     m_op(op),
456     m_axis_offset(axis_offset),
457     m_transpose(transpose)
458     {
459     if ((getOpgroup(op)!=G_TENSORPROD))
460     {
461     throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
462     }
463     if ((transpose>2) || (transpose<0))
464     {
465     throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
466     }
467     if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
468     {
469     FunctionSpace fs=getFunctionSpace();
470     Data ltemp(left);
471     Data tmp(ltemp,fs);
472     left=tmp.borrowDataPtr();
473     }
474     if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
475     {
476     Data tmp(Data(right),getFunctionSpace());
477     right=tmp.borrowDataPtr();
478     }
479     left->operandCheck(*right);
480 jfenwick 1879
481 jfenwick 2066 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
482     {
483     m_left=dynamic_pointer_cast<DataLazy>(left);
484     }
485     else
486     {
487     m_left=DataLazy_ptr(new DataLazy(left));
488     }
489     if (right->isLazy())
490     {
491     m_right=dynamic_pointer_cast<DataLazy>(right);
492     }
493     else
494     {
495     m_right=DataLazy_ptr(new DataLazy(right));
496     }
497     char lt=m_left->m_readytype;
498     char rt=m_right->m_readytype;
499     if (lt=='E' || rt=='E')
500     {
501     m_readytype='E';
502     }
503     else if (lt=='T' || rt=='T')
504     {
505     m_readytype='T';
506     }
507     else
508     {
509     m_readytype='C';
510     }
511     m_samplesize=getNumDPPSample()*getNoValues();
512     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
513     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
514 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
515 jfenwick 2066 }
516    
517    
518 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
519     : parent(left->getFunctionSpace(), resultShape(left,op)),
520     m_op(op),
521     m_axis_offset(axis_offset),
522 jfenwick 2147 m_transpose(0),
523     m_tol(0)
524 jfenwick 2084 {
525     if ((getOpgroup(op)!=G_NP1OUT_P))
526     {
527     throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
528     }
529     DataLazy_ptr lleft;
530     if (!left->isLazy())
531     {
532     lleft=DataLazy_ptr(new DataLazy(left));
533     }
534     else
535     {
536     lleft=dynamic_pointer_cast<DataLazy>(left);
537     }
538     m_readytype=lleft->m_readytype;
539     m_left=lleft;
540     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
541     m_samplesize=getNumDPPSample()*getNoValues();
542     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
543 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
544 jfenwick 2084 }
545    
546 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
547     : parent(left->getFunctionSpace(), left->getShape()),
548     m_op(op),
549     m_axis_offset(0),
550     m_transpose(0),
551     m_tol(tol)
552     {
553     if ((getOpgroup(op)!=G_UNARY_P))
554     {
555     throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
556     }
557     DataLazy_ptr lleft;
558     if (!left->isLazy())
559     {
560     lleft=DataLazy_ptr(new DataLazy(left));
561     }
562     else
563     {
564     lleft=dynamic_pointer_cast<DataLazy>(left);
565     }
566     m_readytype=lleft->m_readytype;
567     m_left=lleft;
568     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
569     m_samplesize=getNumDPPSample()*getNoValues();
570     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
571     LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
572     }
573 jfenwick 2084
574 jfenwick 1865 DataLazy::~DataLazy()
575     {
576     }
577    
578 jfenwick 1879
579     int
580     DataLazy::getBuffsRequired() const
581 jfenwick 1865 {
582 jfenwick 1879 return m_buffsRequired;
583     }
584    
585 jfenwick 1884
586 jfenwick 2066 size_t
587     DataLazy::getMaxSampleSize() const
588     {
589     return m_maxsamplesize;
590     }
591    
592 jfenwick 1899 /*
593     \brief Evaluates the expression using methods on Data.
594     This does the work for the collapse method.
595     For reasons of efficiency do not call this method on DataExpanded nodes.
596     */
597 jfenwick 1889 DataReady_ptr
598     DataLazy::collapseToReady()
599     {
600     if (m_readytype=='E')
601     { // this is more an efficiency concern than anything else
602     throw DataException("Programmer Error - do not use collapse on Expanded data.");
603     }
604     if (m_op==IDENTITY)
605     {
606     return m_id;
607     }
608     DataReady_ptr pleft=m_left->collapseToReady();
609     Data left(pleft);
610     Data right;
611 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
612 jfenwick 1889 {
613     right=Data(m_right->collapseToReady());
614     }
615     Data result;
616     switch(m_op)
617     {
618     case ADD:
619     result=left+right;
620     break;
621     case SUB:
622     result=left-right;
623     break;
624     case MUL:
625     result=left*right;
626     break;
627     case DIV:
628     result=left/right;
629     break;
630     case SIN:
631     result=left.sin();
632     break;
633     case COS:
634     result=left.cos();
635     break;
636     case TAN:
637     result=left.tan();
638     break;
639     case ASIN:
640     result=left.asin();
641     break;
642     case ACOS:
643     result=left.acos();
644     break;
645     case ATAN:
646     result=left.atan();
647     break;
648     case SINH:
649     result=left.sinh();
650     break;
651     case COSH:
652     result=left.cosh();
653     break;
654     case TANH:
655     result=left.tanh();
656     break;
657     case ERF:
658     result=left.erf();
659     break;
660     case ASINH:
661     result=left.asinh();
662     break;
663     case ACOSH:
664     result=left.acosh();
665     break;
666     case ATANH:
667     result=left.atanh();
668     break;
669     case LOG10:
670     result=left.log10();
671     break;
672     case LOG:
673     result=left.log();
674     break;
675     case SIGN:
676     result=left.sign();
677     break;
678     case ABS:
679     result=left.abs();
680     break;
681     case NEG:
682     result=left.neg();
683     break;
684     case POS:
685     // it doesn't mean anything for delayed.
686     // it will just trigger a deep copy of the lazy object
687     throw DataException("Programmer error - POS not supported for lazy data.");
688     break;
689     case EXP:
690     result=left.exp();
691     break;
692     case SQRT:
693     result=left.sqrt();
694     break;
695     case RECIP:
696     result=left.oneOver();
697     break;
698     case GZ:
699     result=left.wherePositive();
700     break;
701     case LZ:
702     result=left.whereNegative();
703     break;
704     case GEZ:
705     result=left.whereNonNegative();
706     break;
707     case LEZ:
708     result=left.whereNonPositive();
709     break;
710 jfenwick 2147 case NEZ:
711     result=left.whereNonZero(m_tol);
712     break;
713     case EZ:
714     result=left.whereZero(m_tol);
715     break;
716 jfenwick 2037 case SYM:
717     result=left.symmetric();
718     break;
719     case NSYM:
720     result=left.nonsymmetric();
721     break;
722 jfenwick 2066 case PROD:
723     result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
724     break;
725 jfenwick 2084 case TRANS:
726     result=left.transpose(m_axis_offset);
727     break;
728     case TRACE:
729     result=left.trace(m_axis_offset);
730     break;
731 jfenwick 1889 default:
732 jfenwick 2037 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
733 jfenwick 1889 }
734     return result.borrowReadyPtr();
735     }
736    
737 jfenwick 1899 /*
738     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
739     This method uses the original methods on the Data class to evaluate the expressions.
740     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
741     the purpose of using DataLazy in the first place).
742     */
743 jfenwick 1889 void
744     DataLazy::collapse()
745     {
746     if (m_op==IDENTITY)
747     {
748     return;
749     }
750     if (m_readytype=='E')
751     { // this is more an efficiency concern than anything else
752     throw DataException("Programmer Error - do not use collapse on Expanded data.");
753     }
754     m_id=collapseToReady();
755     m_op=IDENTITY;
756     }
757    
758 jfenwick 1899 /*
759 jfenwick 2037 \brief Compute the value of the expression (unary operation) for the given sample.
760 jfenwick 1899 \return Vector which stores the value of the subexpression for the given sample.
761     \param v A vector to store intermediate results.
762     \param offset Index in v to begin storing results.
763     \param sampleNo Sample number to evaluate.
764     \param roffset (output parameter) the offset in the return vector where the result begins.
765    
766     The return value will be an existing vector so do not deallocate it.
767     If the result is stored in v it should be stored at the offset given.
768     Everything from offset to the end of v should be considered available for this method to use.
769     */
770 jfenwick 1898 DataTypes::ValueType*
771     DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
772 jfenwick 1889 {
773     // we assume that any collapsing has been done before we get here
774     // since we only have one argument we don't need to think about only
775     // processing single points.
776     if (m_readytype!='E')
777     {
778     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
779     }
780 jfenwick 1898 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
781     const double* left=&((*vleft)[roffset]);
782 jfenwick 1889 double* result=&(v[offset]);
783 jfenwick 1898 roffset=offset;
784 jfenwick 1889 switch (m_op)
785     {
786     case SIN:
787 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
788 jfenwick 1889 break;
789     case COS:
790 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
791 jfenwick 1889 break;
792     case TAN:
793 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
794 jfenwick 1889 break;
795     case ASIN:
796 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
797 jfenwick 1889 break;
798     case ACOS:
799 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
800 jfenwick 1889 break;
801     case ATAN:
802 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
803 jfenwick 1889 break;
804     case SINH:
805 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
806 jfenwick 1889 break;
807     case COSH:
808 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
809 jfenwick 1889 break;
810     case TANH:
811 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
812 jfenwick 1889 break;
813     case ERF:
814 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815 jfenwick 1889 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
816     #else
817     tensor_unary_operation(m_samplesize, left, result, ::erf);
818     break;
819     #endif
820     case ASINH:
821 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
823     #else
824     tensor_unary_operation(m_samplesize, left, result, ::asinh);
825     #endif
826     break;
827     case ACOSH:
828 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
830     #else
831     tensor_unary_operation(m_samplesize, left, result, ::acosh);
832     #endif
833     break;
834     case ATANH:
835 phornby 2050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
836 jfenwick 1889 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
837     #else
838     tensor_unary_operation(m_samplesize, left, result, ::atanh);
839     #endif
840     break;
841     case LOG10:
842 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
843 jfenwick 1889 break;
844     case LOG:
845 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
846 jfenwick 1889 break;
847     case SIGN:
848     tensor_unary_operation(m_samplesize, left, result, escript::fsign);
849     break;
850     case ABS:
851 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
852 jfenwick 1889 break;
853     case NEG:
854     tensor_unary_operation(m_samplesize, left, result, negate<double>());
855     break;
856     case POS:
857     // it doesn't mean anything for delayed.
858     // it will just trigger a deep copy of the lazy object
859     throw DataException("Programmer error - POS not supported for lazy data.");
860     break;
861     case EXP:
862 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
863 jfenwick 1889 break;
864     case SQRT:
865 jfenwick 1972 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
866 jfenwick 1889 break;
867     case RECIP:
868     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
869     break;
870     case GZ:
871     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
872     break;
873     case LZ:
874     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
875     break;
876     case GEZ:
877     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
878     break;
879     case LEZ:
880     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
881     break;
882 jfenwick 2147 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
883     case NEZ:
884     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
885     break;
886     case EZ:
887     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
888     break;
889 jfenwick 1889
890     default:
891     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
892     }
893 jfenwick 1898 return &v;
894 jfenwick 1889 }
895    
896    
897 jfenwick 2147
898    
899    
900    
901 jfenwick 2037 /*
902     \brief Compute the value of the expression (unary operation) for the given sample.
903     \return Vector which stores the value of the subexpression for the given sample.
904     \param v A vector to store intermediate results.
905     \param offset Index in v to begin storing results.
906     \param sampleNo Sample number to evaluate.
907     \param roffset (output parameter) the offset in the return vector where the result begins.
908 jfenwick 1898
909 jfenwick 2037 The return value will be an existing vector so do not deallocate it.
910     If the result is stored in v it should be stored at the offset given.
911     Everything from offset to the end of v should be considered available for this method to use.
912     */
913     DataTypes::ValueType*
914     DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
915     {
916     // we assume that any collapsing has been done before we get here
917     // since we only have one argument we don't need to think about only
918     // processing single points.
919     if (m_readytype!='E')
920     {
921     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
922     }
923     // since we can't write the result over the input, we need a result offset further along
924     size_t subroffset=roffset+m_samplesize;
925 jfenwick 2157 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927 jfenwick 2037 roffset=offset;
928 jfenwick 2157 size_t loop=0;
929     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930     size_t step=getNoValues();
931 jfenwick 2037 switch (m_op)
932     {
933     case SYM:
934 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
935     {
936     DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937     subroffset+=step;
938     offset+=step;
939     }
940 jfenwick 2037 break;
941     case NSYM:
942 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
943     {
944     DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945     subroffset+=step;
946     offset+=step;
947     }
948 jfenwick 2037 break;
949     default:
950     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
951     }
952     return &v;
953     }
954 jfenwick 1898
955 jfenwick 2084 /*
956     \brief Compute the value of the expression (unary operation) for the given sample.
957     \return Vector which stores the value of the subexpression for the given sample.
958     \param v A vector to store intermediate results.
959     \param offset Index in v to begin storing results.
960     \param sampleNo Sample number to evaluate.
961     \param roffset (output parameter) the offset in the return vector where the result begins.
962 jfenwick 1899
963 jfenwick 2084 The return value will be an existing vector so do not deallocate it.
964     If the result is stored in v it should be stored at the offset given.
965     Everything from offset to the end of v should be considered available for this method to use.
966     */
967     DataTypes::ValueType*
968     DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969     {
970     // we assume that any collapsing has been done before we get here
971     // since we only have one argument we don't need to think about only
972     // processing single points.
973     if (m_readytype!='E')
974     {
975     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976     }
977     // since we can't write the result over the input, we need a result offset further along
978 jfenwick 2157 size_t subroffset;
979     const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980     LAZYDEBUG(cerr << "from=" << offset+m_left->m_samplesize << " to=" << subroffset << " ret=" << roffset << endl;)
981 jfenwick 2084 roffset=offset;
982 jfenwick 2157 size_t loop=0;
983     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
984     size_t step=getNoValues();
985 jfenwick 2084 switch (m_op)
986     {
987     case TRACE:
988 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
989     {
990     DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
991     subroffset+=step;
992     offset+=step;
993     }
994 jfenwick 2084 break;
995     case TRANS:
996 jfenwick 2157 for (loop=0;loop<numsteps;++loop)
997     {
998     DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
999     subroffset+=step;
1000     offset+=step;
1001     }
1002 jfenwick 2084 break;
1003     default:
1004     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1005     }
1006     return &v;
1007     }
1008 jfenwick 2037
1009    
1010 phornby 1987 #define PROC_OP(TYPE,X) \
1011 jfenwick 2152 for (int j=0;j<onumsteps;++j)\
1012     {\
1013     for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1014     { \
1015     LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1016 jfenwick 2157 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1017 jfenwick 2152 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1018 jfenwick 2157 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
1019 jfenwick 2152 lroffset+=leftstep; \
1020     rroffset+=rightstep; \
1021     }\
1022     lroffset+=oleftstep;\
1023     rroffset+=orightstep;\
1024 jfenwick 1889 }
1025    
1026 jfenwick 1899 /*
1027     \brief Compute the value of the expression (binary operation) for the given sample.
1028     \return Vector which stores the value of the subexpression for the given sample.
1029     \param v A vector to store intermediate results.
1030     \param offset Index in v to begin storing results.
1031     \param sampleNo Sample number to evaluate.
1032     \param roffset (output parameter) the offset in the return vector where the result begins.
1033    
1034     The return value will be an existing vector so do not deallocate it.
1035     If the result is stored in v it should be stored at the offset given.
1036     Everything from offset to the end of v should be considered available for this method to use.
1037     */
1038     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1039     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1040     // If both children are expanded, then we can process them in a single operation (we treat
1041     // the whole sample as one big datapoint.
1042     // If one of the children is not expanded, then we need to treat each point in the sample
1043     // individually.
1044 jfenwick 1908 // There is an additional complication when scalar operations are considered.
1045     // For example, 2+Vector.
1046     // In this case each double within the point is treated individually
1047 jfenwick 1898 DataTypes::ValueType*
1048 jfenwick 1899 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1049 jfenwick 1889 {
1050 jfenwick 2092 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1051 jfenwick 1889
1052 jfenwick 1899 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1053     // first work out which of the children are expanded
1054 jfenwick 1889 bool leftExp=(m_left->m_readytype=='E');
1055     bool rightExp=(m_right->m_readytype=='E');
1056 jfenwick 2084 if (!leftExp && !rightExp)
1057     {
1058     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1059     }
1060     bool leftScalar=(m_left->getRank()==0);
1061     bool rightScalar=(m_right->getRank()==0);
1062 jfenwick 2152 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1063 jfenwick 1908 {
1064 jfenwick 2152 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1065     }
1066     size_t leftsize=m_left->getNoValues();
1067     size_t rightsize=m_right->getNoValues();
1068     size_t chunksize=1; // how many doubles will be processed in one go
1069     int leftstep=0; // how far should the left offset advance after each step
1070     int rightstep=0;
1071     int numsteps=0; // total number of steps for the inner loop
1072     int oleftstep=0; // the o variables refer to the outer loop
1073     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1074     int onumsteps=1;
1075    
1076     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1077     bool RES=(rightExp && rightScalar);
1078     bool LS=(!leftExp && leftScalar); // left is a single scalar
1079     bool RS=(!rightExp && rightScalar);
1080     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1081     bool RN=(!rightExp && !rightScalar);
1082     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1083     bool REN=(rightExp && !rightScalar);
1084    
1085     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1086     {
1087     chunksize=m_left->getNumDPPSample()*leftsize;
1088     leftstep=0;
1089     rightstep=0;
1090     numsteps=1;
1091     }
1092     else if (LES || RES)
1093     {
1094     chunksize=1;
1095     if (LES) // left is an expanded scalar
1096 jfenwick 2084 {
1097 jfenwick 2152 if (RS)
1098     {
1099     leftstep=1;
1100     rightstep=0;
1101     numsteps=m_left->getNumDPPSample();
1102     }
1103     else // RN or REN
1104     {
1105     leftstep=0;
1106     oleftstep=1;
1107     rightstep=1;
1108     orightstep=(RN?-rightsize:0);
1109     numsteps=rightsize;
1110     onumsteps=m_left->getNumDPPSample();
1111     }
1112 jfenwick 2084 }
1113 jfenwick 2152 else // right is an expanded scalar
1114     {
1115     if (LS)
1116     {
1117     rightstep=1;
1118     leftstep=0;
1119     numsteps=m_right->getNumDPPSample();
1120     }
1121     else
1122     {
1123     rightstep=0;
1124     orightstep=1;
1125     leftstep=1;
1126     oleftstep=(LN?-leftsize:0);
1127     numsteps=leftsize;
1128     onumsteps=m_right->getNumDPPSample();
1129     }
1130     }
1131     }
1132     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1133     {
1134     if (LEN) // and Right will be a single value
1135     {
1136     chunksize=rightsize;
1137     leftstep=rightsize;
1138     rightstep=0;
1139     numsteps=m_left->getNumDPPSample();
1140     if (RS)
1141     {
1142     numsteps*=leftsize;
1143     }
1144     }
1145     else // REN
1146     {
1147     chunksize=leftsize;
1148     rightstep=leftsize;
1149     leftstep=0;
1150     numsteps=m_right->getNumDPPSample();
1151     if (LS)
1152     {
1153     numsteps*=rightsize;
1154     }
1155     }
1156     }
1157    
1158     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1159 jfenwick 1899 // Get the values of sub-expressions
1160 jfenwick 2157 const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1161     // calcBufss for why we can't put offset as the 2nd param above
1162     const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1163 jfenwick 1899 // the right child starts further along.
1164 jfenwick 2152 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1165     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1166     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1167     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1168     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1169     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1170     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1171 jfenwick 1899 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1172 jfenwick 1889 switch(m_op)
1173     {
1174     case ADD:
1175 phornby 2021 PROC_OP(NO_ARG,plus<double>());
1176 jfenwick 1889 break;
1177 jfenwick 1899 case SUB:
1178 phornby 2021 PROC_OP(NO_ARG,minus<double>());
1179 jfenwick 1899 break;
1180     case MUL:
1181 phornby 2021 PROC_OP(NO_ARG,multiplies<double>());
1182 jfenwick 1899 break;
1183     case DIV:
1184 phornby 2021 PROC_OP(NO_ARG,divides<double>());
1185 jfenwick 1899 break;
1186 jfenwick 1910 case POW:
1187 phornby 1994 PROC_OP(double (double,double),::pow);
1188 jfenwick 1910 break;
1189 jfenwick 1889 default:
1190 jfenwick 1898 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1191 jfenwick 1889 }
1192 jfenwick 1899 roffset=offset;
1193 jfenwick 1898 return &v;
1194 jfenwick 1889 }
1195    
1196 jfenwick 1898
1197 jfenwick 2152
1198 jfenwick 2066 /*
1199     \brief Compute the value of the expression (tensor product) for the given sample.
1200     \return Vector which stores the value of the subexpression for the given sample.
1201     \param v A vector to store intermediate results.
1202     \param offset Index in v to begin storing results.
1203     \param sampleNo Sample number to evaluate.
1204     \param roffset (output parameter) the offset in the return vector where the result begins.
1205 jfenwick 1898
1206 jfenwick 2066 The return value will be an existing vector so do not deallocate it.
1207     If the result is stored in v it should be stored at the offset given.
1208     Everything from offset to the end of v should be considered available for this method to use.
1209     */
1210     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1211     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1212     // unlike the other resolve helpers, we must treat these datapoints separately.
1213     DataTypes::ValueType*
1214     DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1215     {
1216 jfenwick 2092 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1217 jfenwick 2066
1218     size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1219     // first work out which of the children are expanded
1220     bool leftExp=(m_left->m_readytype=='E');
1221     bool rightExp=(m_right->m_readytype=='E');
1222     int steps=getNumDPPSample();
1223     int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1224     int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1225     int resultStep=max(leftStep,rightStep); // only one (at most) should be !=0
1226     // Get the values of sub-expressions (leave a gap of one sample for the result).
1227 jfenwick 2153 int gap=offset+m_left->getMaxSampleSize(); // actually only needs to be m_left->m_samplesize
1228     const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1229     gap+=m_right->getMaxSampleSize();
1230     const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1231     LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1232     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1233     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1234     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1235     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1236     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1237 jfenwick 2066 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1238     switch(m_op)
1239     {
1240     case PROD:
1241     for (int i=0;i<steps;++i,resultp+=resultStep)
1242     {
1243 jfenwick 2153 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1244     LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1245     LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1246 jfenwick 2066 const double *ptr_0 = &((*left)[lroffset]);
1247     const double *ptr_1 = &((*right)[rroffset]);
1248 jfenwick 2153 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1249     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1250 jfenwick 2066 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1251     lroffset+=leftStep;
1252     rroffset+=rightStep;
1253     }
1254     break;
1255     default:
1256     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1257     }
1258     roffset=offset;
1259     return &v;
1260     }
1261    
1262    
1263    
1264 jfenwick 1899 /*
1265     \brief Compute the value of the expression for the given sample.
1266     \return Vector which stores the value of the subexpression for the given sample.
1267     \param v A vector to store intermediate results.
1268     \param offset Index in v to begin storing results.
1269     \param sampleNo Sample number to evaluate.
1270     \param roffset (output parameter) the offset in the return vector where the result begins.
1271 jfenwick 1898
1272 jfenwick 1899 The return value will be an existing vector so do not deallocate it.
1273     */
1274 jfenwick 1884 // the vector and the offset are a place where the method could write its data if it wishes
1275     // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1276 jfenwick 1885 // Hence the return value to indicate where the data is actually stored.
1277     // Regardless, the storage should be assumed to be used, even if it isn't.
1278 jfenwick 1898
1279     // the roffset is the offset within the returned vector where the data begins
1280     const DataTypes::ValueType*
1281     DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1282 jfenwick 1879 {
1283 jfenwick 2092 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1284 jfenwick 1889 // collapse so we have a 'E' node or an IDENTITY for some other type
1285     if (m_readytype!='E' && m_op!=IDENTITY)
1286     {
1287     collapse();
1288     }
1289 jfenwick 1885 if (m_op==IDENTITY)
1290 jfenwick 1865 {
1291 jfenwick 1879 const ValueType& vec=m_id->getVector();
1292 jfenwick 1889 if (m_readytype=='C')
1293     {
1294 jfenwick 1898 roffset=0;
1295 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1296 jfenwick 1898 return &(vec);
1297 jfenwick 1889 }
1298 jfenwick 1898 roffset=m_id->getPointOffset(sampleNo, 0);
1299 jfenwick 2152 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1300 jfenwick 1898 return &(vec);
1301 jfenwick 1865 }
1302 jfenwick 1889 if (m_readytype!='E')
1303     {
1304     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1305     }
1306     switch (getOpgroup(m_op))
1307     {
1308 jfenwick 2147 case G_UNARY:
1309     case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1310 jfenwick 1898 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1311 jfenwick 2066 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1312 jfenwick 2084 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1313 jfenwick 2066 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1314 jfenwick 1889 default:
1315     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1316     }
1317 jfenwick 2152
1318 jfenwick 1889 }
1319    
1320    
1321 jfenwick 1899 // To simplify the memory management, all threads operate on one large vector, rather than one each.
1322     // Each sample is evaluated independently and copied into the result DataExpanded.
1323 jfenwick 1879 DataReady_ptr
1324     DataLazy::resolve()
1325     {
1326    
1327 jfenwick 2092 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1328     LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1329 jfenwick 1879
1330 jfenwick 1899 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1331 jfenwick 1889 {
1332     collapse();
1333     }
1334 jfenwick 1899 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1335 jfenwick 1889 {
1336     return m_id;
1337     }
1338     // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1339 jfenwick 2066 size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1340 jfenwick 1899 // storage to evaluate its expression
1341 jfenwick 1885 int numthreads=1;
1342     #ifdef _OPENMP
1343 jfenwick 1889 numthreads=getNumberOfThreads();
1344 jfenwick 1885 #endif
1345 jfenwick 1886 ValueType v(numthreads*threadbuffersize);
1346 jfenwick 2092 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1347 jfenwick 1898 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1348 jfenwick 1879 ValueType& resvec=result->getVector();
1349     DataReady_ptr resptr=DataReady_ptr(result);
1350     int sample;
1351 jfenwick 1898 size_t outoffset; // offset in the output data
1352 jfenwick 1885 int totalsamples=getNumSamples();
1353 jfenwick 1899 const ValueType* res=0; // Vector storing the answer
1354     size_t resoffset=0; // where in the vector to find the answer
1355 jfenwick 2152 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1356 caltinay 2082 #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1357 jfenwick 1885 for (sample=0;sample<totalsamples;++sample)
1358 jfenwick 1879 {
1359 jfenwick 2157 if (sample==0) {ENABLEDEBUG}
1360 jfenwick 2092 LAZYDEBUG(cout << "################################# " << sample << endl;)
1361 jfenwick 1885 #ifdef _OPENMP
1362 jfenwick 1898 res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1363 jfenwick 1885 #else
1364 jfenwick 1899 res=resolveSample(v,0,sample,resoffset); // res would normally be v, but not if its a single IDENTITY op.
1365 jfenwick 1885 #endif
1366 jfenwick 2092 LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1367 jfenwick 2157 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1368 jfenwick 1898 outoffset=result->getPointOffset(sample,0);
1369 jfenwick 2157 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1370 jfenwick 1898 for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset) // copy values into the output vector
1371 jfenwick 1879 {
1372 jfenwick 2157 // LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << endl;)
1373 jfenwick 1898 resvec[outoffset]=(*res)[resoffset];
1374 jfenwick 1879 }
1375 jfenwick 2157 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1376 jfenwick 2092 LAZYDEBUG(cerr << "*********************************" << endl;)
1377 jfenwick 2157 DISABLEDEBUG
1378 jfenwick 1879 }
1379     return resptr;
1380     }
1381    
1382 jfenwick 1865 std::string
1383     DataLazy::toString() const
1384     {
1385 jfenwick 1886 ostringstream oss;
1386     oss << "Lazy Data:";
1387     intoString(oss);
1388     return oss.str();
1389 jfenwick 1865 }
1390    
1391 jfenwick 1899
1392 jfenwick 1886 void
1393     DataLazy::intoString(ostringstream& oss) const
1394     {
1395     switch (getOpgroup(m_op))
1396     {
1397 jfenwick 1889 case G_IDENTITY:
1398     if (m_id->isExpanded())
1399     {
1400     oss << "E";
1401     }
1402     else if (m_id->isTagged())
1403     {
1404     oss << "T";
1405     }
1406     else if (m_id->isConstant())
1407     {
1408     oss << "C";
1409     }
1410     else
1411     {
1412     oss << "?";
1413     }
1414 jfenwick 1886 oss << '@' << m_id.get();
1415     break;
1416     case G_BINARY:
1417     oss << '(';
1418     m_left->intoString(oss);
1419     oss << ' ' << opToString(m_op) << ' ';
1420     m_right->intoString(oss);
1421     oss << ')';
1422     break;
1423     case G_UNARY:
1424 jfenwick 2147 case G_UNARY_P:
1425 jfenwick 2037 case G_NP1OUT:
1426 jfenwick 2084 case G_NP1OUT_P:
1427 jfenwick 1886 oss << opToString(m_op) << '(';
1428     m_left->intoString(oss);
1429     oss << ')';
1430     break;
1431 jfenwick 2066 case G_TENSORPROD:
1432     oss << opToString(m_op) << '(';
1433     m_left->intoString(oss);
1434     oss << ", ";
1435     m_right->intoString(oss);
1436     oss << ')';
1437     break;
1438 jfenwick 1886 default:
1439     oss << "UNKNOWN";
1440     }
1441     }
1442    
1443 jfenwick 1865 DataAbstract*
1444     DataLazy::deepCopy()
1445     {
1446 jfenwick 1935 switch (getOpgroup(m_op))
1447 jfenwick 1865 {
1448 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1449     case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1450     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1451 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1452     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1453 jfenwick 1935 default:
1454     throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1455 jfenwick 1865 }
1456     }
1457    
1458    
1459 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1460     // Instances of DataReady can look at the size of their vectors.
1461     // For lazy though, it could be the size the data would be if it were resolved;
1462     // or it could be some function of the lengths of the DataReady instances which
1463     // form part of the expression.
1464     // Rather than have people making assumptions, I have disabled the method.
1465 jfenwick 1865 DataTypes::ValueType::size_type
1466     DataLazy::getLength() const
1467     {
1468 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1469 jfenwick 1865 }
1470    
1471    
1472     DataAbstract*
1473     DataLazy::getSlice(const DataTypes::RegionType& region) const
1474     {
1475 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1476 jfenwick 1865 }
1477    
1478 jfenwick 1935
1479     // To do this we need to rely on our child nodes
1480 jfenwick 1865 DataTypes::ValueType::size_type
1481     DataLazy::getPointOffset(int sampleNo,
1482 jfenwick 1935 int dataPointNo)
1483     {
1484     if (m_op==IDENTITY)
1485     {
1486     return m_id->getPointOffset(sampleNo,dataPointNo);
1487     }
1488     if (m_readytype!='E')
1489     {
1490     collapse();
1491     return m_id->getPointOffset(sampleNo,dataPointNo);
1492     }
1493     // at this point we do not have an identity node and the expression will be Expanded
1494     // so we only need to know which child to ask
1495     if (m_left->m_readytype=='E')
1496     {
1497     return m_left->getPointOffset(sampleNo,dataPointNo);
1498     }
1499     else
1500     {
1501     return m_right->getPointOffset(sampleNo,dataPointNo);
1502     }
1503     }
1504    
1505     // To do this we need to rely on our child nodes
1506     DataTypes::ValueType::size_type
1507     DataLazy::getPointOffset(int sampleNo,
1508 jfenwick 1865 int dataPointNo) const
1509     {
1510 jfenwick 1935 if (m_op==IDENTITY)
1511     {
1512     return m_id->getPointOffset(sampleNo,dataPointNo);
1513     }
1514     if (m_readytype=='E')
1515     {
1516     // at this point we do not have an identity node and the expression will be Expanded
1517     // so we only need to know which child to ask
1518     if (m_left->m_readytype=='E')
1519     {
1520     return m_left->getPointOffset(sampleNo,dataPointNo);
1521     }
1522     else
1523     {
1524     return m_right->getPointOffset(sampleNo,dataPointNo);
1525     }
1526     }
1527     if (m_readytype=='C')
1528     {
1529     return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1530     }
1531     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1532 jfenwick 1865 }
1533    
1534 jfenwick 1901 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1535     // to zero, all the tags from all the DataTags would be in the result.
1536     // However since they all have the same value (0) whether they are there or not should not matter.
1537     // So I have decided that for all types this method will create a constant 0.
1538     // It can be promoted up as required.
1539     // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1540     // but we can deal with that if it arrises.
1541     void
1542     DataLazy::setToZero()
1543     {
1544     DataTypes::ValueType v(getNoValues(),0);
1545     m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1546     m_op=IDENTITY;
1547     m_right.reset();
1548     m_left.reset();
1549     m_readytype='C';
1550     m_buffsRequired=1;
1551     }
1552    
1553 jfenwick 1865 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26