/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6168 - (hide annotations)
Wed Apr 13 03:08:12 2016 UTC (2 years, 11 months ago) by jfenwick
Original Path: trunk/escriptcore/src/DataLazy.cpp
File size: 62400 byte(s)
Making Lazy and the rest of escript use the same operator enumeration


1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
26 jfenwick 2199
27 jfenwick 5938 using namespace escript::DataTypes;
28    
29 caltinay 5972 #define NO_ARG
30    
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 jfenwick 2501 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42 jfenwick 2177
43 jfenwick 2795 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44 jfenwick 2472
45 jfenwick 2501
46 jfenwick 2795 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
47    
48 jfenwick 1899 /*
49     How does DataLazy work?
50     ~~~~~~~~~~~~~~~~~~~~~~~
51    
52     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
53     denoted left and right.
54    
55     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
56     This means that all "internal" nodes in the structure are instances of DataLazy.
57    
58     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59 caltinay 4286 Note that IDENTITY is not considered a unary operation.
60 jfenwick 1899
61     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62     It must however form a DAG (directed acyclic graph).
63     I will refer to individual DataLazy objects with the structure as nodes.
64    
65     Each node also stores:
66     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67 caltinay 5972 evaluated.
68 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69 caltinay 5972 evaluate the expression.
70 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
71    
72     When a new node is created, the above values are computed based on the values in the child nodes.
73     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
74    
75     The resolve method, which produces a DataReady from a DataLazy, does the following:
76     1) Create a DataReady to hold the new result.
77     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
78     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
79    
80     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
81    
82     resolveSample returns a Vector* and an offset within that vector where the result is stored.
83     Normally, this would be v, but for identity nodes their internal vector is returned instead.
84    
85     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
86    
87     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
88     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
89 jfenwick 2037
90 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
91 jfenwick 2037 1) Add to the ES_optype.
92     2) determine what opgroup your operation belongs to (X)
93     3) add a string for the op to the end of ES_opstrings
94     4) increase ES_opcount
95     5) add an entry (X) to opgroups
96     6) add an entry to the switch in collapseToReady
97     7) add an entry to resolveX
98 jfenwick 1899 */
99    
100    
101 jfenwick 1865 using namespace std;
102 jfenwick 1868 using namespace boost;
103 jfenwick 1865
104     namespace escript
105     {
106    
107     namespace
108     {
109    
110 jfenwick 2777
111     // enabling this will print out when ever the maximum stacksize used by resolve increases
112     // it assumes _OPENMP is also in use
113     //#define LAZY_STACK_PROF
114    
115    
116    
117     #ifndef _OPENMP
118     #ifdef LAZY_STACK_PROF
119     #undef LAZY_STACK_PROF
120     #endif
121     #endif
122    
123    
124     #ifdef LAZY_STACK_PROF
125     std::vector<void*> stackstart(getNumberOfThreads());
126     std::vector<void*> stackend(getNumberOfThreads());
127     size_t maxstackuse=0;
128     #endif
129    
130 jfenwick 1886
131 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
132     FunctionSpace
133     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
134     {
135 caltinay 5972 // perhaps this should call interpolate and throw or something?
136     // maybe we need an interpolate node -
137     // that way, if interpolate is required in any other op we can just throw a
138     // programming error exception.
139 jfenwick 1879
140 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
141     FunctionSpace r=right->getFunctionSpace();
142     if (l!=r)
143     {
144 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
145     if (res==1)
146 jfenwick 1943 {
147 caltinay 5972 return l;
148 jfenwick 1943 }
149 jfenwick 4264 if (res==-1)
150 jfenwick 1943 {
151 caltinay 5972 return r;
152 jfenwick 1943 }
153     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
154     }
155     return l;
156 jfenwick 1865 }
157    
158     // return the shape of the result of "left op right"
159 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
160 jfenwick 1865 DataTypes::ShapeType
161     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162     {
163 caltinay 5972 if (left->getShape()!=right->getShape())
164     {
165     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
166     {
167     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
168     }
169 jfenwick 2721
170 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
171     {
172     return right->getShape();
173     }
174     if (right->getRank()==0)
175     {
176     return left->getShape();
177     }
178     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
179     }
180     return left->getShape();
181 jfenwick 1865 }
182    
183 jfenwick 2084 // return the shape for "op left"
184    
185     DataTypes::ShapeType
186 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
187 jfenwick 2084 {
188 caltinay 5972 switch(op)
189     {
190     case TRANS:
191     { // for the scoping of variables
192     const DataTypes::ShapeType& s=left->getShape();
193     DataTypes::ShapeType sh;
194     int rank=left->getRank();
195     if (axis_offset<0 || axis_offset>rank)
196     {
197 caltinay 2779 stringstream e;
198     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
199     throw DataException(e.str());
200 caltinay 5972 }
201     for (int i=0; i<rank; i++)
202     {
203     int index = (axis_offset+i)%rank;
204     sh.push_back(s[index]); // Append to new shape
205     }
206     return sh;
207     }
208     break;
209     case TRACE:
210     {
211     int rank=left->getRank();
212     if (rank<2)
213     {
214     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
215     }
216     if ((axis_offset>rank-2) || (axis_offset<0))
217     {
218     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
219     }
220     if (rank==2)
221     {
222     return DataTypes::scalarShape;
223     }
224     else if (rank==3)
225     {
226     DataTypes::ShapeType sh;
227     if (axis_offset==0)
228     {
229     sh.push_back(left->getShape()[2]);
230     }
231     else // offset==1
232     {
233     sh.push_back(left->getShape()[0]);
234     }
235     return sh;
236     }
237     else if (rank==4)
238     {
239     DataTypes::ShapeType sh;
240     const DataTypes::ShapeType& s=left->getShape();
241     if (axis_offset==0)
242     {
243     sh.push_back(s[2]);
244     sh.push_back(s[3]);
245     }
246     else if (axis_offset==1)
247     {
248     sh.push_back(s[0]);
249     sh.push_back(s[3]);
250     }
251     else // offset==2
252     {
253     sh.push_back(s[0]);
254     sh.push_back(s[1]);
255     }
256     return sh;
257     }
258     else // unknown rank
259     {
260     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
261     }
262     }
263     break;
264     default:
265     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
266     }
267 jfenwick 2084 }
268    
269 jfenwick 2496 DataTypes::ShapeType
270     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
271     {
272     // This code taken from the Data.cpp swapaxes() method
273     // Some of the checks are probably redundant here
274     int axis0_tmp,axis1_tmp;
275     const DataTypes::ShapeType& s=left->getShape();
276     DataTypes::ShapeType out_shape;
277     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
278     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
279     int rank=left->getRank();
280     if (rank<2) {
281     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
282     }
283     if (axis0<0 || axis0>rank-1) {
284 caltinay 2779 stringstream e;
285     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
286     throw DataException(e.str());
287 jfenwick 2496 }
288     if (axis1<0 || axis1>rank-1) {
289 caltinay 2782 stringstream e;
290 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
291     throw DataException(e.str());
292 jfenwick 2496 }
293     if (axis0 == axis1) {
294     throw DataException("Error - Data::swapaxes: axis indices must be different.");
295     }
296     if (axis0 > axis1) {
297     axis0_tmp=axis1;
298     axis1_tmp=axis0;
299     } else {
300     axis0_tmp=axis0;
301     axis1_tmp=axis1;
302     }
303     for (int i=0; i<rank; i++) {
304     if (i == axis0_tmp) {
305     out_shape.push_back(s[axis1_tmp]);
306     } else if (i == axis1_tmp) {
307     out_shape.push_back(s[axis0_tmp]);
308     } else {
309     out_shape.push_back(s[i]);
310     }
311     }
312     return out_shape;
313     }
314    
315    
316 jfenwick 2066 // determine the output shape for the general tensor product operation
317     // the additional parameters return information required later for the product
318     // the majority of this code is copy pasted from C_General_Tensor_Product
319     DataTypes::ShapeType
320     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
321 jfenwick 1865 {
322 caltinay 5972
323 jfenwick 2066 // Get rank and shape of inputs
324     int rank0 = left->getRank();
325     int rank1 = right->getRank();
326     const DataTypes::ShapeType& shape0 = left->getShape();
327     const DataTypes::ShapeType& shape1 = right->getShape();
328    
329     // Prepare for the loops of the product and verify compatibility of shapes
330     int start0=0, start1=0;
331 caltinay 5972 if (transpose == 0) {}
332     else if (transpose == 1) { start0 = axis_offset; }
333     else if (transpose == 2) { start1 = rank1-axis_offset; }
334     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
335 jfenwick 2066
336 jfenwick 2085 if (rank0<axis_offset)
337     {
338 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
339 jfenwick 2085 }
340 jfenwick 2066
341     // Adjust the shapes for transpose
342 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
343     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
344     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
345     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
346 jfenwick 2066
347     // Prepare for the loops of the product
348     SL=1, SM=1, SR=1;
349 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
350 jfenwick 2066 SL *= tmpShape0[i];
351     }
352 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
353 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
354     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
355     }
356     SM *= tmpShape0[i];
357     }
358 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
359 jfenwick 2066 SR *= tmpShape1[i];
360     }
361    
362     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
363 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
364     { // block to limit the scope of out_index
365 jfenwick 2066 int out_index=0;
366     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
367     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
368     }
369 jfenwick 2086
370     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
371     {
372     ostringstream os;
373     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
374     throw DataException(os.str());
375     }
376    
377 jfenwick 2066 return shape2;
378 jfenwick 1865 }
379    
380 caltinay 5972 } // end anonymous namespace
381 jfenwick 1865
382 jfenwick 2500 void DataLazy::LazyNodeSetup()
383     {
384     #ifdef _OPENMP
385     int numthreads=omp_get_max_threads();
386     m_samples.resize(numthreads*m_samplesize);
387     m_sampleids=new int[numthreads];
388     for (int i=0;i<numthreads;++i)
389     {
390     m_sampleids[i]=-1;
391     }
392     #else
393     m_samples.resize(m_samplesize);
394     m_sampleids=new int[1];
395     m_sampleids[0]=-1;
396     #endif // _OPENMP
397     }
398 jfenwick 1865
399 jfenwick 2500
400     // Creates an identity node
401 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
402 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
403     ,m_sampleids(0),
404     m_samples(1)
405 jfenwick 1865 {
406 jfenwick 1879 if (p->isLazy())
407     {
408 caltinay 5972 // I don't want identity of Lazy.
409     // Question: Why would that be so bad?
410     // Answer: We assume that the child of ID is something we can call getVector on
411     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
412 jfenwick 1879 }
413     else
414     {
415 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
416     makeIdentity(dr);
417 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
418 jfenwick 1879 }
419 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
420 jfenwick 1865 }
421    
422 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
423 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
424     m_op(op),
425     m_axis_offset(0),
426     m_transpose(0),
427     m_SL(0), m_SM(0), m_SR(0)
428 jfenwick 1886 {
429 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
430 jfenwick 1886 {
431 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
432 jfenwick 1886 }
433 jfenwick 2066
434 jfenwick 1886 DataLazy_ptr lleft;
435     if (!left->isLazy())
436     {
437 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
438 jfenwick 1886 }
439     else
440     {
441 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
442 jfenwick 1886 }
443 jfenwick 1889 m_readytype=lleft->m_readytype;
444 jfenwick 1886 m_left=lleft;
445     m_samplesize=getNumDPPSample()*getNoValues();
446 jfenwick 2177 m_children=m_left->m_children+1;
447     m_height=m_left->m_height+1;
448 jfenwick 2500 LazyNodeSetup();
449 jfenwick 2177 SIZELIMIT
450 jfenwick 1886 }
451    
452    
453 jfenwick 1943 // In this constructor we need to consider interpolation
454 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
455 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
456     m_op(op),
457     m_SL(0), m_SM(0), m_SR(0)
458 jfenwick 1879 {
459 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
460 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
461 jfenwick 1886 {
462 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
463 jfenwick 1886 }
464 jfenwick 1943
465 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
466 jfenwick 1943 {
467 caltinay 5972 FunctionSpace fs=getFunctionSpace();
468     Data ltemp(left);
469     Data tmp(ltemp,fs);
470     left=tmp.borrowDataPtr();
471 jfenwick 1943 }
472 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
473 jfenwick 1943 {
474 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
475     right=tmp.borrowDataPtr();
476 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
477 jfenwick 1943 }
478     left->operandCheck(*right);
479    
480 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
481 jfenwick 1879 {
482 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
483 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
484 jfenwick 1879 }
485     else
486     {
487 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
488 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
489 jfenwick 1879 }
490     if (right->isLazy())
491     {
492 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
493 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
494 jfenwick 1879 }
495     else
496     {
497 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
498 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
499 jfenwick 1879 }
500 jfenwick 1889 char lt=m_left->m_readytype;
501     char rt=m_right->m_readytype;
502     if (lt=='E' || rt=='E')
503     {
504 caltinay 5972 m_readytype='E';
505 jfenwick 1889 }
506     else if (lt=='T' || rt=='T')
507     {
508 caltinay 5972 m_readytype='T';
509 jfenwick 1889 }
510     else
511     {
512 caltinay 5972 m_readytype='C';
513 jfenwick 1889 }
514 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
515 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
516     m_height=max(m_left->m_height,m_right->m_height)+1;
517 jfenwick 2500 LazyNodeSetup();
518 jfenwick 2177 SIZELIMIT
519 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
520 jfenwick 1879 }
521    
522 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
523 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
524     m_op(op),
525     m_axis_offset(axis_offset),
526     m_transpose(transpose)
527 jfenwick 2066 {
528     if ((getOpgroup(op)!=G_TENSORPROD))
529     {
530 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
531 jfenwick 2066 }
532     if ((transpose>2) || (transpose<0))
533     {
534 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
535 jfenwick 2066 }
536 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
537 jfenwick 2066 {
538 caltinay 5972 FunctionSpace fs=getFunctionSpace();
539     Data ltemp(left);
540     Data tmp(ltemp,fs);
541     left=tmp.borrowDataPtr();
542 jfenwick 2066 }
543 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
544 jfenwick 2066 {
545 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
546     right=tmp.borrowDataPtr();
547 jfenwick 2066 }
548 jfenwick 2195 // left->operandCheck(*right);
549 jfenwick 1879
550 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
551 jfenwick 2066 {
552 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
553 jfenwick 2066 }
554     else
555     {
556 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
557 jfenwick 2066 }
558     if (right->isLazy())
559     {
560 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
561 jfenwick 2066 }
562     else
563     {
564 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
565 jfenwick 2066 }
566     char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')
569     {
570 caltinay 5972 m_readytype='E';
571 jfenwick 2066 }
572     else if (lt=='T' || rt=='T')
573     {
574 caltinay 5972 m_readytype='T';
575 jfenwick 2066 }
576     else
577     {
578 caltinay 5972 m_readytype='C';
579 jfenwick 2066 }
580     m_samplesize=getNumDPPSample()*getNoValues();
581 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;
583 jfenwick 2500 LazyNodeSetup();
584 jfenwick 2177 SIZELIMIT
585 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
586 jfenwick 2066 }
587    
588    
589 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
590 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
591     m_op(op),
592     m_axis_offset(axis_offset),
593     m_transpose(0),
594     m_tol(0)
595 jfenwick 2084 {
596     if ((getOpgroup(op)!=G_NP1OUT_P))
597     {
598 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
599 jfenwick 2084 }
600     DataLazy_ptr lleft;
601     if (!left->isLazy())
602     {
603 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
604 jfenwick 2084 }
605     else
606     {
607 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
608 jfenwick 2084 }
609     m_readytype=lleft->m_readytype;
610     m_left=lleft;
611     m_samplesize=getNumDPPSample()*getNoValues();
612 jfenwick 2177 m_children=m_left->m_children+1;
613     m_height=m_left->m_height+1;
614 jfenwick 2500 LazyNodeSetup();
615 jfenwick 2177 SIZELIMIT
616 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
617 jfenwick 2084 }
618    
619 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
620 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
621     m_op(op),
622     m_axis_offset(0),
623     m_transpose(0),
624     m_tol(tol)
625 jfenwick 2147 {
626     if ((getOpgroup(op)!=G_UNARY_P))
627     {
628 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
629 jfenwick 2147 }
630     DataLazy_ptr lleft;
631     if (!left->isLazy())
632     {
633 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
634 jfenwick 2147 }
635     else
636     {
637 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
638 jfenwick 2147 }
639     m_readytype=lleft->m_readytype;
640     m_left=lleft;
641     m_samplesize=getNumDPPSample()*getNoValues();
642 jfenwick 2177 m_children=m_left->m_children+1;
643     m_height=m_left->m_height+1;
644 jfenwick 2500 LazyNodeSetup();
645 jfenwick 2177 SIZELIMIT
646 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
647     }
648 jfenwick 2084
649 jfenwick 2496
650     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
651 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
652     m_op(op),
653     m_axis_offset(axis0),
654     m_transpose(axis1),
655     m_tol(0)
656 jfenwick 2496 {
657     if ((getOpgroup(op)!=G_NP1OUT_2P))
658     {
659 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
660 jfenwick 2496 }
661     DataLazy_ptr lleft;
662     if (!left->isLazy())
663     {
664 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
665 jfenwick 2496 }
666     else
667     {
668 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
669 jfenwick 2496 }
670     m_readytype=lleft->m_readytype;
671     m_left=lleft;
672     m_samplesize=getNumDPPSample()*getNoValues();
673     m_children=m_left->m_children+1;
674     m_height=m_left->m_height+1;
675 jfenwick 2500 LazyNodeSetup();
676 jfenwick 2496 SIZELIMIT
677     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
678     }
679    
680 jfenwick 3035
681     namespace
682     {
683    
684     inline int max3(int a, int b, int c)
685     {
686 caltinay 5972 int t=(a>b?a:b);
687     return (t>c?t:c);
688 jfenwick 3035
689     }
690     }
691    
692     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
693 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
694     m_op(CONDEVAL),
695     m_axis_offset(0),
696     m_transpose(0),
697     m_tol(0)
698 jfenwick 3035 {
699    
700     DataLazy_ptr lmask;
701     DataLazy_ptr lleft;
702     DataLazy_ptr lright;
703     if (!mask->isLazy())
704     {
705 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
706 jfenwick 3035 }
707     else
708     {
709 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
710 jfenwick 3035 }
711     if (!left->isLazy())
712     {
713 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
714 jfenwick 3035 }
715     else
716     {
717 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
718 jfenwick 3035 }
719     if (!right->isLazy())
720     {
721 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
722 jfenwick 3035 }
723     else
724     {
725 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
726 jfenwick 3035 }
727     m_readytype=lmask->m_readytype;
728     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
729     {
730 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
731 jfenwick 3035 }
732     m_left=lleft;
733     m_right=lright;
734     m_mask=lmask;
735     m_samplesize=getNumDPPSample()*getNoValues();
736     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
737     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
738     LazyNodeSetup();
739     SIZELIMIT
740     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
741     }
742    
743    
744    
745 jfenwick 1865 DataLazy::~DataLazy()
746     {
747 jfenwick 2500 delete[] m_sampleids;
748 jfenwick 1865 }
749    
750 jfenwick 1879
751 jfenwick 1899 /*
752     \brief Evaluates the expression using methods on Data.
753     This does the work for the collapse method.
754     For reasons of efficiency do not call this method on DataExpanded nodes.
755     */
756 jfenwick 1889 DataReady_ptr
757 jfenwick 4621 DataLazy::collapseToReady() const
758 jfenwick 1889 {
759     if (m_readytype=='E')
760 caltinay 5972 { // this is more an efficiency concern than anything else
761 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
762     }
763     if (m_op==IDENTITY)
764     {
765     return m_id;
766     }
767     DataReady_ptr pleft=m_left->collapseToReady();
768     Data left(pleft);
769     Data right;
770 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
771 jfenwick 1889 {
772     right=Data(m_right->collapseToReady());
773     }
774     Data result;
775     switch(m_op)
776     {
777     case ADD:
778 caltinay 5972 result=left+right;
779     break;
780     case SUB:
781     result=left-right;
782     break;
783     case MUL:
784     result=left*right;
785     break;
786     case DIV:
787     result=left/right;
788     break;
789 jfenwick 1889 case SIN:
790 caltinay 5972 result=left.sin();
791     break;
792 jfenwick 1889 case COS:
793 caltinay 5972 result=left.cos();
794     break;
795 jfenwick 1889 case TAN:
796 caltinay 5972 result=left.tan();
797     break;
798 jfenwick 1889 case ASIN:
799 caltinay 5972 result=left.asin();
800     break;
801 jfenwick 1889 case ACOS:
802 caltinay 5972 result=left.acos();
803     break;
804 jfenwick 1889 case ATAN:
805 caltinay 5972 result=left.atan();
806     break;
807 jfenwick 1889 case SINH:
808 caltinay 5972 result=left.sinh();
809     break;
810 jfenwick 1889 case COSH:
811 caltinay 5972 result=left.cosh();
812     break;
813 jfenwick 1889 case TANH:
814 caltinay 5972 result=left.tanh();
815     break;
816 jfenwick 1889 case ERF:
817 caltinay 5972 result=left.erf();
818     break;
819 jfenwick 1889 case ASINH:
820 caltinay 5972 result=left.asinh();
821     break;
822 jfenwick 1889 case ACOSH:
823 caltinay 5972 result=left.acosh();
824     break;
825 jfenwick 1889 case ATANH:
826 caltinay 5972 result=left.atanh();
827     break;
828 jfenwick 1889 case LOG10:
829 caltinay 5972 result=left.log10();
830     break;
831 jfenwick 1889 case LOG:
832 caltinay 5972 result=left.log();
833     break;
834 jfenwick 1889 case SIGN:
835 caltinay 5972 result=left.sign();
836     break;
837 jfenwick 1889 case ABS:
838 caltinay 5972 result=left.abs();
839     break;
840 jfenwick 1889 case NEG:
841 caltinay 5972 result=left.neg();
842     break;
843 jfenwick 1889 case POS:
844 caltinay 5972 // it doesn't mean anything for delayed.
845     // it will just trigger a deep copy of the lazy object
846     throw DataException("Programmer error - POS not supported for lazy data.");
847     break;
848 jfenwick 1889 case EXP:
849 caltinay 5972 result=left.exp();
850     break;
851 jfenwick 1889 case SQRT:
852 caltinay 5972 result=left.sqrt();
853     break;
854 jfenwick 1889 case RECIP:
855 caltinay 5972 result=left.oneOver();
856     break;
857 jfenwick 1889 case GZ:
858 caltinay 5972 result=left.wherePositive();
859     break;
860 jfenwick 1889 case LZ:
861 caltinay 5972 result=left.whereNegative();
862     break;
863 jfenwick 1889 case GEZ:
864 caltinay 5972 result=left.whereNonNegative();
865     break;
866 jfenwick 1889 case LEZ:
867 caltinay 5972 result=left.whereNonPositive();
868     break;
869 jfenwick 2147 case NEZ:
870 caltinay 5972 result=left.whereNonZero(m_tol);
871     break;
872 jfenwick 2147 case EZ:
873 caltinay 5972 result=left.whereZero(m_tol);
874     break;
875 jfenwick 2037 case SYM:
876 caltinay 5972 result=left.symmetric();
877     break;
878 jfenwick 2037 case NSYM:
879 jfenwick 6110 result=left.antisymmetric();
880 caltinay 5972 break;
881 jfenwick 2066 case PROD:
882 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
883     break;
884 jfenwick 2084 case TRANS:
885 caltinay 5972 result=left.transpose(m_axis_offset);
886     break;
887 jfenwick 2084 case TRACE:
888 caltinay 5972 result=left.trace(m_axis_offset);
889     break;
890 jfenwick 2496 case SWAP:
891 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
892     break;
893 jfenwick 2721 case MINVAL:
894 caltinay 5972 result=left.minval();
895     break;
896 jfenwick 2721 case MAXVAL:
897 caltinay 5972 result=left.minval();
898     break;
899 jfenwick 6125 case HER:
900     result=left.hermitian();
901     break;
902 jfenwick 1889 default:
903 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
904 jfenwick 1889 }
905     return result.borrowReadyPtr();
906     }
907    
908 jfenwick 1899 /*
909     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
910     This method uses the original methods on the Data class to evaluate the expressions.
911     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
912     the purpose of using DataLazy in the first place).
913     */
914 jfenwick 1889 void
915 jfenwick 4621 DataLazy::collapse() const
916 jfenwick 1889 {
917     if (m_op==IDENTITY)
918     {
919 caltinay 5972 return;
920 jfenwick 1889 }
921     if (m_readytype=='E')
922 caltinay 5972 { // this is more an efficiency concern than anything else
923 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
924     }
925     m_id=collapseToReady();
926     m_op=IDENTITY;
927     }
928    
929 jfenwick 2500 // The result will be stored in m_samples
930     // The return value is a pointer to the DataVector, offset is the offset within the return value
931 jfenwick 5938 const DataTypes::RealVectorType*
932 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
933 jfenwick 2500 {
934     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
935 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
936 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
937     {
938 caltinay 5972 collapse();
939 jfenwick 2500 }
940 caltinay 5972 if (m_op==IDENTITY)
941 jfenwick 2500 {
942 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
943 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
944 jfenwick 2777 #ifdef LAZY_STACK_PROF
945     int x;
946     if (&x<stackend[omp_get_thread_num()])
947     {
948     stackend[omp_get_thread_num()]=&x;
949     }
950     #endif
951 jfenwick 2500 return &(vec);
952     }
953     if (m_readytype!='E')
954     {
955     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
956     }
957 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
958     {
959 caltinay 5972 roffset=tid*m_samplesize;
960     return &(m_samples); // sample is already resolved
961 jfenwick 2501 }
962     m_sampleids[tid]=sampleNo;
963 jfenwick 3035
964 jfenwick 2500 switch (getOpgroup(m_op))
965     {
966     case G_UNARY:
967     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
968     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
969     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
970     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
971     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
972     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
973 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
974 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
975 jfenwick 2500 default:
976     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
977     }
978     }
979    
980 jfenwick 5938 const DataTypes::RealVectorType*
981 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
982 jfenwick 2500 {
983 caltinay 5972 // we assume that any collapsing has been done before we get here
984     // since we only have one argument we don't need to think about only
985     // processing single points.
986     // we will also know we won't get identity nodes
987 jfenwick 2500 if (m_readytype!='E')
988     {
989     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
990     }
991     if (m_op==IDENTITY)
992     {
993     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
994     }
995 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
996 jfenwick 2500 const double* left=&((*leftres)[roffset]);
997     roffset=m_samplesize*tid;
998     double* result=&(m_samples[roffset]);
999 jfenwick 6168 if (m_op==POS)
1000 jfenwick 2500 {
1001 jfenwick 6168 // this should be prevented earlier
1002     // operation is meaningless for lazy
1003     throw DataException("Programmer error - POS not supported for lazy data.");
1004 jfenwick 2500 }
1005 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1006     left,
1007     result,
1008 jfenwick 6168 m_op,
1009 jfenwick 6066 m_tol);
1010 jfenwick 2500 return &(m_samples);
1011     }
1012    
1013    
1014 jfenwick 5938 const DataTypes::RealVectorType*
1015 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1016 jfenwick 2721 {
1017 caltinay 5972 // we assume that any collapsing has been done before we get here
1018     // since we only have one argument we don't need to think about only
1019     // processing single points.
1020     // we will also know we won't get identity nodes
1021 jfenwick 2721 if (m_readytype!='E')
1022     {
1023     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1024     }
1025     if (m_op==IDENTITY)
1026     {
1027     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1028     }
1029     size_t loffset=0;
1030 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1031 jfenwick 2721
1032     roffset=m_samplesize*tid;
1033 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1034 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1035 jfenwick 2721 double* result=&(m_samples[roffset]);
1036     switch (m_op)
1037     {
1038     case MINVAL:
1039 caltinay 5972 {
1040     for (unsigned int z=0;z<ndpps;++z)
1041     {
1042     FMin op;
1043 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1044 caltinay 5972 loffset+=psize;
1045     result++;
1046     }
1047     }
1048     break;
1049 jfenwick 2721 case MAXVAL:
1050 caltinay 5972 {
1051     for (unsigned int z=0;z<ndpps;++z)
1052     {
1053     FMax op;
1054 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1055 caltinay 5972 loffset+=psize;
1056     result++;
1057     }
1058     }
1059     break;
1060 jfenwick 2721 default:
1061 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1062 jfenwick 2721 }
1063     return &(m_samples);
1064     }
1065    
1066 jfenwick 5938 const DataTypes::RealVectorType*
1067 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1068 jfenwick 2500 {
1069 caltinay 5972 // we assume that any collapsing has been done before we get here
1070     // since we only have one argument we don't need to think about only
1071     // processing single points.
1072 jfenwick 2500 if (m_readytype!='E')
1073     {
1074     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1075     }
1076     if (m_op==IDENTITY)
1077     {
1078     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1079     }
1080     size_t subroffset;
1081 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1082 jfenwick 2500 roffset=m_samplesize*tid;
1083     size_t loop=0;
1084     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1085     size_t step=getNoValues();
1086     size_t offset=roffset;
1087     switch (m_op)
1088     {
1089     case SYM:
1090 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1091     {
1092 jfenwick 6083 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1093 caltinay 5972 subroffset+=step;
1094     offset+=step;
1095     }
1096     break;
1097 jfenwick 2500 case NSYM:
1098 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1099     {
1100 jfenwick 6114 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1101 caltinay 5972 subroffset+=step;
1102     offset+=step;
1103     }
1104     break;
1105 jfenwick 2500 default:
1106 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1107 jfenwick 2500 }
1108     return &m_samples;
1109     }
1110    
1111 jfenwick 5938 const DataTypes::RealVectorType*
1112 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1113 jfenwick 2500 {
1114 caltinay 5972 // we assume that any collapsing has been done before we get here
1115     // since we only have one argument we don't need to think about only
1116     // processing single points.
1117 jfenwick 2500 if (m_readytype!='E')
1118     {
1119     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1120     }
1121     if (m_op==IDENTITY)
1122     {
1123     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1124     }
1125     size_t subroffset;
1126     size_t offset;
1127 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1128 jfenwick 2500 roffset=m_samplesize*tid;
1129     offset=roffset;
1130     size_t loop=0;
1131     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1132     size_t outstep=getNoValues();
1133     size_t instep=m_left->getNoValues();
1134     switch (m_op)
1135     {
1136     case TRACE:
1137 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1138     {
1139 jfenwick 6083 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1140 caltinay 5972 subroffset+=instep;
1141     offset+=outstep;
1142     }
1143     break;
1144 jfenwick 2500 case TRANS:
1145 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1146     {
1147 jfenwick 6083 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1148 caltinay 5972 subroffset+=instep;
1149     offset+=outstep;
1150     }
1151     break;
1152 jfenwick 2500 default:
1153 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1154 jfenwick 2500 }
1155     return &m_samples;
1156     }
1157    
1158    
1159 jfenwick 5938 const DataTypes::RealVectorType*
1160 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1161 jfenwick 2500 {
1162     if (m_readytype!='E')
1163     {
1164     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1165     }
1166     if (m_op==IDENTITY)
1167     {
1168     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1169     }
1170     size_t subroffset;
1171     size_t offset;
1172 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1173 jfenwick 2500 roffset=m_samplesize*tid;
1174     offset=roffset;
1175     size_t loop=0;
1176     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1177     size_t outstep=getNoValues();
1178     size_t instep=m_left->getNoValues();
1179     switch (m_op)
1180     {
1181     case SWAP:
1182 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1183     {
1184 jfenwick 6083 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1185 caltinay 5972 subroffset+=instep;
1186     offset+=outstep;
1187     }
1188     break;
1189 jfenwick 2500 default:
1190 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1191 jfenwick 2500 }
1192     return &m_samples;
1193     }
1194    
1195 jfenwick 5938 const DataTypes::RealVectorType*
1196 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1197 jfenwick 3035 {
1198     if (m_readytype!='E')
1199     {
1200     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1201     }
1202     if (m_op!=CONDEVAL)
1203     {
1204     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1205     }
1206     size_t subroffset;
1207 jfenwick 2500
1208 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1209     const RealVectorType* srcres=0;
1210 jfenwick 3035 if ((*maskres)[subroffset]>0)
1211     {
1212 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1213 jfenwick 3035 }
1214     else
1215     {
1216 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1217 jfenwick 3035 }
1218    
1219     // Now we need to copy the result
1220    
1221     roffset=m_samplesize*tid;
1222     for (int i=0;i<m_samplesize;++i)
1223     {
1224 caltinay 5972 m_samples[roffset+i]=(*srcres)[subroffset+i];
1225 jfenwick 3035 }
1226    
1227     return &m_samples;
1228     }
1229    
1230 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1231     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1232     // If both children are expanded, then we can process them in a single operation (we treat
1233     // the whole sample as one big datapoint.
1234     // If one of the children is not expanded, then we need to treat each point in the sample
1235     // individually.
1236     // There is an additional complication when scalar operations are considered.
1237     // For example, 2+Vector.
1238     // In this case each double within the point is treated individually
1239 jfenwick 5938 const DataTypes::RealVectorType*
1240 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1241 jfenwick 2500 {
1242     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1243    
1244 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1245     // first work out which of the children are expanded
1246 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1247     bool rightExp=(m_right->m_readytype=='E');
1248     if (!leftExp && !rightExp)
1249     {
1250 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1251 jfenwick 2500 }
1252     bool leftScalar=(m_left->getRank()==0);
1253     bool rightScalar=(m_right->getRank()==0);
1254     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1255     {
1256 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1257 jfenwick 2500 }
1258     size_t leftsize=m_left->getNoValues();
1259     size_t rightsize=m_right->getNoValues();
1260 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1261     int leftstep=0; // how far should the left offset advance after each step
1262 jfenwick 2500 int rightstep=0;
1263 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1264     int oleftstep=0; // the o variables refer to the outer loop
1265     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1266 jfenwick 2500 int onumsteps=1;
1267    
1268 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1269 jfenwick 2500 bool RES=(rightExp && rightScalar);
1270 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1271 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1272 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1273 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1274 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1275 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1276    
1277 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1278 jfenwick 2500 {
1279 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1280     leftstep=0;
1281     rightstep=0;
1282     numsteps=1;
1283 jfenwick 2500 }
1284     else if (LES || RES)
1285     {
1286 caltinay 5972 chunksize=1;
1287     if (LES) // left is an expanded scalar
1288     {
1289     if (RS)
1290     {
1291     leftstep=1;
1292     rightstep=0;
1293     numsteps=m_left->getNumDPPSample();
1294     }
1295     else // RN or REN
1296     {
1297     leftstep=0;
1298     oleftstep=1;
1299     rightstep=1;
1300     orightstep=(RN ? -(int)rightsize : 0);
1301     numsteps=rightsize;
1302     onumsteps=m_left->getNumDPPSample();
1303     }
1304     }
1305     else // right is an expanded scalar
1306     {
1307     if (LS)
1308     {
1309     rightstep=1;
1310     leftstep=0;
1311     numsteps=m_right->getNumDPPSample();
1312     }
1313     else
1314     {
1315     rightstep=0;
1316     orightstep=1;
1317     leftstep=1;
1318     oleftstep=(LN ? -(int)leftsize : 0);
1319     numsteps=leftsize;
1320     onumsteps=m_right->getNumDPPSample();
1321     }
1322     }
1323 jfenwick 2500 }
1324 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1325 jfenwick 2500 {
1326 caltinay 5972 if (LEN) // and Right will be a single value
1327     {
1328     chunksize=rightsize;
1329     leftstep=rightsize;
1330     rightstep=0;
1331     numsteps=m_left->getNumDPPSample();
1332     if (RS)
1333     {
1334     numsteps*=leftsize;
1335     }
1336     }
1337     else // REN
1338     {
1339     chunksize=leftsize;
1340     rightstep=leftsize;
1341     leftstep=0;
1342     numsteps=m_right->getNumDPPSample();
1343     if (LS)
1344     {
1345     numsteps*=rightsize;
1346     }
1347     }
1348 jfenwick 2500 }
1349    
1350 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1351     // Get the values of sub-expressions
1352 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1353     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1354 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1355     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1356     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1357     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1358     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1359     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1360     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1361    
1362     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1363     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1364    
1365    
1366     roffset=m_samplesize*tid;
1367 caltinay 5972 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1368 jfenwick 2500 switch(m_op)
1369     {
1370     case ADD:
1371 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1372 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1373 jfenwick 6042 &(*left)[0],
1374     &(*right)[0],
1375     chunksize,
1376     onumsteps,
1377     numsteps,
1378     resultStep,
1379     leftstep,
1380     rightstep,
1381     oleftstep,
1382     orightstep,
1383 jfenwick 6056 lroffset,
1384     rroffset,
1385 jfenwick 6168 escript::ES_optype::ADD);
1386 caltinay 5972 break;
1387 jfenwick 2500 case SUB:
1388 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1389 jfenwick 6057 &(*left)[0],
1390     &(*right)[0],
1391     chunksize,
1392     onumsteps,
1393     numsteps,
1394     resultStep,
1395     leftstep,
1396     rightstep,
1397     oleftstep,
1398     orightstep,
1399     lroffset,
1400     rroffset,
1401 jfenwick 6168 escript::ES_optype::SUB);
1402 jfenwick 6057 //PROC_OP(NO_ARG,minus<double>());
1403 caltinay 5972 break;
1404 jfenwick 2500 case MUL:
1405 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1406 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1407 jfenwick 6057 &(*left)[0],
1408     &(*right)[0],
1409     chunksize,
1410     onumsteps,
1411     numsteps,
1412     resultStep,
1413     leftstep,
1414     rightstep,
1415     oleftstep,
1416     orightstep,
1417     lroffset,
1418     rroffset,
1419 jfenwick 6168 escript::ES_optype::MUL);
1420 caltinay 5972 break;
1421 jfenwick 2500 case DIV:
1422 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1423 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1424 jfenwick 6057 &(*left)[0],
1425     &(*right)[0],
1426     chunksize,
1427     onumsteps,
1428     numsteps,
1429     resultStep,
1430     leftstep,
1431     rightstep,
1432     oleftstep,
1433     orightstep,
1434     lroffset,
1435     rroffset,
1436 jfenwick 6168 escript::ES_optype::DIV);
1437 caltinay 5972 break;
1438 jfenwick 2500 case POW:
1439 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1440 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1441 jfenwick 6057 &(*left)[0],
1442     &(*right)[0],
1443     chunksize,
1444     onumsteps,
1445     numsteps,
1446     resultStep,
1447     leftstep,
1448     rightstep,
1449     oleftstep,
1450     orightstep,
1451     lroffset,
1452     rroffset,
1453 jfenwick 6168 escript::ES_optype::POW);
1454 caltinay 5972 break;
1455 jfenwick 2500 default:
1456 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1457 jfenwick 2500 }
1458     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1459     return &m_samples;
1460     }
1461    
1462    
1463     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1464     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1465     // unlike the other resolve helpers, we must treat these datapoints separately.
1466 jfenwick 5938 const DataTypes::RealVectorType*
1467 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1468 jfenwick 2500 {
1469     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1470    
1471 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1472     // first work out which of the children are expanded
1473 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1474     bool rightExp=(m_right->m_readytype=='E');
1475     int steps=getNumDPPSample();
1476 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1477 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1478    
1479     int resultStep=getNoValues();
1480     roffset=m_samplesize*tid;
1481     size_t offset=roffset;
1482    
1483 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1484 jfenwick 2500
1485 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1486 jfenwick 2500
1487     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1488     cout << getNoValues() << endl;)
1489    
1490    
1491     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1492     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1493     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1494     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1495     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1496     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1497     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1498    
1499 caltinay 5972 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1500 jfenwick 2500 switch(m_op)
1501     {
1502     case PROD:
1503 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1504     {
1505     const double *ptr_0 = &((*left)[lroffset]);
1506     const double *ptr_1 = &((*right)[rroffset]);
1507 jfenwick 2500
1508     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1509     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1510    
1511 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1512 jfenwick 2500
1513 caltinay 5972 lroffset+=leftStep;
1514     rroffset+=rightStep;
1515     }
1516     break;
1517 jfenwick 2500 default:
1518 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1519 jfenwick 2500 }
1520     roffset=offset;
1521     return &m_samples;
1522     }
1523    
1524 jfenwick 1898
1525 jfenwick 5938 const DataTypes::RealVectorType*
1526 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1527 jfenwick 1879 {
1528 jfenwick 2271 #ifdef _OPENMP
1529 caltinay 5972 int tid=omp_get_thread_num();
1530 jfenwick 2271 #else
1531 caltinay 5972 int tid=0;
1532 jfenwick 2271 #endif
1533 jfenwick 2777
1534     #ifdef LAZY_STACK_PROF
1535 caltinay 5972 stackstart[tid]=&tid;
1536     stackend[tid]=&tid;
1537     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1538     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1539     #pragma omp critical
1540     if (d>maxstackuse)
1541     {
1542 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1543 caltinay 5972 maxstackuse=d;
1544     }
1545     return r;
1546 jfenwick 2777 #else
1547 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1548 jfenwick 2777 #endif
1549 jfenwick 2271 }
1550    
1551    
1552 jfenwick 2497 // This needs to do the work of the identity constructor
1553 jfenwick 2177 void
1554     DataLazy::resolveToIdentity()
1555     {
1556     if (m_op==IDENTITY)
1557 caltinay 5972 return;
1558 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1559 jfenwick 2177 makeIdentity(p);
1560     }
1561 jfenwick 1889
1562 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1563     {
1564     m_op=IDENTITY;
1565     m_axis_offset=0;
1566     m_transpose=0;
1567     m_SL=m_SM=m_SR=0;
1568     m_children=m_height=0;
1569     m_id=p;
1570     if(p->isConstant()) {m_readytype='C';}
1571     else if(p->isExpanded()) {m_readytype='E';}
1572     else if (p->isTagged()) {m_readytype='T';}
1573     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1574     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1575     m_left.reset();
1576     m_right.reset();
1577     }
1578    
1579 jfenwick 2497
1580     DataReady_ptr
1581     DataLazy::resolve()
1582     {
1583     resolveToIdentity();
1584     return m_id;
1585     }
1586    
1587 jfenwick 2799
1588     /* This is really a static method but I think that caused problems in windows */
1589     void
1590     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1591     {
1592     if (dats.empty())
1593     {
1594 caltinay 5972 return;
1595 jfenwick 2799 }
1596     vector<DataLazy*> work;
1597     FunctionSpace fs=dats[0]->getFunctionSpace();
1598     bool match=true;
1599 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1600 jfenwick 2799 {
1601 caltinay 5972 if (dats[i]->m_readytype!='E')
1602     {
1603     dats[i]->collapse();
1604     }
1605     if (dats[i]->m_op!=IDENTITY)
1606     {
1607     work.push_back(dats[i]);
1608     if (fs!=dats[i]->getFunctionSpace())
1609     {
1610     match=false;
1611     }
1612     }
1613 jfenwick 2799 }
1614     if (work.empty())
1615     {
1616 caltinay 5972 return; // no work to do
1617 jfenwick 2799 }
1618 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1619     { // it is possible that dats[0] is one of the objects which we discarded and
1620     // all the other functionspaces match.
1621     vector<DataExpanded*> dep;
1622 jfenwick 6042 vector<RealVectorType*> vecs;
1623 caltinay 5972 for (int i=0;i<work.size();++i)
1624     {
1625 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1626 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1627     }
1628     int totalsamples=work[0]->getNumSamples();
1629 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1630 caltinay 5972 int sample;
1631     #pragma omp parallel private(sample, res)
1632     {
1633     size_t roffset=0;
1634     #pragma omp for schedule(static)
1635     for (sample=0;sample<totalsamples;++sample)
1636     {
1637     roffset=0;
1638     int j;
1639     for (j=work.size()-1;j>=0;--j)
1640     {
1641 jfenwick 2799 #ifdef _OPENMP
1642 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1643 jfenwick 2799 #else
1644 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1645 jfenwick 2799 #endif
1646 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1647     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1648     }
1649     }
1650     }
1651     // Now we need to load the new results as identity ops into the lazy nodes
1652     for (int i=work.size()-1;i>=0;--i)
1653     {
1654 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1655 caltinay 5972 }
1656 jfenwick 2799 }
1657 caltinay 5972 else // functionspaces do not match
1658 jfenwick 2799 {
1659 caltinay 5972 for (int i=0;i<work.size();++i)
1660     {
1661     work[i]->resolveToIdentity();
1662     }
1663 jfenwick 2799 }
1664     }
1665    
1666    
1667    
1668 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1669     DataReady_ptr
1670     DataLazy::resolveNodeWorker()
1671     {
1672 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1673 jfenwick 2500 {
1674     collapse();
1675     }
1676 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1677 jfenwick 2500 {
1678     return m_id;
1679     }
1680 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1681 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1682     RealVectorType& resvec=result->getVectorRW();
1683 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1684    
1685     int sample;
1686     int totalsamples=getNumSamples();
1687 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1688 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1689 jfenwick 2777 #pragma omp parallel private(sample,res)
1690 jfenwick 2500 {
1691 caltinay 5972 size_t roffset=0;
1692 jfenwick 2777 #ifdef LAZY_STACK_PROF
1693 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1694     stackend[omp_get_thread_num()]=&roffset;
1695 jfenwick 2777 #endif
1696 caltinay 5972 #pragma omp for schedule(static)
1697     for (sample=0;sample<totalsamples;++sample)
1698     {
1699     roffset=0;
1700 jfenwick 2500 #ifdef _OPENMP
1701 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1702 jfenwick 2500 #else
1703 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1704 jfenwick 2500 #endif
1705     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1706     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1707 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1708     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1709     }
1710 jfenwick 2500 }
1711 jfenwick 2777 #ifdef LAZY_STACK_PROF
1712     for (int i=0;i<getNumberOfThreads();++i)
1713     {
1714 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1715     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1716     if (r>maxstackuse)
1717     {
1718     maxstackuse=r;
1719     }
1720 jfenwick 2777 }
1721     cout << "Max resolve Stack use=" << maxstackuse << endl;
1722     #endif
1723 jfenwick 2500 return resptr;
1724     }
1725    
1726 jfenwick 1865 std::string
1727     DataLazy::toString() const
1728     {
1729 jfenwick 1886 ostringstream oss;
1730 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1731 jfenwick 2795 switch (escriptParams.getLAZY_STR_FMT())
1732 jfenwick 2737 {
1733 caltinay 5972 case 1: // tree format
1734     oss << endl;
1735     intoTreeString(oss,"");
1736     break;
1737     case 2: // just the depth
1738     break;
1739 jfenwick 2795 default:
1740 caltinay 5972 intoString(oss);
1741     break;
1742 jfenwick 2737 }
1743 jfenwick 1886 return oss.str();
1744 jfenwick 1865 }
1745    
1746 jfenwick 1899
1747 jfenwick 1886 void
1748     DataLazy::intoString(ostringstream& oss) const
1749     {
1750 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1751 jfenwick 1886 switch (getOpgroup(m_op))
1752     {
1753 jfenwick 1889 case G_IDENTITY:
1754 caltinay 5972 if (m_id->isExpanded())
1755     {
1756     oss << "E";
1757     }
1758     else if (m_id->isTagged())
1759     {
1760     oss << "T";
1761     }
1762     else if (m_id->isConstant())
1763     {
1764     oss << "C";
1765     }
1766     else
1767     {
1768     oss << "?";
1769     }
1770     oss << '@' << m_id.get();
1771     break;
1772 jfenwick 1886 case G_BINARY:
1773 caltinay 5972 oss << '(';
1774     m_left->intoString(oss);
1775     oss << ' ' << opToString(m_op) << ' ';
1776     m_right->intoString(oss);
1777     oss << ')';
1778     break;
1779 jfenwick 1886 case G_UNARY:
1780 jfenwick 2147 case G_UNARY_P:
1781 jfenwick 2037 case G_NP1OUT:
1782 jfenwick 2084 case G_NP1OUT_P:
1783 jfenwick 2721 case G_REDUCTION:
1784 caltinay 5972 oss << opToString(m_op) << '(';
1785     m_left->intoString(oss);
1786     oss << ')';
1787     break;
1788 jfenwick 2066 case G_TENSORPROD:
1789 caltinay 5972 oss << opToString(m_op) << '(';
1790     m_left->intoString(oss);
1791     oss << ", ";
1792     m_right->intoString(oss);
1793     oss << ')';
1794     break;
1795 jfenwick 2496 case G_NP1OUT_2P:
1796 caltinay 5972 oss << opToString(m_op) << '(';
1797     m_left->intoString(oss);
1798     oss << ", " << m_axis_offset << ", " << m_transpose;
1799     oss << ')';
1800     break;
1801 jfenwick 3035 case G_CONDEVAL:
1802 caltinay 5972 oss << opToString(m_op)<< '(' ;
1803     m_mask->intoString(oss);
1804     oss << " ? ";
1805     m_left->intoString(oss);
1806     oss << " : ";
1807     m_right->intoString(oss);
1808     oss << ')';
1809     break;
1810 jfenwick 1886 default:
1811 caltinay 5972 oss << "UNKNOWN";
1812 jfenwick 1886 }
1813     }
1814    
1815 jfenwick 2737
1816     void
1817     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1818     {
1819     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1820     switch (getOpgroup(m_op))
1821     {
1822     case G_IDENTITY:
1823 caltinay 5972 if (m_id->isExpanded())
1824     {
1825     oss << "E";
1826     }
1827     else if (m_id->isTagged())
1828     {
1829     oss << "T";
1830     }
1831     else if (m_id->isConstant())
1832     {
1833     oss << "C";
1834     }
1835     else
1836     {
1837     oss << "?";
1838     }
1839     oss << '@' << m_id.get() << endl;
1840     break;
1841 jfenwick 2737 case G_BINARY:
1842 caltinay 5972 oss << opToString(m_op) << endl;
1843     indent+='.';
1844     m_left->intoTreeString(oss, indent);
1845     m_right->intoTreeString(oss, indent);
1846     break;
1847 jfenwick 2737 case G_UNARY:
1848     case G_UNARY_P:
1849     case G_NP1OUT:
1850     case G_NP1OUT_P:
1851     case G_REDUCTION:
1852 caltinay 5972 oss << opToString(m_op) << endl;
1853     indent+='.';
1854     m_left->intoTreeString(oss, indent);
1855     break;
1856 jfenwick 2737 case G_TENSORPROD:
1857 caltinay 5972 oss << opToString(m_op) << endl;
1858     indent+='.';
1859     m_left->intoTreeString(oss, indent);
1860     m_right->intoTreeString(oss, indent);
1861     break;
1862 jfenwick 2737 case G_NP1OUT_2P:
1863 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1864     indent+='.';
1865     m_left->intoTreeString(oss, indent);
1866     break;
1867 jfenwick 2737 default:
1868 caltinay 5972 oss << "UNKNOWN";
1869 jfenwick 2737 }
1870     }
1871    
1872    
1873 jfenwick 1865 DataAbstract*
1874 jfenwick 5938 DataLazy::deepCopy() const
1875 jfenwick 1865 {
1876 jfenwick 1935 switch (getOpgroup(m_op))
1877 jfenwick 1865 {
1878 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1879 caltinay 5972 case G_UNARY:
1880 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1881 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1882     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1883 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1884     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1885 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1886     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1887 jfenwick 1935 default:
1888 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1889 jfenwick 1865 }
1890     }
1891    
1892    
1893 jfenwick 2721
1894 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1895     // Instances of DataReady can look at the size of their vectors.
1896     // For lazy though, it could be the size the data would be if it were resolved;
1897     // or it could be some function of the lengths of the DataReady instances which
1898     // form part of the expression.
1899     // Rather than have people making assumptions, I have disabled the method.
1900 jfenwick 5938 DataTypes::RealVectorType::size_type
1901 jfenwick 1865 DataLazy::getLength() const
1902     {
1903 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1904 jfenwick 1865 }
1905    
1906    
1907     DataAbstract*
1908     DataLazy::getSlice(const DataTypes::RegionType& region) const
1909     {
1910 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1911 jfenwick 1865 }
1912    
1913 jfenwick 1935
1914     // To do this we need to rely on our child nodes
1915 jfenwick 5938 DataTypes::RealVectorType::size_type
1916 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
1917 jfenwick 1935 int dataPointNo)
1918     {
1919     if (m_op==IDENTITY)
1920     {
1921 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
1922 jfenwick 1935 }
1923     if (m_readytype!='E')
1924     {
1925 caltinay 5972 collapse();
1926     return m_id->getPointOffset(sampleNo,dataPointNo);
1927 jfenwick 1935 }
1928     // at this point we do not have an identity node and the expression will be Expanded
1929     // so we only need to know which child to ask
1930     if (m_left->m_readytype=='E')
1931     {
1932 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
1933 jfenwick 1935 }
1934     else
1935     {
1936 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
1937 jfenwick 1935 }
1938     }
1939    
1940     // To do this we need to rely on our child nodes
1941 jfenwick 5938 DataTypes::RealVectorType::size_type
1942 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
1943 jfenwick 1865 int dataPointNo) const
1944     {
1945 jfenwick 1935 if (m_op==IDENTITY)
1946     {
1947 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
1948 jfenwick 1935 }
1949     if (m_readytype=='E')
1950     {
1951     // at this point we do not have an identity node and the expression will be Expanded
1952     // so we only need to know which child to ask
1953     if (m_left->m_readytype=='E')
1954     {
1955 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
1956 jfenwick 1935 }
1957     else
1958     {
1959 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
1960 jfenwick 1935 }
1961     }
1962     if (m_readytype=='C')
1963     {
1964 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1965 jfenwick 1935 }
1966     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1967 jfenwick 1865 }
1968    
1969 jfenwick 2271
1970     // I have decided to let Data:: handle this issue.
1971 jfenwick 1901 void
1972     DataLazy::setToZero()
1973     {
1974 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
1975 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1976     // m_op=IDENTITY;
1977     // m_right.reset();
1978     // m_left.reset();
1979     // m_readytype='C';
1980     // m_buffsRequired=1;
1981    
1982 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
1983 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1984 jfenwick 1901 }
1985    
1986 jfenwick 2271 bool
1987     DataLazy::actsExpanded() const
1988     {
1989 caltinay 5972 return (m_readytype=='E');
1990 jfenwick 2271 }
1991    
1992 caltinay 5997 } // end namespace
1993 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26