/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6512 - (hide annotations)
Fri Mar 3 02:01:57 2017 UTC (2 years ago) by jfenwick
File size: 62825 byte(s)
Experiments on starting complex lazy

1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
26 jfenwick 2199
27 jfenwick 5938 using namespace escript::DataTypes;
28    
29 caltinay 5972 #define NO_ARG
30    
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 caltinay 6174 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42 jfenwick 2177
43 caltinay 6174 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44 jfenwick 2472
45 jfenwick 2501
46 caltinay 6174 #define SIZELIMIT \
47     if (m_height > escript::escriptParams.getTooManyLevels()) {\
48     if (escript::escriptParams.getLazyVerbose()) {\
49     cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50     }\
51     resolveToIdentity();\
52     }
53 jfenwick 2795
54 jfenwick 1899 /*
55     How does DataLazy work?
56     ~~~~~~~~~~~~~~~~~~~~~~~
57    
58     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59     denoted left and right.
60    
61     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62     This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65 caltinay 4286 Note that IDENTITY is not considered a unary operation.
66 jfenwick 1899
67     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68     It must however form a DAG (directed acyclic graph).
69     I will refer to individual DataLazy objects with the structure as nodes.
70    
71     Each node also stores:
72     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73 caltinay 5972 evaluated.
74 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75 caltinay 5972 evaluate the expression.
76 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
77    
78     When a new node is created, the above values are computed based on the values in the child nodes.
79     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81     The resolve method, which produces a DataReady from a DataLazy, does the following:
82     1) Create a DataReady to hold the new result.
83     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88     resolveSample returns a Vector* and an offset within that vector where the result is stored.
89     Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92    
93     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95 jfenwick 2037
96 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97 jfenwick 2037 1) Add to the ES_optype.
98     2) determine what opgroup your operation belongs to (X)
99     3) add a string for the op to the end of ES_opstrings
100     4) increase ES_opcount
101     5) add an entry (X) to opgroups
102     6) add an entry to the switch in collapseToReady
103     7) add an entry to resolveX
104 jfenwick 1899 */
105    
106    
107 jfenwick 1865 using namespace std;
108 jfenwick 1868 using namespace boost;
109 jfenwick 1865
110     namespace escript
111     {
112    
113     namespace
114     {
115    
116 jfenwick 2777
117     // enabling this will print out when ever the maximum stacksize used by resolve increases
118     // it assumes _OPENMP is also in use
119     //#define LAZY_STACK_PROF
120    
121    
122    
123     #ifndef _OPENMP
124     #ifdef LAZY_STACK_PROF
125     #undef LAZY_STACK_PROF
126     #endif
127     #endif
128    
129    
130     #ifdef LAZY_STACK_PROF
131     std::vector<void*> stackstart(getNumberOfThreads());
132     std::vector<void*> stackend(getNumberOfThreads());
133     size_t maxstackuse=0;
134     #endif
135    
136 jfenwick 1886
137 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
138     FunctionSpace
139     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140     {
141 caltinay 5972 // perhaps this should call interpolate and throw or something?
142     // maybe we need an interpolate node -
143     // that way, if interpolate is required in any other op we can just throw a
144     // programming error exception.
145 jfenwick 1879
146 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
147     FunctionSpace r=right->getFunctionSpace();
148     if (l!=r)
149     {
150 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
151     if (res==1)
152 jfenwick 1943 {
153 caltinay 5972 return l;
154 jfenwick 1943 }
155 jfenwick 4264 if (res==-1)
156 jfenwick 1943 {
157 caltinay 5972 return r;
158 jfenwick 1943 }
159     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
160     }
161     return l;
162 jfenwick 1865 }
163    
164     // return the shape of the result of "left op right"
165 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
166 jfenwick 1865 DataTypes::ShapeType
167     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
168     {
169 caltinay 5972 if (left->getShape()!=right->getShape())
170     {
171     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
172     {
173     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
174     }
175 jfenwick 2721
176 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
177     {
178     return right->getShape();
179     }
180     if (right->getRank()==0)
181     {
182     return left->getShape();
183     }
184     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
185     }
186     return left->getShape();
187 jfenwick 1865 }
188    
189 jfenwick 2084 // return the shape for "op left"
190    
191     DataTypes::ShapeType
192 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
193 jfenwick 2084 {
194 caltinay 5972 switch(op)
195     {
196     case TRANS:
197     { // for the scoping of variables
198     const DataTypes::ShapeType& s=left->getShape();
199     DataTypes::ShapeType sh;
200     int rank=left->getRank();
201     if (axis_offset<0 || axis_offset>rank)
202     {
203 caltinay 2779 stringstream e;
204     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
205     throw DataException(e.str());
206 caltinay 5972 }
207     for (int i=0; i<rank; i++)
208     {
209     int index = (axis_offset+i)%rank;
210     sh.push_back(s[index]); // Append to new shape
211     }
212     return sh;
213     }
214     break;
215     case TRACE:
216     {
217     int rank=left->getRank();
218     if (rank<2)
219     {
220     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
221     }
222     if ((axis_offset>rank-2) || (axis_offset<0))
223     {
224     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
225     }
226     if (rank==2)
227     {
228     return DataTypes::scalarShape;
229     }
230     else if (rank==3)
231     {
232     DataTypes::ShapeType sh;
233     if (axis_offset==0)
234     {
235     sh.push_back(left->getShape()[2]);
236     }
237     else // offset==1
238     {
239     sh.push_back(left->getShape()[0]);
240     }
241     return sh;
242     }
243     else if (rank==4)
244     {
245     DataTypes::ShapeType sh;
246     const DataTypes::ShapeType& s=left->getShape();
247     if (axis_offset==0)
248     {
249     sh.push_back(s[2]);
250     sh.push_back(s[3]);
251     }
252     else if (axis_offset==1)
253     {
254     sh.push_back(s[0]);
255     sh.push_back(s[3]);
256     }
257     else // offset==2
258     {
259     sh.push_back(s[0]);
260     sh.push_back(s[1]);
261     }
262     return sh;
263     }
264     else // unknown rank
265     {
266     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
267     }
268     }
269     break;
270     default:
271     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
272     }
273 jfenwick 2084 }
274    
275 jfenwick 2496 DataTypes::ShapeType
276     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
277     {
278     // This code taken from the Data.cpp swapaxes() method
279     // Some of the checks are probably redundant here
280     int axis0_tmp,axis1_tmp;
281     const DataTypes::ShapeType& s=left->getShape();
282     DataTypes::ShapeType out_shape;
283     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
284     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
285     int rank=left->getRank();
286     if (rank<2) {
287     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
288     }
289     if (axis0<0 || axis0>rank-1) {
290 caltinay 2779 stringstream e;
291     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
292     throw DataException(e.str());
293 jfenwick 2496 }
294     if (axis1<0 || axis1>rank-1) {
295 caltinay 2782 stringstream e;
296 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
297     throw DataException(e.str());
298 jfenwick 2496 }
299     if (axis0 == axis1) {
300     throw DataException("Error - Data::swapaxes: axis indices must be different.");
301     }
302     if (axis0 > axis1) {
303     axis0_tmp=axis1;
304     axis1_tmp=axis0;
305     } else {
306     axis0_tmp=axis0;
307     axis1_tmp=axis1;
308     }
309     for (int i=0; i<rank; i++) {
310     if (i == axis0_tmp) {
311     out_shape.push_back(s[axis1_tmp]);
312     } else if (i == axis1_tmp) {
313     out_shape.push_back(s[axis0_tmp]);
314     } else {
315     out_shape.push_back(s[i]);
316     }
317     }
318     return out_shape;
319     }
320    
321    
322 jfenwick 2066 // determine the output shape for the general tensor product operation
323     // the additional parameters return information required later for the product
324     // the majority of this code is copy pasted from C_General_Tensor_Product
325     DataTypes::ShapeType
326     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
327 jfenwick 1865 {
328 caltinay 5972
329 jfenwick 2066 // Get rank and shape of inputs
330     int rank0 = left->getRank();
331     int rank1 = right->getRank();
332     const DataTypes::ShapeType& shape0 = left->getShape();
333     const DataTypes::ShapeType& shape1 = right->getShape();
334    
335     // Prepare for the loops of the product and verify compatibility of shapes
336     int start0=0, start1=0;
337 caltinay 5972 if (transpose == 0) {}
338     else if (transpose == 1) { start0 = axis_offset; }
339     else if (transpose == 2) { start1 = rank1-axis_offset; }
340     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
341 jfenwick 2066
342 jfenwick 2085 if (rank0<axis_offset)
343     {
344 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
345 jfenwick 2085 }
346 jfenwick 2066
347     // Adjust the shapes for transpose
348 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
349     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
350     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
351     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
352 jfenwick 2066
353     // Prepare for the loops of the product
354     SL=1, SM=1, SR=1;
355 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
356 jfenwick 2066 SL *= tmpShape0[i];
357     }
358 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
359 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
360     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
361     }
362     SM *= tmpShape0[i];
363     }
364 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
365 jfenwick 2066 SR *= tmpShape1[i];
366     }
367    
368     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
369 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
370     { // block to limit the scope of out_index
371 jfenwick 2066 int out_index=0;
372     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
373     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
374     }
375 jfenwick 2086
376     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
377     {
378     ostringstream os;
379     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
380     throw DataException(os.str());
381     }
382    
383 jfenwick 2066 return shape2;
384 jfenwick 1865 }
385    
386 caltinay 5972 } // end anonymous namespace
387 jfenwick 1865
388 jfenwick 2500 void DataLazy::LazyNodeSetup()
389     {
390     #ifdef _OPENMP
391     int numthreads=omp_get_max_threads();
392 jfenwick 6512 m_samples_r.resize(numthreads*m_samplesize);
393 jfenwick 2500 m_sampleids=new int[numthreads];
394     for (int i=0;i<numthreads;++i)
395     {
396     m_sampleids[i]=-1;
397     }
398     #else
399 jfenwick 6512 if (m_iscompl)
400     {
401     m_samples_c.resize(m_samplesize);
402     }
403     else
404     {
405     m_samples_r.resize(m_samplesize);
406     }
407 jfenwick 2500 m_sampleids=new int[1];
408     m_sampleids[0]=-1;
409     #endif // _OPENMP
410     }
411 jfenwick 1865
412 jfenwick 2500
413     // Creates an identity node
414 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
415 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
416     ,m_sampleids(0),
417 jfenwick 6512 m_samples_r(1)
418 jfenwick 1865 {
419 jfenwick 1879 if (p->isLazy())
420     {
421 caltinay 5972 // I don't want identity of Lazy.
422     // Question: Why would that be so bad?
423     // Answer: We assume that the child of ID is something we can call getVector on
424     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
425 jfenwick 1879 }
426     else
427     {
428 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
429     makeIdentity(dr);
430 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
431 jfenwick 1879 }
432 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
433 jfenwick 1865 }
434    
435 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
436 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
437     m_op(op),
438     m_axis_offset(0),
439     m_transpose(0),
440     m_SL(0), m_SM(0), m_SR(0)
441 jfenwick 1886 {
442 jfenwick 2721 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
443 jfenwick 1886 {
444 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
445 jfenwick 1886 }
446 jfenwick 2066
447 jfenwick 1886 DataLazy_ptr lleft;
448     if (!left->isLazy())
449     {
450 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
451 jfenwick 1886 }
452     else
453     {
454 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
455 jfenwick 1886 }
456 jfenwick 1889 m_readytype=lleft->m_readytype;
457 jfenwick 1886 m_left=lleft;
458     m_samplesize=getNumDPPSample()*getNoValues();
459 jfenwick 2177 m_children=m_left->m_children+1;
460     m_height=m_left->m_height+1;
461 jfenwick 2500 LazyNodeSetup();
462 jfenwick 2177 SIZELIMIT
463 jfenwick 1886 }
464    
465    
466 jfenwick 1943 // In this constructor we need to consider interpolation
467 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
468 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
469     m_op(op),
470     m_SL(0), m_SM(0), m_SR(0)
471 jfenwick 1879 {
472 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
473 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
474 jfenwick 1886 {
475 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
476 jfenwick 1886 }
477 jfenwick 1943
478 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
479 jfenwick 1943 {
480 caltinay 5972 FunctionSpace fs=getFunctionSpace();
481     Data ltemp(left);
482     Data tmp(ltemp,fs);
483     left=tmp.borrowDataPtr();
484 jfenwick 1943 }
485 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
486 jfenwick 1943 {
487 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
488     right=tmp.borrowDataPtr();
489 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
490 jfenwick 1943 }
491     left->operandCheck(*right);
492    
493 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
494 jfenwick 1879 {
495 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
496 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
497 jfenwick 1879 }
498     else
499     {
500 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
501 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
502 jfenwick 1879 }
503     if (right->isLazy())
504     {
505 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
506 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
507 jfenwick 1879 }
508     else
509     {
510 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
511 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
512 jfenwick 1879 }
513 jfenwick 1889 char lt=m_left->m_readytype;
514     char rt=m_right->m_readytype;
515     if (lt=='E' || rt=='E')
516     {
517 caltinay 5972 m_readytype='E';
518 jfenwick 1889 }
519     else if (lt=='T' || rt=='T')
520     {
521 caltinay 5972 m_readytype='T';
522 jfenwick 1889 }
523     else
524     {
525 caltinay 5972 m_readytype='C';
526 jfenwick 1889 }
527 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
528 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
529     m_height=max(m_left->m_height,m_right->m_height)+1;
530 jfenwick 2500 LazyNodeSetup();
531 jfenwick 2177 SIZELIMIT
532 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
533 jfenwick 1879 }
534    
535 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
536 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
537     m_op(op),
538     m_axis_offset(axis_offset),
539     m_transpose(transpose)
540 jfenwick 2066 {
541     if ((getOpgroup(op)!=G_TENSORPROD))
542     {
543 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
544 jfenwick 2066 }
545     if ((transpose>2) || (transpose<0))
546     {
547 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
548 jfenwick 2066 }
549 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
550 jfenwick 2066 {
551 caltinay 5972 FunctionSpace fs=getFunctionSpace();
552     Data ltemp(left);
553     Data tmp(ltemp,fs);
554     left=tmp.borrowDataPtr();
555 jfenwick 2066 }
556 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
557 jfenwick 2066 {
558 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
559     right=tmp.borrowDataPtr();
560 jfenwick 2066 }
561 jfenwick 2195 // left->operandCheck(*right);
562 jfenwick 1879
563 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
564 jfenwick 2066 {
565 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
566 jfenwick 2066 }
567     else
568     {
569 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
570 jfenwick 2066 }
571     if (right->isLazy())
572     {
573 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
574 jfenwick 2066 }
575     else
576     {
577 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
578 jfenwick 2066 }
579     char lt=m_left->m_readytype;
580     char rt=m_right->m_readytype;
581     if (lt=='E' || rt=='E')
582     {
583 caltinay 5972 m_readytype='E';
584 jfenwick 2066 }
585     else if (lt=='T' || rt=='T')
586     {
587 caltinay 5972 m_readytype='T';
588 jfenwick 2066 }
589     else
590     {
591 caltinay 5972 m_readytype='C';
592 jfenwick 2066 }
593     m_samplesize=getNumDPPSample()*getNoValues();
594 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
595     m_height=max(m_left->m_height,m_right->m_height)+1;
596 jfenwick 2500 LazyNodeSetup();
597 jfenwick 2177 SIZELIMIT
598 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
599 jfenwick 2066 }
600    
601    
602 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
603 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
604     m_op(op),
605     m_axis_offset(axis_offset),
606     m_transpose(0),
607     m_tol(0)
608 jfenwick 2084 {
609     if ((getOpgroup(op)!=G_NP1OUT_P))
610     {
611 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
612 jfenwick 2084 }
613     DataLazy_ptr lleft;
614     if (!left->isLazy())
615     {
616 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
617 jfenwick 2084 }
618     else
619     {
620 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
621 jfenwick 2084 }
622     m_readytype=lleft->m_readytype;
623     m_left=lleft;
624     m_samplesize=getNumDPPSample()*getNoValues();
625 jfenwick 2177 m_children=m_left->m_children+1;
626     m_height=m_left->m_height+1;
627 jfenwick 2500 LazyNodeSetup();
628 jfenwick 2177 SIZELIMIT
629 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
630 jfenwick 2084 }
631    
632 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
633 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
634     m_op(op),
635     m_axis_offset(0),
636     m_transpose(0),
637     m_tol(tol)
638 jfenwick 2147 {
639     if ((getOpgroup(op)!=G_UNARY_P))
640     {
641 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
642 jfenwick 2147 }
643     DataLazy_ptr lleft;
644     if (!left->isLazy())
645     {
646 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
647 jfenwick 2147 }
648     else
649     {
650 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
651 jfenwick 2147 }
652     m_readytype=lleft->m_readytype;
653     m_left=lleft;
654     m_samplesize=getNumDPPSample()*getNoValues();
655 jfenwick 2177 m_children=m_left->m_children+1;
656     m_height=m_left->m_height+1;
657 jfenwick 2500 LazyNodeSetup();
658 jfenwick 2177 SIZELIMIT
659 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
660     }
661 jfenwick 2084
662 jfenwick 2496
663     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
664 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
665     m_op(op),
666     m_axis_offset(axis0),
667     m_transpose(axis1),
668     m_tol(0)
669 jfenwick 2496 {
670     if ((getOpgroup(op)!=G_NP1OUT_2P))
671     {
672 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
673 jfenwick 2496 }
674     DataLazy_ptr lleft;
675     if (!left->isLazy())
676     {
677 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
678 jfenwick 2496 }
679     else
680     {
681 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
682 jfenwick 2496 }
683     m_readytype=lleft->m_readytype;
684     m_left=lleft;
685     m_samplesize=getNumDPPSample()*getNoValues();
686     m_children=m_left->m_children+1;
687     m_height=m_left->m_height+1;
688 jfenwick 2500 LazyNodeSetup();
689 jfenwick 2496 SIZELIMIT
690     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
691     }
692    
693 jfenwick 3035
694     namespace
695     {
696    
697     inline int max3(int a, int b, int c)
698     {
699 caltinay 5972 int t=(a>b?a:b);
700     return (t>c?t:c);
701 jfenwick 3035
702     }
703     }
704    
705     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
706 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
707     m_op(CONDEVAL),
708     m_axis_offset(0),
709     m_transpose(0),
710     m_tol(0)
711 jfenwick 3035 {
712    
713     DataLazy_ptr lmask;
714     DataLazy_ptr lleft;
715     DataLazy_ptr lright;
716     if (!mask->isLazy())
717     {
718 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
719 jfenwick 3035 }
720     else
721     {
722 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
723 jfenwick 3035 }
724     if (!left->isLazy())
725     {
726 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
727 jfenwick 3035 }
728     else
729     {
730 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
731 jfenwick 3035 }
732     if (!right->isLazy())
733     {
734 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
735 jfenwick 3035 }
736     else
737     {
738 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
739 jfenwick 3035 }
740     m_readytype=lmask->m_readytype;
741     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
742     {
743 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
744 jfenwick 3035 }
745     m_left=lleft;
746     m_right=lright;
747     m_mask=lmask;
748     m_samplesize=getNumDPPSample()*getNoValues();
749     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
750     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
751     LazyNodeSetup();
752     SIZELIMIT
753     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
754     }
755    
756    
757    
758 jfenwick 1865 DataLazy::~DataLazy()
759     {
760 jfenwick 2500 delete[] m_sampleids;
761 jfenwick 1865 }
762    
763 jfenwick 1879
764 jfenwick 1899 /*
765     \brief Evaluates the expression using methods on Data.
766     This does the work for the collapse method.
767     For reasons of efficiency do not call this method on DataExpanded nodes.
768     */
769 jfenwick 1889 DataReady_ptr
770 jfenwick 4621 DataLazy::collapseToReady() const
771 jfenwick 1889 {
772     if (m_readytype=='E')
773 caltinay 5972 { // this is more an efficiency concern than anything else
774 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
775     }
776     if (m_op==IDENTITY)
777     {
778     return m_id;
779     }
780     DataReady_ptr pleft=m_left->collapseToReady();
781     Data left(pleft);
782     Data right;
783 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
784 jfenwick 1889 {
785     right=Data(m_right->collapseToReady());
786     }
787     Data result;
788     switch(m_op)
789     {
790     case ADD:
791 caltinay 5972 result=left+right;
792     break;
793     case SUB:
794     result=left-right;
795     break;
796     case MUL:
797     result=left*right;
798     break;
799     case DIV:
800     result=left/right;
801     break;
802 jfenwick 1889 case SIN:
803 caltinay 5972 result=left.sin();
804     break;
805 jfenwick 1889 case COS:
806 caltinay 5972 result=left.cos();
807     break;
808 jfenwick 1889 case TAN:
809 caltinay 5972 result=left.tan();
810     break;
811 jfenwick 1889 case ASIN:
812 caltinay 5972 result=left.asin();
813     break;
814 jfenwick 1889 case ACOS:
815 caltinay 5972 result=left.acos();
816     break;
817 jfenwick 1889 case ATAN:
818 caltinay 5972 result=left.atan();
819     break;
820 jfenwick 1889 case SINH:
821 caltinay 5972 result=left.sinh();
822     break;
823 jfenwick 1889 case COSH:
824 caltinay 5972 result=left.cosh();
825     break;
826 jfenwick 1889 case TANH:
827 caltinay 5972 result=left.tanh();
828     break;
829 jfenwick 1889 case ERF:
830 caltinay 5972 result=left.erf();
831     break;
832 jfenwick 1889 case ASINH:
833 caltinay 5972 result=left.asinh();
834     break;
835 jfenwick 1889 case ACOSH:
836 caltinay 5972 result=left.acosh();
837     break;
838 jfenwick 1889 case ATANH:
839 caltinay 5972 result=left.atanh();
840     break;
841 jfenwick 1889 case LOG10:
842 caltinay 5972 result=left.log10();
843     break;
844 jfenwick 1889 case LOG:
845 caltinay 5972 result=left.log();
846     break;
847 jfenwick 1889 case SIGN:
848 caltinay 5972 result=left.sign();
849     break;
850 jfenwick 1889 case ABS:
851 caltinay 5972 result=left.abs();
852     break;
853 jfenwick 1889 case NEG:
854 caltinay 5972 result=left.neg();
855     break;
856 jfenwick 1889 case POS:
857 caltinay 5972 // it doesn't mean anything for delayed.
858     // it will just trigger a deep copy of the lazy object
859     throw DataException("Programmer error - POS not supported for lazy data.");
860     break;
861 jfenwick 1889 case EXP:
862 caltinay 5972 result=left.exp();
863     break;
864 jfenwick 1889 case SQRT:
865 caltinay 5972 result=left.sqrt();
866     break;
867 jfenwick 1889 case RECIP:
868 caltinay 5972 result=left.oneOver();
869     break;
870 jfenwick 1889 case GZ:
871 caltinay 5972 result=left.wherePositive();
872     break;
873 jfenwick 1889 case LZ:
874 caltinay 5972 result=left.whereNegative();
875     break;
876 jfenwick 1889 case GEZ:
877 caltinay 5972 result=left.whereNonNegative();
878     break;
879 jfenwick 1889 case LEZ:
880 caltinay 5972 result=left.whereNonPositive();
881     break;
882 jfenwick 2147 case NEZ:
883 caltinay 5972 result=left.whereNonZero(m_tol);
884     break;
885 jfenwick 2147 case EZ:
886 caltinay 5972 result=left.whereZero(m_tol);
887     break;
888 jfenwick 2037 case SYM:
889 caltinay 5972 result=left.symmetric();
890     break;
891 jfenwick 2037 case NSYM:
892 jfenwick 6110 result=left.antisymmetric();
893 caltinay 5972 break;
894 jfenwick 2066 case PROD:
895 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
896     break;
897 jfenwick 2084 case TRANS:
898 caltinay 5972 result=left.transpose(m_axis_offset);
899     break;
900 jfenwick 2084 case TRACE:
901 caltinay 5972 result=left.trace(m_axis_offset);
902     break;
903 jfenwick 2496 case SWAP:
904 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
905     break;
906 jfenwick 2721 case MINVAL:
907 caltinay 5972 result=left.minval();
908     break;
909 jfenwick 2721 case MAXVAL:
910 caltinay 5972 result=left.minval();
911     break;
912 jfenwick 6125 case HER:
913     result=left.hermitian();
914     break;
915 jfenwick 1889 default:
916 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
917 jfenwick 1889 }
918     return result.borrowReadyPtr();
919     }
920    
921 jfenwick 1899 /*
922     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
923     This method uses the original methods on the Data class to evaluate the expressions.
924     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
925     the purpose of using DataLazy in the first place).
926     */
927 jfenwick 1889 void
928 jfenwick 4621 DataLazy::collapse() const
929 jfenwick 1889 {
930     if (m_op==IDENTITY)
931     {
932 caltinay 5972 return;
933 jfenwick 1889 }
934     if (m_readytype=='E')
935 caltinay 5972 { // this is more an efficiency concern than anything else
936 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
937     }
938     m_id=collapseToReady();
939     m_op=IDENTITY;
940     }
941    
942 jfenwick 2500 // The result will be stored in m_samples
943     // The return value is a pointer to the DataVector, offset is the offset within the return value
944 jfenwick 5938 const DataTypes::RealVectorType*
945 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
946 jfenwick 2500 {
947     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
948 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
949 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
950     {
951 caltinay 5972 collapse();
952 jfenwick 2500 }
953 caltinay 5972 if (m_op==IDENTITY)
954 jfenwick 2500 {
955 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
956 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
957 jfenwick 2777 #ifdef LAZY_STACK_PROF
958     int x;
959     if (&x<stackend[omp_get_thread_num()])
960     {
961     stackend[omp_get_thread_num()]=&x;
962     }
963     #endif
964 jfenwick 2500 return &(vec);
965     }
966     if (m_readytype!='E')
967     {
968     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
969     }
970 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
971     {
972 caltinay 5972 roffset=tid*m_samplesize;
973 jfenwick 6512 return &(m_samples_r); // sample is already resolved
974 jfenwick 2501 }
975     m_sampleids[tid]=sampleNo;
976 jfenwick 3035
977 jfenwick 2500 switch (getOpgroup(m_op))
978     {
979     case G_UNARY:
980     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
981     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
982     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
983     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
988 jfenwick 2500 default:
989     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
990     }
991     }
992    
993 jfenwick 5938 const DataTypes::RealVectorType*
994 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
995 jfenwick 2500 {
996 caltinay 5972 // we assume that any collapsing has been done before we get here
997     // since we only have one argument we don't need to think about only
998     // processing single points.
999     // we will also know we won't get identity nodes
1000 jfenwick 2500 if (m_readytype!='E')
1001     {
1002     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1003     }
1004     if (m_op==IDENTITY)
1005     {
1006     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1007     }
1008 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1009 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1010     roffset=m_samplesize*tid;
1011 jfenwick 6512 double* result=&(m_samples_r[roffset]);
1012 jfenwick 6168 if (m_op==POS)
1013 jfenwick 2500 {
1014 jfenwick 6168 // this should be prevented earlier
1015     // operation is meaningless for lazy
1016     throw DataException("Programmer error - POS not supported for lazy data.");
1017 jfenwick 2500 }
1018 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1019     left,
1020     result,
1021 jfenwick 6168 m_op,
1022 jfenwick 6066 m_tol);
1023 jfenwick 6512 return &(m_samples_r);
1024 jfenwick 2500 }
1025    
1026    
1027 jfenwick 5938 const DataTypes::RealVectorType*
1028 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1029 jfenwick 2721 {
1030 caltinay 5972 // we assume that any collapsing has been done before we get here
1031     // since we only have one argument we don't need to think about only
1032     // processing single points.
1033     // we will also know we won't get identity nodes
1034 jfenwick 2721 if (m_readytype!='E')
1035     {
1036     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1037     }
1038     if (m_op==IDENTITY)
1039     {
1040     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1041     }
1042     size_t loffset=0;
1043 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1044 jfenwick 2721
1045     roffset=m_samplesize*tid;
1046 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1047 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1048 jfenwick 6512 double* result=&(m_samples_r[roffset]);
1049 jfenwick 2721 switch (m_op)
1050     {
1051     case MINVAL:
1052 caltinay 5972 {
1053     for (unsigned int z=0;z<ndpps;++z)
1054     {
1055     FMin op;
1056 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1057 caltinay 5972 loffset+=psize;
1058     result++;
1059     }
1060     }
1061     break;
1062 jfenwick 2721 case MAXVAL:
1063 caltinay 5972 {
1064     for (unsigned int z=0;z<ndpps;++z)
1065     {
1066     FMax op;
1067 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1068 caltinay 5972 loffset+=psize;
1069     result++;
1070     }
1071     }
1072     break;
1073 jfenwick 2721 default:
1074 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1075 jfenwick 2721 }
1076 jfenwick 6512 return &(m_samples_r);
1077 jfenwick 2721 }
1078    
1079 jfenwick 5938 const DataTypes::RealVectorType*
1080 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1081 jfenwick 2500 {
1082 caltinay 5972 // we assume that any collapsing has been done before we get here
1083     // since we only have one argument we don't need to think about only
1084     // processing single points.
1085 jfenwick 2500 if (m_readytype!='E')
1086     {
1087     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1088     }
1089     if (m_op==IDENTITY)
1090     {
1091     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1092     }
1093     size_t subroffset;
1094 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1095 jfenwick 2500 roffset=m_samplesize*tid;
1096     size_t loop=0;
1097     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1098     size_t step=getNoValues();
1099     size_t offset=roffset;
1100     switch (m_op)
1101     {
1102     case SYM:
1103 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1104     {
1105 jfenwick 6512 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1106 caltinay 5972 subroffset+=step;
1107     offset+=step;
1108     }
1109     break;
1110 jfenwick 2500 case NSYM:
1111 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1112     {
1113 jfenwick 6512 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1114 caltinay 5972 subroffset+=step;
1115     offset+=step;
1116     }
1117     break;
1118 jfenwick 2500 default:
1119 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1120 jfenwick 2500 }
1121 jfenwick 6512 return &m_samples_r;
1122 jfenwick 2500 }
1123    
1124 jfenwick 5938 const DataTypes::RealVectorType*
1125 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1126 jfenwick 2500 {
1127 caltinay 5972 // we assume that any collapsing has been done before we get here
1128     // since we only have one argument we don't need to think about only
1129     // processing single points.
1130 jfenwick 2500 if (m_readytype!='E')
1131     {
1132     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1133     }
1134     if (m_op==IDENTITY)
1135     {
1136     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1137     }
1138     size_t subroffset;
1139     size_t offset;
1140 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1141 jfenwick 2500 roffset=m_samplesize*tid;
1142     offset=roffset;
1143     size_t loop=0;
1144     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1145     size_t outstep=getNoValues();
1146     size_t instep=m_left->getNoValues();
1147     switch (m_op)
1148     {
1149     case TRACE:
1150 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1151     {
1152 jfenwick 6512 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_r ,getShape(),offset,m_axis_offset);
1153 caltinay 5972 subroffset+=instep;
1154     offset+=outstep;
1155     }
1156     break;
1157 jfenwick 2500 case TRANS:
1158 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1159     {
1160 jfenwick 6512 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset,m_axis_offset);
1161 caltinay 5972 subroffset+=instep;
1162     offset+=outstep;
1163     }
1164     break;
1165 jfenwick 2500 default:
1166 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1167 jfenwick 2500 }
1168 jfenwick 6512 return &m_samples_r;
1169 jfenwick 2500 }
1170    
1171    
1172 jfenwick 5938 const DataTypes::RealVectorType*
1173 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1174 jfenwick 2500 {
1175     if (m_readytype!='E')
1176     {
1177     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1178     }
1179     if (m_op==IDENTITY)
1180     {
1181     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1182     }
1183     size_t subroffset;
1184     size_t offset;
1185 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1186 jfenwick 2500 roffset=m_samplesize*tid;
1187     offset=roffset;
1188     size_t loop=0;
1189     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190     size_t outstep=getNoValues();
1191     size_t instep=m_left->getNoValues();
1192     switch (m_op)
1193     {
1194     case SWAP:
1195 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1196     {
1197 jfenwick 6512 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset, m_axis_offset, m_transpose);
1198 caltinay 5972 subroffset+=instep;
1199     offset+=outstep;
1200     }
1201     break;
1202 jfenwick 2500 default:
1203 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1204 jfenwick 2500 }
1205 jfenwick 6512 return &m_samples_r;
1206 jfenwick 2500 }
1207    
1208 jfenwick 5938 const DataTypes::RealVectorType*
1209 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1210 jfenwick 3035 {
1211     if (m_readytype!='E')
1212     {
1213     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1214     }
1215     if (m_op!=CONDEVAL)
1216     {
1217     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1218     }
1219     size_t subroffset;
1220 jfenwick 2500
1221 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1222     const RealVectorType* srcres=0;
1223 jfenwick 3035 if ((*maskres)[subroffset]>0)
1224     {
1225 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1226 jfenwick 3035 }
1227     else
1228     {
1229 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1230 jfenwick 3035 }
1231    
1232     // Now we need to copy the result
1233    
1234     roffset=m_samplesize*tid;
1235     for (int i=0;i<m_samplesize;++i)
1236     {
1237 jfenwick 6512 m_samples_r[roffset+i]=(*srcres)[subroffset+i];
1238 jfenwick 3035 }
1239    
1240 jfenwick 6512 return &m_samples_r;
1241 jfenwick 3035 }
1242    
1243 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1244     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1245     // If both children are expanded, then we can process them in a single operation (we treat
1246     // the whole sample as one big datapoint.
1247     // If one of the children is not expanded, then we need to treat each point in the sample
1248     // individually.
1249     // There is an additional complication when scalar operations are considered.
1250     // For example, 2+Vector.
1251     // In this case each double within the point is treated individually
1252 jfenwick 5938 const DataTypes::RealVectorType*
1253 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1254 jfenwick 2500 {
1255     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1256    
1257 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1258     // first work out which of the children are expanded
1259 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1260     bool rightExp=(m_right->m_readytype=='E');
1261     if (!leftExp && !rightExp)
1262     {
1263 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1264 jfenwick 2500 }
1265     bool leftScalar=(m_left->getRank()==0);
1266     bool rightScalar=(m_right->getRank()==0);
1267     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1268     {
1269 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1270 jfenwick 2500 }
1271     size_t leftsize=m_left->getNoValues();
1272     size_t rightsize=m_right->getNoValues();
1273 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1274     int leftstep=0; // how far should the left offset advance after each step
1275 jfenwick 2500 int rightstep=0;
1276 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1277     int oleftstep=0; // the o variables refer to the outer loop
1278     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1279 jfenwick 2500 int onumsteps=1;
1280    
1281 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1282 jfenwick 2500 bool RES=(rightExp && rightScalar);
1283 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1284 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1285 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1286 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1287 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1288 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1289    
1290 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1291 jfenwick 2500 {
1292 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1293     leftstep=0;
1294     rightstep=0;
1295     numsteps=1;
1296 jfenwick 2500 }
1297     else if (LES || RES)
1298     {
1299 caltinay 5972 chunksize=1;
1300     if (LES) // left is an expanded scalar
1301     {
1302     if (RS)
1303     {
1304     leftstep=1;
1305     rightstep=0;
1306     numsteps=m_left->getNumDPPSample();
1307     }
1308     else // RN or REN
1309     {
1310     leftstep=0;
1311     oleftstep=1;
1312     rightstep=1;
1313     orightstep=(RN ? -(int)rightsize : 0);
1314     numsteps=rightsize;
1315     onumsteps=m_left->getNumDPPSample();
1316     }
1317     }
1318     else // right is an expanded scalar
1319     {
1320     if (LS)
1321     {
1322     rightstep=1;
1323     leftstep=0;
1324     numsteps=m_right->getNumDPPSample();
1325     }
1326     else
1327     {
1328     rightstep=0;
1329     orightstep=1;
1330     leftstep=1;
1331     oleftstep=(LN ? -(int)leftsize : 0);
1332     numsteps=leftsize;
1333     onumsteps=m_right->getNumDPPSample();
1334     }
1335     }
1336 jfenwick 2500 }
1337 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1338 jfenwick 2500 {
1339 caltinay 5972 if (LEN) // and Right will be a single value
1340     {
1341     chunksize=rightsize;
1342     leftstep=rightsize;
1343     rightstep=0;
1344     numsteps=m_left->getNumDPPSample();
1345     if (RS)
1346     {
1347     numsteps*=leftsize;
1348     }
1349     }
1350     else // REN
1351     {
1352     chunksize=leftsize;
1353     rightstep=leftsize;
1354     leftstep=0;
1355     numsteps=m_right->getNumDPPSample();
1356     if (LS)
1357     {
1358     numsteps*=rightsize;
1359     }
1360     }
1361 jfenwick 2500 }
1362    
1363 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1364     // Get the values of sub-expressions
1365 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1366     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1367 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1368     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1369     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1370     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1371     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1372     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1373     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1374    
1375     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1376     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1377    
1378    
1379     roffset=m_samplesize*tid;
1380 jfenwick 6512 double* resultp=&(m_samples_r[roffset]); // results are stored at the vector offset we received
1381 jfenwick 2500 switch(m_op)
1382     {
1383     case ADD:
1384 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1385 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1386 jfenwick 6042 &(*left)[0],
1387     &(*right)[0],
1388     chunksize,
1389     onumsteps,
1390     numsteps,
1391     resultStep,
1392     leftstep,
1393     rightstep,
1394     oleftstep,
1395     orightstep,
1396 jfenwick 6056 lroffset,
1397     rroffset,
1398 jfenwick 6168 escript::ES_optype::ADD);
1399 caltinay 5972 break;
1400 jfenwick 2500 case SUB:
1401 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1402 jfenwick 6057 &(*left)[0],
1403     &(*right)[0],
1404     chunksize,
1405     onumsteps,
1406     numsteps,
1407     resultStep,
1408     leftstep,
1409     rightstep,
1410     oleftstep,
1411     orightstep,
1412     lroffset,
1413     rroffset,
1414 jfenwick 6168 escript::ES_optype::SUB);
1415 jfenwick 6057 //PROC_OP(NO_ARG,minus<double>());
1416 caltinay 5972 break;
1417 jfenwick 2500 case MUL:
1418 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1419 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1420 jfenwick 6057 &(*left)[0],
1421     &(*right)[0],
1422     chunksize,
1423     onumsteps,
1424     numsteps,
1425     resultStep,
1426     leftstep,
1427     rightstep,
1428     oleftstep,
1429     orightstep,
1430     lroffset,
1431     rroffset,
1432 jfenwick 6168 escript::ES_optype::MUL);
1433 caltinay 5972 break;
1434 jfenwick 2500 case DIV:
1435 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1436 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1437 jfenwick 6057 &(*left)[0],
1438     &(*right)[0],
1439     chunksize,
1440     onumsteps,
1441     numsteps,
1442     resultStep,
1443     leftstep,
1444     rightstep,
1445     oleftstep,
1446     orightstep,
1447     lroffset,
1448     rroffset,
1449 jfenwick 6168 escript::ES_optype::DIV);
1450 caltinay 5972 break;
1451 jfenwick 2500 case POW:
1452 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1453 jfenwick 6083 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1454 jfenwick 6057 &(*left)[0],
1455     &(*right)[0],
1456     chunksize,
1457     onumsteps,
1458     numsteps,
1459     resultStep,
1460     leftstep,
1461     rightstep,
1462     oleftstep,
1463     orightstep,
1464     lroffset,
1465     rroffset,
1466 jfenwick 6168 escript::ES_optype::POW);
1467 caltinay 5972 break;
1468 jfenwick 2500 default:
1469 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1470 jfenwick 2500 }
1471 jfenwick 6512 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_r[roffset] << endl;)
1472     return &m_samples_r;
1473 jfenwick 2500 }
1474    
1475    
1476     // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1477     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1478     // unlike the other resolve helpers, we must treat these datapoints separately.
1479 jfenwick 5938 const DataTypes::RealVectorType*
1480 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1481 jfenwick 2500 {
1482     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1483    
1484 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1485     // first work out which of the children are expanded
1486 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1487     bool rightExp=(m_right->m_readytype=='E');
1488     int steps=getNumDPPSample();
1489 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1490 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
1491    
1492     int resultStep=getNoValues();
1493     roffset=m_samplesize*tid;
1494     size_t offset=roffset;
1495    
1496 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1497 jfenwick 2500
1498 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1499 jfenwick 2500
1500     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1501     cout << getNoValues() << endl;)
1502    
1503    
1504     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1505     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1506     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1507     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1508     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1509     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1510     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1511    
1512 jfenwick 6512 double* resultp=&(m_samples_r[offset]); // results are stored at the vector offset we received
1513 jfenwick 2500 switch(m_op)
1514     {
1515     case PROD:
1516 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
1517     {
1518     const double *ptr_0 = &((*left)[lroffset]);
1519     const double *ptr_1 = &((*right)[rroffset]);
1520 jfenwick 2500
1521     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1522     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1523    
1524 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1525 jfenwick 2500
1526 caltinay 5972 lroffset+=leftStep;
1527     rroffset+=rightStep;
1528     }
1529     break;
1530 jfenwick 2500 default:
1531 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1532 jfenwick 2500 }
1533     roffset=offset;
1534 jfenwick 6512 return &m_samples_r;
1535 jfenwick 2500 }
1536    
1537 jfenwick 1898
1538 jfenwick 5938 const DataTypes::RealVectorType*
1539 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1540 jfenwick 1879 {
1541 jfenwick 2271 #ifdef _OPENMP
1542 caltinay 5972 int tid=omp_get_thread_num();
1543 jfenwick 2271 #else
1544 caltinay 5972 int tid=0;
1545 jfenwick 2271 #endif
1546 jfenwick 2777
1547     #ifdef LAZY_STACK_PROF
1548 caltinay 5972 stackstart[tid]=&tid;
1549     stackend[tid]=&tid;
1550     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1551     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1552     #pragma omp critical
1553     if (d>maxstackuse)
1554     {
1555 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
1556 caltinay 5972 maxstackuse=d;
1557     }
1558     return r;
1559 jfenwick 2777 #else
1560 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
1561 jfenwick 2777 #endif
1562 jfenwick 2271 }
1563    
1564    
1565 jfenwick 2497 // This needs to do the work of the identity constructor
1566 jfenwick 2177 void
1567     DataLazy::resolveToIdentity()
1568     {
1569     if (m_op==IDENTITY)
1570 caltinay 5972 return;
1571 jfenwick 2500 DataReady_ptr p=resolveNodeWorker();
1572 jfenwick 2177 makeIdentity(p);
1573     }
1574 jfenwick 1889
1575 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
1576     {
1577     m_op=IDENTITY;
1578     m_axis_offset=0;
1579     m_transpose=0;
1580     m_SL=m_SM=m_SR=0;
1581     m_children=m_height=0;
1582     m_id=p;
1583     if(p->isConstant()) {m_readytype='C';}
1584     else if(p->isExpanded()) {m_readytype='E';}
1585     else if (p->isTagged()) {m_readytype='T';}
1586     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1587     m_samplesize=p->getNumDPPSample()*p->getNoValues();
1588     m_left.reset();
1589     m_right.reset();
1590 jfenwick 6512 m_iscompl=p->isComplex();
1591 jfenwick 2177 }
1592    
1593 jfenwick 2497
1594     DataReady_ptr
1595     DataLazy::resolve()
1596     {
1597     resolveToIdentity();
1598     return m_id;
1599     }
1600    
1601 jfenwick 2799
1602     /* This is really a static method but I think that caused problems in windows */
1603     void
1604     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1605     {
1606     if (dats.empty())
1607     {
1608 caltinay 5972 return;
1609 jfenwick 2799 }
1610     vector<DataLazy*> work;
1611     FunctionSpace fs=dats[0]->getFunctionSpace();
1612     bool match=true;
1613 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
1614 jfenwick 2799 {
1615 caltinay 5972 if (dats[i]->m_readytype!='E')
1616     {
1617     dats[i]->collapse();
1618     }
1619     if (dats[i]->m_op!=IDENTITY)
1620     {
1621     work.push_back(dats[i]);
1622     if (fs!=dats[i]->getFunctionSpace())
1623     {
1624     match=false;
1625     }
1626     }
1627 jfenwick 2799 }
1628     if (work.empty())
1629     {
1630 caltinay 5972 return; // no work to do
1631 jfenwick 2799 }
1632 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
1633     { // it is possible that dats[0] is one of the objects which we discarded and
1634     // all the other functionspaces match.
1635     vector<DataExpanded*> dep;
1636 jfenwick 6042 vector<RealVectorType*> vecs;
1637 caltinay 5972 for (int i=0;i<work.size();++i)
1638     {
1639 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1640 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
1641     }
1642     int totalsamples=work[0]->getNumSamples();
1643 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1644 caltinay 5972 int sample;
1645     #pragma omp parallel private(sample, res)
1646     {
1647     size_t roffset=0;
1648     #pragma omp for schedule(static)
1649     for (sample=0;sample<totalsamples;++sample)
1650     {
1651     roffset=0;
1652     int j;
1653     for (j=work.size()-1;j>=0;--j)
1654     {
1655 jfenwick 2799 #ifdef _OPENMP
1656 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1657 jfenwick 2799 #else
1658 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
1659 jfenwick 2799 #endif
1660 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1661     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1662     }
1663     }
1664     }
1665     // Now we need to load the new results as identity ops into the lazy nodes
1666     for (int i=work.size()-1;i>=0;--i)
1667     {
1668 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1669 caltinay 5972 }
1670 jfenwick 2799 }
1671 caltinay 5972 else // functionspaces do not match
1672 jfenwick 2799 {
1673 caltinay 5972 for (int i=0;i<work.size();++i)
1674     {
1675     work[i]->resolveToIdentity();
1676     }
1677 jfenwick 2799 }
1678     }
1679    
1680    
1681    
1682 jfenwick 2500 // This version of resolve uses storage in each node to hold results
1683     DataReady_ptr
1684     DataLazy::resolveNodeWorker()
1685     {
1686 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1687 jfenwick 2500 {
1688     collapse();
1689     }
1690 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1691 jfenwick 2500 {
1692     return m_id;
1693     }
1694 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1695 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1696     RealVectorType& resvec=result->getVectorRW();
1697 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
1698    
1699     int sample;
1700     int totalsamples=getNumSamples();
1701 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
1702 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1703 jfenwick 2777 #pragma omp parallel private(sample,res)
1704 jfenwick 2500 {
1705 caltinay 5972 size_t roffset=0;
1706 jfenwick 2777 #ifdef LAZY_STACK_PROF
1707 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
1708     stackend[omp_get_thread_num()]=&roffset;
1709 jfenwick 2777 #endif
1710 caltinay 5972 #pragma omp for schedule(static)
1711     for (sample=0;sample<totalsamples;++sample)
1712     {
1713     roffset=0;
1714 jfenwick 2500 #ifdef _OPENMP
1715 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1716 jfenwick 2500 #else
1717 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
1718 jfenwick 2500 #endif
1719     LAZYDEBUG(cout << "Sample #" << sample << endl;)
1720     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1721 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1722     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1723     }
1724 jfenwick 2500 }
1725 jfenwick 2777 #ifdef LAZY_STACK_PROF
1726     for (int i=0;i<getNumberOfThreads();++i)
1727     {
1728 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1729     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1730     if (r>maxstackuse)
1731     {
1732     maxstackuse=r;
1733     }
1734 jfenwick 2777 }
1735     cout << "Max resolve Stack use=" << maxstackuse << endl;
1736     #endif
1737 jfenwick 2500 return resptr;
1738     }
1739    
1740 jfenwick 1865 std::string
1741     DataLazy::toString() const
1742     {
1743 jfenwick 1886 ostringstream oss;
1744 jfenwick 2791 oss << "Lazy Data: [depth=" << m_height<< "] ";
1745 caltinay 6174 switch (escriptParams.getLazyStrFmt())
1746 jfenwick 2737 {
1747 caltinay 5972 case 1: // tree format
1748     oss << endl;
1749     intoTreeString(oss,"");
1750     break;
1751     case 2: // just the depth
1752     break;
1753 jfenwick 2795 default:
1754 caltinay 5972 intoString(oss);
1755     break;
1756 jfenwick 2737 }
1757 jfenwick 1886 return oss.str();
1758 jfenwick 1865 }
1759    
1760 jfenwick 1899
1761 jfenwick 1886 void
1762     DataLazy::intoString(ostringstream& oss) const
1763     {
1764 jfenwick 2271 // oss << "[" << m_children <<";"<<m_height <<"]";
1765 jfenwick 1886 switch (getOpgroup(m_op))
1766     {
1767 jfenwick 1889 case G_IDENTITY:
1768 caltinay 5972 if (m_id->isExpanded())
1769     {
1770     oss << "E";
1771     }
1772     else if (m_id->isTagged())
1773     {
1774     oss << "T";
1775     }
1776     else if (m_id->isConstant())
1777     {
1778     oss << "C";
1779     }
1780     else
1781     {
1782     oss << "?";
1783     }
1784     oss << '@' << m_id.get();
1785     break;
1786 jfenwick 1886 case G_BINARY:
1787 caltinay 5972 oss << '(';
1788     m_left->intoString(oss);
1789     oss << ' ' << opToString(m_op) << ' ';
1790     m_right->intoString(oss);
1791     oss << ')';
1792     break;
1793 jfenwick 1886 case G_UNARY:
1794 jfenwick 2147 case G_UNARY_P:
1795 jfenwick 2037 case G_NP1OUT:
1796 jfenwick 2084 case G_NP1OUT_P:
1797 jfenwick 2721 case G_REDUCTION:
1798 caltinay 5972 oss << opToString(m_op) << '(';
1799     m_left->intoString(oss);
1800     oss << ')';
1801     break;
1802 jfenwick 2066 case G_TENSORPROD:
1803 caltinay 5972 oss << opToString(m_op) << '(';
1804     m_left->intoString(oss);
1805     oss << ", ";
1806     m_right->intoString(oss);
1807     oss << ')';
1808     break;
1809 jfenwick 2496 case G_NP1OUT_2P:
1810 caltinay 5972 oss << opToString(m_op) << '(';
1811     m_left->intoString(oss);
1812     oss << ", " << m_axis_offset << ", " << m_transpose;
1813     oss << ')';
1814     break;
1815 jfenwick 3035 case G_CONDEVAL:
1816 caltinay 5972 oss << opToString(m_op)<< '(' ;
1817     m_mask->intoString(oss);
1818     oss << " ? ";
1819     m_left->intoString(oss);
1820     oss << " : ";
1821     m_right->intoString(oss);
1822     oss << ')';
1823     break;
1824 jfenwick 1886 default:
1825 caltinay 5972 oss << "UNKNOWN";
1826 jfenwick 1886 }
1827     }
1828    
1829 jfenwick 2737
1830     void
1831     DataLazy::intoTreeString(ostringstream& oss, string indent) const
1832     {
1833     oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1834     switch (getOpgroup(m_op))
1835     {
1836     case G_IDENTITY:
1837 caltinay 5972 if (m_id->isExpanded())
1838     {
1839     oss << "E";
1840     }
1841     else if (m_id->isTagged())
1842     {
1843     oss << "T";
1844     }
1845     else if (m_id->isConstant())
1846     {
1847     oss << "C";
1848     }
1849     else
1850     {
1851     oss << "?";
1852     }
1853     oss << '@' << m_id.get() << endl;
1854     break;
1855 jfenwick 2737 case G_BINARY:
1856 caltinay 5972 oss << opToString(m_op) << endl;
1857     indent+='.';
1858     m_left->intoTreeString(oss, indent);
1859     m_right->intoTreeString(oss, indent);
1860     break;
1861 jfenwick 2737 case G_UNARY:
1862     case G_UNARY_P:
1863     case G_NP1OUT:
1864     case G_NP1OUT_P:
1865     case G_REDUCTION:
1866 caltinay 5972 oss << opToString(m_op) << endl;
1867     indent+='.';
1868     m_left->intoTreeString(oss, indent);
1869     break;
1870 jfenwick 2737 case G_TENSORPROD:
1871 caltinay 5972 oss << opToString(m_op) << endl;
1872     indent+='.';
1873     m_left->intoTreeString(oss, indent);
1874     m_right->intoTreeString(oss, indent);
1875     break;
1876 jfenwick 2737 case G_NP1OUT_2P:
1877 caltinay 5972 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1878     indent+='.';
1879     m_left->intoTreeString(oss, indent);
1880     break;
1881 jfenwick 2737 default:
1882 caltinay 5972 oss << "UNKNOWN";
1883 jfenwick 2737 }
1884     }
1885    
1886    
1887 jfenwick 1865 DataAbstract*
1888 jfenwick 5938 DataLazy::deepCopy() const
1889 jfenwick 1865 {
1890 jfenwick 1935 switch (getOpgroup(m_op))
1891 jfenwick 1865 {
1892 jfenwick 1935 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1893 caltinay 5972 case G_UNARY:
1894 jfenwick 2721 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1895 caltinay 5972 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1896     case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1897 jfenwick 2066 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1898     case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1899 jfenwick 2721 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1900     case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1901 jfenwick 1935 default:
1902 caltinay 5972 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1903 jfenwick 1865 }
1904     }
1905    
1906 jfenwick 6469 // For this, we don't care what op you were doing because the answer is now zero
1907     DataAbstract*
1908     DataLazy::zeroedCopy() const
1909     {
1910     return new DataLazy(m_id->zeroedCopy()->getPtr());
1911     }
1912 jfenwick 1865
1913 jfenwick 2066 // There is no single, natural interpretation of getLength on DataLazy.
1914     // Instances of DataReady can look at the size of their vectors.
1915     // For lazy though, it could be the size the data would be if it were resolved;
1916     // or it could be some function of the lengths of the DataReady instances which
1917     // form part of the expression.
1918     // Rather than have people making assumptions, I have disabled the method.
1919 jfenwick 5938 DataTypes::RealVectorType::size_type
1920 jfenwick 1865 DataLazy::getLength() const
1921     {
1922 jfenwick 2066 throw DataException("getLength() does not make sense for lazy data.");
1923 jfenwick 1865 }
1924    
1925    
1926     DataAbstract*
1927     DataLazy::getSlice(const DataTypes::RegionType& region) const
1928     {
1929 jfenwick 1879 throw DataException("getSlice - not implemented for Lazy objects.");
1930 jfenwick 1865 }
1931    
1932 jfenwick 1935
1933     // To do this we need to rely on our child nodes
1934 jfenwick 5938 DataTypes::RealVectorType::size_type
1935 jfenwick 1865 DataLazy::getPointOffset(int sampleNo,
1936 jfenwick 1935 int dataPointNo)
1937     {
1938     if (m_op==IDENTITY)
1939     {
1940 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
1941 jfenwick 1935 }
1942     if (m_readytype!='E')
1943     {
1944 caltinay 5972 collapse();
1945     return m_id->getPointOffset(sampleNo,dataPointNo);
1946 jfenwick 1935 }
1947     // at this point we do not have an identity node and the expression will be Expanded
1948     // so we only need to know which child to ask
1949     if (m_left->m_readytype=='E')
1950     {
1951 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
1952 jfenwick 1935 }
1953     else
1954     {
1955 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
1956 jfenwick 1935 }
1957     }
1958    
1959     // To do this we need to rely on our child nodes
1960 jfenwick 5938 DataTypes::RealVectorType::size_type
1961 jfenwick 1935 DataLazy::getPointOffset(int sampleNo,
1962 jfenwick 1865 int dataPointNo) const
1963     {
1964 jfenwick 1935 if (m_op==IDENTITY)
1965     {
1966 caltinay 5972 return m_id->getPointOffset(sampleNo,dataPointNo);
1967 jfenwick 1935 }
1968     if (m_readytype=='E')
1969     {
1970     // at this point we do not have an identity node and the expression will be Expanded
1971     // so we only need to know which child to ask
1972     if (m_left->m_readytype=='E')
1973     {
1974 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo);
1975 jfenwick 1935 }
1976     else
1977     {
1978 caltinay 5972 return m_right->getPointOffset(sampleNo,dataPointNo);
1979 jfenwick 1935 }
1980     }
1981     if (m_readytype=='C')
1982     {
1983 caltinay 5972 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1984 jfenwick 1935 }
1985     throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1986 jfenwick 1865 }
1987    
1988 jfenwick 2271
1989     // I have decided to let Data:: handle this issue.
1990 jfenwick 1901 void
1991     DataLazy::setToZero()
1992     {
1993 jfenwick 5938 // DataTypes::RealVectorType v(getNoValues(),0);
1994 jfenwick 2271 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1995     // m_op=IDENTITY;
1996     // m_right.reset();
1997     // m_left.reset();
1998     // m_readytype='C';
1999     // m_buffsRequired=1;
2000    
2001 jfenwick 4634 (void)privdebug; // to stop the compiler complaining about unused privdebug
2002 jfenwick 2271 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2003 jfenwick 1901 }
2004    
2005 jfenwick 2271 bool
2006     DataLazy::actsExpanded() const
2007     {
2008 caltinay 5972 return (m_readytype=='E');
2009 jfenwick 2271 }
2010    
2011 caltinay 5997 } // end namespace
2012 caltinay 5972

  ViewVC Help
Powered by ViewVC 1.1.26