/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Annotation of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6520 - (hide annotations)
Tue Mar 7 03:11:48 2017 UTC (2 years ago) by jfenwick
File size: 86219 byte(s)
Remove dead comment
1 jfenwick 1865
2 jfenwick 3981 /*****************************************************************************
3 jfenwick 1865 *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 jfenwick 1865 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 jfenwick 1865 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 jfenwick 1865
17     #include "DataLazy.h"
18 caltinay 5972 #include "Data.h"
19     #include "DataTypes.h"
20     #include "EscriptParams.h"
21     #include "FunctionSpace.h"
22     #include "Utils.h"
23 jfenwick 6084 #include "DataVectorOps.h"
24 caltinay 5972
25 caltinay 5997 #include <iomanip> // for some fancy formatting in debug
26 jfenwick 2199
27 jfenwick 5938 using namespace escript::DataTypes;
28    
29 caltinay 5972 #define NO_ARG
30    
31 jfenwick 2157 // #define LAZYDEBUG(X) if (privdebug){X;}
32 jfenwick 2092 #define LAZYDEBUG(X)
33 jfenwick 2157 namespace
34     {
35     bool privdebug=false;
36 jfenwick 2092
37 jfenwick 2157 #define ENABLEDEBUG privdebug=true;
38     #define DISABLEDEBUG privdebug=false;
39     }
40    
41 caltinay 6174 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42 jfenwick 2177
43 caltinay 6174 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44 jfenwick 2472
45 jfenwick 2501
46 caltinay 6174 #define SIZELIMIT \
47     if (m_height > escript::escriptParams.getTooManyLevels()) {\
48     if (escript::escriptParams.getLazyVerbose()) {\
49     cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50     }\
51     resolveToIdentity();\
52     }
53 jfenwick 2795
54 jfenwick 1899 /*
55     How does DataLazy work?
56     ~~~~~~~~~~~~~~~~~~~~~~~
57    
58     Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59     denoted left and right.
60    
61     A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62     This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64     Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65 caltinay 4286 Note that IDENTITY is not considered a unary operation.
66 jfenwick 1899
67     I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68     It must however form a DAG (directed acyclic graph).
69     I will refer to individual DataLazy objects with the structure as nodes.
70    
71     Each node also stores:
72     - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73 caltinay 5972 evaluated.
74 caltinay 4286 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75 caltinay 5972 evaluate the expression.
76 jfenwick 1899 - m_samplesize ~ the number of doubles stored in a sample.
77    
78     When a new node is created, the above values are computed based on the values in the child nodes.
79     Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81     The resolve method, which produces a DataReady from a DataLazy, does the following:
82     1) Create a DataReady to hold the new result.
83     2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84     3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86     (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88     resolveSample returns a Vector* and an offset within that vector where the result is stored.
89     Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91     The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92    
93     For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94     The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95 jfenwick 2037
96 jfenwick 2147 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97 jfenwick 2037 1) Add to the ES_optype.
98     2) determine what opgroup your operation belongs to (X)
99     3) add a string for the op to the end of ES_opstrings
100     4) increase ES_opcount
101     5) add an entry (X) to opgroups
102     6) add an entry to the switch in collapseToReady
103     7) add an entry to resolveX
104 jfenwick 1899 */
105    
106    
107 jfenwick 1865 using namespace std;
108 jfenwick 1868 using namespace boost;
109 jfenwick 1865
110     namespace escript
111     {
112 jfenwick 6518
113    
114     DataLazy_ptr makePromote(DataLazy_ptr p)
115     {
116     if (p->isComplex())
117     {
118     return p;
119     }
120     DataLazy* temp=new DataLazy(p, PROM);
121     return DataLazy_ptr(temp);
122     }
123    
124 jfenwick 1865
125     namespace
126     {
127    
128 jfenwick 2777
129     // enabling this will print out when ever the maximum stacksize used by resolve increases
130     // it assumes _OPENMP is also in use
131     //#define LAZY_STACK_PROF
132    
133    
134    
135     #ifndef _OPENMP
136     #ifdef LAZY_STACK_PROF
137     #undef LAZY_STACK_PROF
138     #endif
139     #endif
140    
141    
142     #ifdef LAZY_STACK_PROF
143     std::vector<void*> stackstart(getNumberOfThreads());
144     std::vector<void*> stackend(getNumberOfThreads());
145     size_t maxstackuse=0;
146     #endif
147    
148 jfenwick 1886
149 jfenwick 6518 inline int max3(int a, int b, int c)
150     {
151     int t=(a>b?a:b);
152     return (t>c?t:c);
153 jfenwick 6513
154 jfenwick 6518 }
155 jfenwick 6513
156 jfenwick 1865 // return the FunctionSpace of the result of "left op right"
157     FunctionSpace
158     resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
159     {
160 caltinay 5972 // perhaps this should call interpolate and throw or something?
161     // maybe we need an interpolate node -
162     // that way, if interpolate is required in any other op we can just throw a
163     // programming error exception.
164 jfenwick 1879
165 jfenwick 1943 FunctionSpace l=left->getFunctionSpace();
166     FunctionSpace r=right->getFunctionSpace();
167     if (l!=r)
168     {
169 jfenwick 4264 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
170     if (res==1)
171 jfenwick 1943 {
172 caltinay 5972 return l;
173 jfenwick 1943 }
174 jfenwick 4264 if (res==-1)
175 jfenwick 1943 {
176 caltinay 5972 return r;
177 jfenwick 1943 }
178     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
179     }
180     return l;
181 jfenwick 1865 }
182    
183     // return the shape of the result of "left op right"
184 jfenwick 2066 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
185 jfenwick 1865 DataTypes::ShapeType
186     resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187     {
188 caltinay 5972 if (left->getShape()!=right->getShape())
189     {
190     if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
191     {
192     throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
193     }
194 jfenwick 2721
195 caltinay 5972 if (left->getRank()==0) // we need to allow scalar * anything
196     {
197     return right->getShape();
198     }
199     if (right->getRank()==0)
200     {
201     return left->getShape();
202     }
203     throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
204     }
205     return left->getShape();
206 jfenwick 1865 }
207    
208 jfenwick 2084 // return the shape for "op left"
209    
210     DataTypes::ShapeType
211 jfenwick 2199 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
212 jfenwick 2084 {
213 caltinay 5972 switch(op)
214     {
215     case TRANS:
216     { // for the scoping of variables
217     const DataTypes::ShapeType& s=left->getShape();
218     DataTypes::ShapeType sh;
219     int rank=left->getRank();
220     if (axis_offset<0 || axis_offset>rank)
221     {
222 caltinay 2779 stringstream e;
223     e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
224     throw DataException(e.str());
225 caltinay 5972 }
226     for (int i=0; i<rank; i++)
227     {
228     int index = (axis_offset+i)%rank;
229     sh.push_back(s[index]); // Append to new shape
230     }
231     return sh;
232     }
233     break;
234     case TRACE:
235     {
236     int rank=left->getRank();
237     if (rank<2)
238     {
239     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240     }
241     if ((axis_offset>rank-2) || (axis_offset<0))
242     {
243     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244     }
245     if (rank==2)
246     {
247     return DataTypes::scalarShape;
248     }
249     else if (rank==3)
250     {
251     DataTypes::ShapeType sh;
252     if (axis_offset==0)
253     {
254     sh.push_back(left->getShape()[2]);
255     }
256     else // offset==1
257     {
258     sh.push_back(left->getShape()[0]);
259     }
260     return sh;
261     }
262     else if (rank==4)
263     {
264     DataTypes::ShapeType sh;
265     const DataTypes::ShapeType& s=left->getShape();
266     if (axis_offset==0)
267     {
268     sh.push_back(s[2]);
269     sh.push_back(s[3]);
270     }
271     else if (axis_offset==1)
272     {
273     sh.push_back(s[0]);
274     sh.push_back(s[3]);
275     }
276     else // offset==2
277     {
278     sh.push_back(s[0]);
279     sh.push_back(s[1]);
280     }
281     return sh;
282     }
283     else // unknown rank
284     {
285     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
286     }
287     }
288     break;
289     default:
290     throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291     }
292 jfenwick 2084 }
293    
294 jfenwick 2496 DataTypes::ShapeType
295     SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296     {
297     // This code taken from the Data.cpp swapaxes() method
298     // Some of the checks are probably redundant here
299     int axis0_tmp,axis1_tmp;
300     const DataTypes::ShapeType& s=left->getShape();
301     DataTypes::ShapeType out_shape;
302     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
303     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
304     int rank=left->getRank();
305     if (rank<2) {
306     throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307     }
308     if (axis0<0 || axis0>rank-1) {
309 caltinay 2779 stringstream e;
310     e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
311     throw DataException(e.str());
312 jfenwick 2496 }
313     if (axis1<0 || axis1>rank-1) {
314 caltinay 2782 stringstream e;
315 caltinay 2779 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
316     throw DataException(e.str());
317 jfenwick 2496 }
318     if (axis0 == axis1) {
319     throw DataException("Error - Data::swapaxes: axis indices must be different.");
320     }
321     if (axis0 > axis1) {
322     axis0_tmp=axis1;
323     axis1_tmp=axis0;
324     } else {
325     axis0_tmp=axis0;
326     axis1_tmp=axis1;
327     }
328     for (int i=0; i<rank; i++) {
329     if (i == axis0_tmp) {
330     out_shape.push_back(s[axis1_tmp]);
331     } else if (i == axis1_tmp) {
332     out_shape.push_back(s[axis0_tmp]);
333     } else {
334     out_shape.push_back(s[i]);
335     }
336     }
337     return out_shape;
338     }
339    
340    
341 jfenwick 2066 // determine the output shape for the general tensor product operation
342     // the additional parameters return information required later for the product
343     // the majority of this code is copy pasted from C_General_Tensor_Product
344     DataTypes::ShapeType
345     GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
346 jfenwick 1865 {
347 caltinay 5972
348 jfenwick 2066 // Get rank and shape of inputs
349     int rank0 = left->getRank();
350     int rank1 = right->getRank();
351     const DataTypes::ShapeType& shape0 = left->getShape();
352     const DataTypes::ShapeType& shape1 = right->getShape();
353    
354     // Prepare for the loops of the product and verify compatibility of shapes
355     int start0=0, start1=0;
356 caltinay 5972 if (transpose == 0) {}
357     else if (transpose == 1) { start0 = axis_offset; }
358     else if (transpose == 2) { start1 = rank1-axis_offset; }
359     else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
360 jfenwick 2066
361 jfenwick 2085 if (rank0<axis_offset)
362     {
363 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
364 jfenwick 2085 }
365 jfenwick 2066
366     // Adjust the shapes for transpose
367 caltinay 5972 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
368     DataTypes::ShapeType tmpShape1(rank1); // than using push_back
369     for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
370     for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
371 jfenwick 2066
372     // Prepare for the loops of the product
373     SL=1, SM=1, SR=1;
374 caltinay 5972 for (int i=0; i<rank0-axis_offset; i++) {
375 jfenwick 2066 SL *= tmpShape0[i];
376     }
377 caltinay 5972 for (int i=rank0-axis_offset; i<rank0; i++) {
378 jfenwick 2066 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
379     throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
380     }
381     SM *= tmpShape0[i];
382     }
383 caltinay 5972 for (int i=axis_offset; i<rank1; i++) {
384 jfenwick 2066 SR *= tmpShape1[i];
385     }
386    
387     // Define the shape of the output (rank of shape is the sum of the loop ranges below)
388 caltinay 5972 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
389     { // block to limit the scope of out_index
390 jfenwick 2066 int out_index=0;
391     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
392     for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
393     }
394 jfenwick 2086
395     if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
396     {
397     ostringstream os;
398     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
399     throw DataException(os.str());
400     }
401    
402 jfenwick 2066 return shape2;
403 jfenwick 1865 }
404    
405 caltinay 5972 } // end anonymous namespace
406 jfenwick 1865
407 jfenwick 2500 void DataLazy::LazyNodeSetup()
408     {
409     #ifdef _OPENMP
410     int numthreads=omp_get_max_threads();
411 jfenwick 6517 if (m_iscompl)
412     {
413     m_samples_c.resize(numthreads*m_samplesize);
414     }
415     else
416     {
417     m_samples_r.resize(numthreads*m_samplesize);
418     }
419 jfenwick 2500 m_sampleids=new int[numthreads];
420     for (int i=0;i<numthreads;++i)
421     {
422     m_sampleids[i]=-1;
423     }
424     #else
425 jfenwick 6512 if (m_iscompl)
426     {
427     m_samples_c.resize(m_samplesize);
428     }
429     else
430     {
431     m_samples_r.resize(m_samplesize);
432     }
433 jfenwick 2500 m_sampleids=new int[1];
434     m_sampleids[0]=-1;
435     #endif // _OPENMP
436     }
437 jfenwick 1865
438 jfenwick 2500
439     // Creates an identity node
440 jfenwick 1865 DataLazy::DataLazy(DataAbstract_ptr p)
441 caltinay 5972 : parent(p->getFunctionSpace(),p->getShape())
442     ,m_sampleids(0),
443 jfenwick 6512 m_samples_r(1)
444 jfenwick 1865 {
445 jfenwick 1879 if (p->isLazy())
446     {
447 caltinay 5972 // I don't want identity of Lazy.
448     // Question: Why would that be so bad?
449     // Answer: We assume that the child of ID is something we can call getVector on
450     throw DataException("Programmer error - attempt to create identity from a DataLazy.");
451 jfenwick 1879 }
452     else
453     {
454 caltinay 5972 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
455     makeIdentity(dr);
456 jfenwick 2199 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
457 jfenwick 1879 }
458 jfenwick 2092 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
459 jfenwick 1865 }
460    
461 jfenwick 6513 // Wrapping a unary operation
462 jfenwick 1886 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
463 caltinay 5972 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
464     m_op(op),
465     m_axis_offset(0),
466     m_transpose(0),
467     m_SL(0), m_SM(0), m_SR(0)
468 jfenwick 1886 {
469 jfenwick 6518 ES_opgroup gop=getOpgroup(op);
470     if ((gop!=G_UNARY) && (gop!=G_NP1OUT) && (gop!=G_REDUCTION) && (gop!=G_UNARY_C))
471 jfenwick 1886 {
472 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
473 jfenwick 1886 }
474 jfenwick 2066
475 jfenwick 1886 DataLazy_ptr lleft;
476     if (!left->isLazy())
477     {
478 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
479 jfenwick 1886 }
480     else
481     {
482 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
483 jfenwick 1886 }
484 jfenwick 1889 m_readytype=lleft->m_readytype;
485 jfenwick 1886 m_left=lleft;
486     m_samplesize=getNumDPPSample()*getNoValues();
487 jfenwick 2177 m_children=m_left->m_children+1;
488     m_height=m_left->m_height+1;
489 jfenwick 6518 if (gop!=G_UNARY_C)
490     {
491     m_iscompl=left->isComplex();
492     }
493     else
494     {
495     m_iscompl=true;
496     }
497 jfenwick 2500 LazyNodeSetup();
498 jfenwick 2177 SIZELIMIT
499 jfenwick 1886 }
500    
501    
502 jfenwick 6513 // In this constructor we need to consider interpolation and promotion
503 jfenwick 1879 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
504 caltinay 5972 : parent(resultFS(left,right,op), resultShape(left,right,op)),
505     m_op(op),
506     m_SL(0), m_SM(0), m_SR(0)
507 jfenwick 1879 {
508 jfenwick 2199 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
509 jfenwick 2037 if ((getOpgroup(op)!=G_BINARY))
510 jfenwick 1886 {
511 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
512 jfenwick 1886 }
513 jfenwick 1943
514 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
515 jfenwick 1943 {
516 caltinay 5972 FunctionSpace fs=getFunctionSpace();
517     Data ltemp(left);
518     Data tmp(ltemp,fs);
519     left=tmp.borrowDataPtr();
520 jfenwick 1943 }
521 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
522 jfenwick 1943 {
523 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
524     right=tmp.borrowDataPtr();
525 jfenwick 2199 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
526 jfenwick 1943 }
527     left->operandCheck(*right);
528    
529 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
530 jfenwick 1879 {
531 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
532 jfenwick 2199 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
533 jfenwick 1879 }
534     else
535     {
536 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
537 jfenwick 2199 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
538 jfenwick 1879 }
539     if (right->isLazy())
540     {
541 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
542 jfenwick 2199 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
543 jfenwick 1879 }
544     else
545     {
546 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
547 jfenwick 2199 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
548 jfenwick 1879 }
549 jfenwick 1889 char lt=m_left->m_readytype;
550     char rt=m_right->m_readytype;
551     if (lt=='E' || rt=='E')
552     {
553 caltinay 5972 m_readytype='E';
554 jfenwick 1889 }
555     else if (lt=='T' || rt=='T')
556     {
557 caltinay 5972 m_readytype='T';
558 jfenwick 1889 }
559     else
560     {
561 caltinay 5972 m_readytype='C';
562 jfenwick 1889 }
563 jfenwick 2066 m_samplesize=getNumDPPSample()*getNoValues();
564 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
565     m_height=max(m_left->m_height,m_right->m_height)+1;
566 jfenwick 6513
567     // now we need to work out if we need to promote anything
568     if (left->isComplex()!=right->isComplex())
569     {
570     if (left->isComplex())
571     {
572     m_right=makePromote(m_right);
573     }
574     else
575     {
576     m_left=makePromote(m_left);
577     }
578     }
579     m_iscompl=left->isComplex();
580 jfenwick 2500 LazyNodeSetup();
581 jfenwick 2177 SIZELIMIT
582 jfenwick 2092 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
583 jfenwick 1879 }
584    
585 jfenwick 6513 // need to consider promotion
586 jfenwick 2066 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
587 caltinay 5972 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
588     m_op(op),
589     m_axis_offset(axis_offset),
590     m_transpose(transpose)
591 jfenwick 2066 {
592     if ((getOpgroup(op)!=G_TENSORPROD))
593     {
594 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
595 jfenwick 2066 }
596     if ((transpose>2) || (transpose<0))
597     {
598 caltinay 5972 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
599 jfenwick 2066 }
600 caltinay 5972 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
601 jfenwick 2066 {
602 caltinay 5972 FunctionSpace fs=getFunctionSpace();
603     Data ltemp(left);
604     Data tmp(ltemp,fs);
605     left=tmp.borrowDataPtr();
606 jfenwick 2066 }
607 caltinay 5972 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
608 jfenwick 2066 {
609 caltinay 5972 Data tmp(Data(right),getFunctionSpace());
610     right=tmp.borrowDataPtr();
611 jfenwick 2066 }
612 jfenwick 2195 // left->operandCheck(*right);
613 jfenwick 1879
614 caltinay 5972 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
615 jfenwick 2066 {
616 caltinay 5972 m_left=dynamic_pointer_cast<DataLazy>(left);
617 jfenwick 2066 }
618     else
619     {
620 caltinay 5972 m_left=DataLazy_ptr(new DataLazy(left));
621 jfenwick 2066 }
622     if (right->isLazy())
623     {
624 caltinay 5972 m_right=dynamic_pointer_cast<DataLazy>(right);
625 jfenwick 2066 }
626     else
627     {
628 caltinay 5972 m_right=DataLazy_ptr(new DataLazy(right));
629 jfenwick 2066 }
630     char lt=m_left->m_readytype;
631     char rt=m_right->m_readytype;
632     if (lt=='E' || rt=='E')
633     {
634 caltinay 5972 m_readytype='E';
635 jfenwick 2066 }
636     else if (lt=='T' || rt=='T')
637     {
638 caltinay 5972 m_readytype='T';
639 jfenwick 2066 }
640     else
641     {
642 caltinay 5972 m_readytype='C';
643 jfenwick 2066 }
644     m_samplesize=getNumDPPSample()*getNoValues();
645 jfenwick 2177 m_children=m_left->m_children+m_right->m_children+2;
646     m_height=max(m_left->m_height,m_right->m_height)+1;
647 jfenwick 6520
648 jfenwick 6513 // now we need to work out if we need to promote anything
649     if (left->isComplex()!=right->isComplex())
650     {
651     if (left->isComplex())
652     {
653     m_right=makePromote(m_right);
654     }
655     else
656     {
657     m_left=makePromote(m_left);
658     }
659     }
660     m_iscompl=left->isComplex();
661 jfenwick 2500 LazyNodeSetup();
662 jfenwick 2177 SIZELIMIT
663 jfenwick 2092 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
664 jfenwick 2066 }
665    
666    
667 jfenwick 2084 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
668 caltinay 5972 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
669     m_op(op),
670     m_axis_offset(axis_offset),
671     m_transpose(0),
672     m_tol(0)
673 jfenwick 2084 {
674     if ((getOpgroup(op)!=G_NP1OUT_P))
675     {
676 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
677 jfenwick 2084 }
678     DataLazy_ptr lleft;
679     if (!left->isLazy())
680     {
681 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
682 jfenwick 2084 }
683     else
684     {
685 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
686 jfenwick 2084 }
687     m_readytype=lleft->m_readytype;
688     m_left=lleft;
689     m_samplesize=getNumDPPSample()*getNoValues();
690 jfenwick 2177 m_children=m_left->m_children+1;
691     m_height=m_left->m_height+1;
692 jfenwick 6513 m_iscompl=left->isComplex();
693 jfenwick 2500 LazyNodeSetup();
694 jfenwick 2177 SIZELIMIT
695 jfenwick 2092 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
696 jfenwick 2084 }
697    
698 jfenwick 2147 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
699 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
700     m_op(op),
701     m_axis_offset(0),
702     m_transpose(0),
703     m_tol(tol)
704 jfenwick 2147 {
705     if ((getOpgroup(op)!=G_UNARY_P))
706     {
707 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
708 jfenwick 2147 }
709     DataLazy_ptr lleft;
710     if (!left->isLazy())
711     {
712 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
713 jfenwick 2147 }
714     else
715     {
716 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
717 jfenwick 2147 }
718     m_readytype=lleft->m_readytype;
719     m_left=lleft;
720     m_samplesize=getNumDPPSample()*getNoValues();
721 jfenwick 2177 m_children=m_left->m_children+1;
722     m_height=m_left->m_height+1;
723 jfenwick 6513 m_iscompl=left->isComplex();
724 jfenwick 2500 LazyNodeSetup();
725 jfenwick 2177 SIZELIMIT
726 jfenwick 2147 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
727     }
728 jfenwick 2084
729 jfenwick 2496
730     DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
731 caltinay 5972 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
732     m_op(op),
733     m_axis_offset(axis0),
734     m_transpose(axis1),
735     m_tol(0)
736 jfenwick 2496 {
737     if ((getOpgroup(op)!=G_NP1OUT_2P))
738     {
739 caltinay 5972 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
740 jfenwick 2496 }
741     DataLazy_ptr lleft;
742     if (!left->isLazy())
743     {
744 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
745 jfenwick 2496 }
746     else
747     {
748 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
749 jfenwick 2496 }
750     m_readytype=lleft->m_readytype;
751     m_left=lleft;
752     m_samplesize=getNumDPPSample()*getNoValues();
753     m_children=m_left->m_children+1;
754     m_height=m_left->m_height+1;
755 jfenwick 6513 m_iscompl=left->isComplex();
756 jfenwick 2500 LazyNodeSetup();
757 jfenwick 2496 SIZELIMIT
758     LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
759     }
760    
761 jfenwick 3035
762    
763    
764     DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
765 caltinay 5972 : parent(left->getFunctionSpace(), left->getShape()),
766     m_op(CONDEVAL),
767     m_axis_offset(0),
768     m_transpose(0),
769     m_tol(0)
770 jfenwick 3035 {
771    
772     DataLazy_ptr lmask;
773     DataLazy_ptr lleft;
774     DataLazy_ptr lright;
775     if (!mask->isLazy())
776     {
777 caltinay 5972 lmask=DataLazy_ptr(new DataLazy(mask));
778 jfenwick 3035 }
779     else
780     {
781 caltinay 5972 lmask=dynamic_pointer_cast<DataLazy>(mask);
782 jfenwick 3035 }
783     if (!left->isLazy())
784     {
785 caltinay 5972 lleft=DataLazy_ptr(new DataLazy(left));
786 jfenwick 3035 }
787     else
788     {
789 caltinay 5972 lleft=dynamic_pointer_cast<DataLazy>(left);
790 jfenwick 3035 }
791     if (!right->isLazy())
792     {
793 caltinay 5972 lright=DataLazy_ptr(new DataLazy(right));
794 jfenwick 3035 }
795     else
796     {
797 caltinay 5972 lright=dynamic_pointer_cast<DataLazy>(right);
798 jfenwick 3035 }
799     m_readytype=lmask->m_readytype;
800     if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
801     {
802 caltinay 5972 throw DataException("Programmer Error - condEval arguments must have the same readytype");
803 jfenwick 3035 }
804     m_left=lleft;
805     m_right=lright;
806     m_mask=lmask;
807     m_samplesize=getNumDPPSample()*getNoValues();
808     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
809     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
810 jfenwick 6513
811     // now we need to work out if we need to promote anything
812     if (left->isComplex()!=right->isComplex())
813     {
814     if (left->isComplex())
815     {
816     m_right=makePromote(m_right);
817     }
818     else
819     {
820     m_left=makePromote(m_left);
821     }
822     }
823     m_iscompl=left->isComplex();
824 jfenwick 3035 LazyNodeSetup();
825     SIZELIMIT
826     LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
827     }
828    
829    
830    
831 jfenwick 1865 DataLazy::~DataLazy()
832     {
833 jfenwick 2500 delete[] m_sampleids;
834 jfenwick 1865 }
835    
836 jfenwick 1879
837 jfenwick 1899 /*
838     \brief Evaluates the expression using methods on Data.
839     This does the work for the collapse method.
840     For reasons of efficiency do not call this method on DataExpanded nodes.
841     */
842 jfenwick 1889 DataReady_ptr
843 jfenwick 4621 DataLazy::collapseToReady() const
844 jfenwick 1889 {
845     if (m_readytype=='E')
846 caltinay 5972 { // this is more an efficiency concern than anything else
847 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
848     }
849     if (m_op==IDENTITY)
850     {
851     return m_id;
852     }
853     DataReady_ptr pleft=m_left->collapseToReady();
854     Data left(pleft);
855     Data right;
856 jfenwick 2066 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
857 jfenwick 1889 {
858     right=Data(m_right->collapseToReady());
859     }
860     Data result;
861     switch(m_op)
862     {
863     case ADD:
864 caltinay 5972 result=left+right;
865     break;
866     case SUB:
867     result=left-right;
868     break;
869     case MUL:
870     result=left*right;
871     break;
872     case DIV:
873     result=left/right;
874     break;
875 jfenwick 1889 case SIN:
876 caltinay 5972 result=left.sin();
877     break;
878 jfenwick 1889 case COS:
879 caltinay 5972 result=left.cos();
880     break;
881 jfenwick 1889 case TAN:
882 caltinay 5972 result=left.tan();
883     break;
884 jfenwick 1889 case ASIN:
885 caltinay 5972 result=left.asin();
886     break;
887 jfenwick 1889 case ACOS:
888 caltinay 5972 result=left.acos();
889     break;
890 jfenwick 1889 case ATAN:
891 caltinay 5972 result=left.atan();
892     break;
893 jfenwick 1889 case SINH:
894 caltinay 5972 result=left.sinh();
895     break;
896 jfenwick 1889 case COSH:
897 caltinay 5972 result=left.cosh();
898     break;
899 jfenwick 1889 case TANH:
900 caltinay 5972 result=left.tanh();
901     break;
902 jfenwick 1889 case ERF:
903 caltinay 5972 result=left.erf();
904     break;
905 jfenwick 1889 case ASINH:
906 caltinay 5972 result=left.asinh();
907     break;
908 jfenwick 1889 case ACOSH:
909 caltinay 5972 result=left.acosh();
910     break;
911 jfenwick 1889 case ATANH:
912 caltinay 5972 result=left.atanh();
913     break;
914 jfenwick 1889 case LOG10:
915 caltinay 5972 result=left.log10();
916     break;
917 jfenwick 1889 case LOG:
918 caltinay 5972 result=left.log();
919     break;
920 jfenwick 1889 case SIGN:
921 caltinay 5972 result=left.sign();
922     break;
923 jfenwick 1889 case ABS:
924 caltinay 5972 result=left.abs();
925     break;
926 jfenwick 1889 case NEG:
927 caltinay 5972 result=left.neg();
928     break;
929 jfenwick 1889 case POS:
930 caltinay 5972 // it doesn't mean anything for delayed.
931     // it will just trigger a deep copy of the lazy object
932     throw DataException("Programmer error - POS not supported for lazy data.");
933     break;
934 jfenwick 1889 case EXP:
935 caltinay 5972 result=left.exp();
936     break;
937 jfenwick 1889 case SQRT:
938 caltinay 5972 result=left.sqrt();
939     break;
940 jfenwick 1889 case RECIP:
941 caltinay 5972 result=left.oneOver();
942     break;
943 jfenwick 1889 case GZ:
944 caltinay 5972 result=left.wherePositive();
945     break;
946 jfenwick 1889 case LZ:
947 caltinay 5972 result=left.whereNegative();
948     break;
949 jfenwick 1889 case GEZ:
950 caltinay 5972 result=left.whereNonNegative();
951     break;
952 jfenwick 1889 case LEZ:
953 caltinay 5972 result=left.whereNonPositive();
954     break;
955 jfenwick 2147 case NEZ:
956 caltinay 5972 result=left.whereNonZero(m_tol);
957     break;
958 jfenwick 2147 case EZ:
959 caltinay 5972 result=left.whereZero(m_tol);
960     break;
961 jfenwick 2037 case SYM:
962 caltinay 5972 result=left.symmetric();
963     break;
964 jfenwick 2037 case NSYM:
965 jfenwick 6110 result=left.antisymmetric();
966 caltinay 5972 break;
967 jfenwick 2066 case PROD:
968 caltinay 5972 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
969     break;
970 jfenwick 2084 case TRANS:
971 caltinay 5972 result=left.transpose(m_axis_offset);
972     break;
973 jfenwick 2084 case TRACE:
974 caltinay 5972 result=left.trace(m_axis_offset);
975     break;
976 jfenwick 2496 case SWAP:
977 caltinay 5972 result=left.swapaxes(m_axis_offset, m_transpose);
978     break;
979 jfenwick 2721 case MINVAL:
980 caltinay 5972 result=left.minval();
981     break;
982 jfenwick 2721 case MAXVAL:
983 caltinay 5972 result=left.minval();
984     break;
985 jfenwick 6125 case HER:
986 jfenwick 6516 result=left.hermitian();
987 jfenwick 6518 break;
988 jfenwick 6516 case NHER:
989     result=left.antihermitian();
990 jfenwick 6518 break;
991     case PROM:
992     result.copy(left);
993     result.complicate();
994     break;
995 jfenwick 6125 break;
996 jfenwick 1889 default:
997 caltinay 5972 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
998 jfenwick 1889 }
999     return result.borrowReadyPtr();
1000     }
1001    
1002 jfenwick 1899 /*
1003     \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
1004     This method uses the original methods on the Data class to evaluate the expressions.
1005     For this reason, it should not be used on DataExpanded instances. (To do so would defeat
1006     the purpose of using DataLazy in the first place).
1007     */
1008 jfenwick 1889 void
1009 jfenwick 4621 DataLazy::collapse() const
1010 jfenwick 1889 {
1011     if (m_op==IDENTITY)
1012     {
1013 caltinay 5972 return;
1014 jfenwick 1889 }
1015     if (m_readytype=='E')
1016 caltinay 5972 { // this is more an efficiency concern than anything else
1017 jfenwick 1889 throw DataException("Programmer Error - do not use collapse on Expanded data.");
1018     }
1019     m_id=collapseToReady();
1020     m_op=IDENTITY;
1021     }
1022    
1023 jfenwick 2500 // The result will be stored in m_samples
1024     // The return value is a pointer to the DataVector, offset is the offset within the return value
1025 jfenwick 5938 const DataTypes::RealVectorType*
1026 jfenwick 4621 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1027 jfenwick 2500 {
1028     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1029 caltinay 5972 // collapse so we have a 'E' node or an IDENTITY for some other type
1030 jfenwick 2500 if (m_readytype!='E' && m_op!=IDENTITY)
1031     {
1032 caltinay 5972 collapse();
1033 jfenwick 2500 }
1034 caltinay 5972 if (m_op==IDENTITY)
1035 jfenwick 2500 {
1036 jfenwick 6042 const RealVectorType& vec=m_id->getVectorRO();
1037 jfenwick 2500 roffset=m_id->getPointOffset(sampleNo, 0);
1038 jfenwick 2777 #ifdef LAZY_STACK_PROF
1039     int x;
1040     if (&x<stackend[omp_get_thread_num()])
1041     {
1042     stackend[omp_get_thread_num()]=&x;
1043     }
1044     #endif
1045 jfenwick 2500 return &(vec);
1046     }
1047     if (m_readytype!='E')
1048     {
1049     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1050     }
1051 jfenwick 2501 if (m_sampleids[tid]==sampleNo)
1052     {
1053 caltinay 5972 roffset=tid*m_samplesize;
1054 jfenwick 6512 return &(m_samples_r); // sample is already resolved
1055 jfenwick 2501 }
1056     m_sampleids[tid]=sampleNo;
1057 jfenwick 3035
1058 jfenwick 2500 switch (getOpgroup(m_op))
1059     {
1060     case G_UNARY:
1061     case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1062     case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1063     case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1064     case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1065     case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1066     case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1067 jfenwick 2721 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1068 jfenwick 3035 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1069 jfenwick 2500 default:
1070 jfenwick 6518 throw DataException("Programmer Error - resolveNodeSample does not know how to process "+opToString(m_op)+".");
1071 jfenwick 2500 }
1072     }
1073    
1074 jfenwick 6517 // The result will be stored in m_samples
1075     // The return value is a pointer to the DataVector, offset is the offset within the return value
1076     const DataTypes::CplxVectorType*
1077     DataLazy::resolveNodeSampleCplx(int tid, int sampleNo, size_t& roffset) const
1078     {
1079     LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1080     // collapse so we have a 'E' node or an IDENTITY for some other type
1081     if (m_readytype!='E' && m_op!=IDENTITY)
1082     {
1083     collapse();
1084     }
1085     if (m_op==IDENTITY)
1086     {
1087     const CplxVectorType& vec=m_id->getVectorROC();
1088     roffset=m_id->getPointOffset(sampleNo, 0);
1089     #ifdef LAZY_STACK_PROF
1090     int x;
1091     if (&x<stackend[omp_get_thread_num()])
1092     {
1093     stackend[omp_get_thread_num()]=&x;
1094     }
1095     #endif
1096     return &(vec);
1097     }
1098     if (m_readytype!='E')
1099     {
1100     throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1101     }
1102     if (m_sampleids[tid]==sampleNo)
1103     {
1104     roffset=tid*m_samplesize;
1105     return &(m_samples_c); // sample is already resolved
1106     }
1107     m_sampleids[tid]=sampleNo;
1108    
1109     switch (getOpgroup(m_op))
1110     {
1111     case G_UNARY:
1112 jfenwick 6518 case G_UNARY_C:
1113 jfenwick 6517 case G_UNARY_P: return resolveNodeUnaryCplx(tid, sampleNo, roffset);
1114     case G_BINARY: return resolveNodeBinaryCplx(tid, sampleNo, roffset);
1115     case G_NP1OUT: return resolveNodeNP1OUTCplx(tid, sampleNo, roffset);
1116     case G_NP1OUT_P: return resolveNodeNP1OUT_PCplx(tid, sampleNo, roffset);
1117     case G_TENSORPROD: return resolveNodeTProdCplx(tid, sampleNo, roffset);
1118     case G_NP1OUT_2P: return resolveNodeNP1OUT_2PCplx(tid, sampleNo, roffset);
1119     case G_REDUCTION: return resolveNodeReductionCplx(tid, sampleNo, roffset);
1120     case G_CONDEVAL: return resolveNodeCondEvalCplx(tid, sampleNo, roffset);
1121     default:
1122 jfenwick 6518 throw DataException("Programmer Error - resolveNodeSampleCplx does not know how to process "+opToString(m_op)+".");
1123 jfenwick 6517 }
1124     }
1125    
1126 jfenwick 5938 const DataTypes::RealVectorType*
1127 jfenwick 4621 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1128 jfenwick 2500 {
1129 caltinay 5972 // we assume that any collapsing has been done before we get here
1130     // since we only have one argument we don't need to think about only
1131     // processing single points.
1132     // we will also know we won't get identity nodes
1133 jfenwick 2500 if (m_readytype!='E')
1134     {
1135     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1136     }
1137     if (m_op==IDENTITY)
1138     {
1139     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1140     }
1141 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1142 jfenwick 2500 const double* left=&((*leftres)[roffset]);
1143     roffset=m_samplesize*tid;
1144 jfenwick 6512 double* result=&(m_samples_r[roffset]);
1145 jfenwick 6168 if (m_op==POS)
1146 jfenwick 2500 {
1147 jfenwick 6168 // this should be prevented earlier
1148     // operation is meaningless for lazy
1149     throw DataException("Programmer error - POS not supported for lazy data.");
1150 jfenwick 2500 }
1151 jfenwick 6066 tensor_unary_array_operation(m_samplesize,
1152     left,
1153     result,
1154 jfenwick 6168 m_op,
1155 jfenwick 6066 m_tol);
1156 jfenwick 6512 return &(m_samples_r);
1157 jfenwick 2500 }
1158    
1159 jfenwick 6517 const DataTypes::CplxVectorType*
1160     DataLazy::resolveNodeUnaryCplx(int tid, int sampleNo, size_t& roffset) const
1161     {
1162     // we assume that any collapsing has been done before we get here
1163     // since we only have one argument we don't need to think about only
1164     // processing single points.
1165     // we will also know we won't get identity nodes
1166     if (m_readytype!='E')
1167     {
1168     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1169     }
1170     if (m_op==IDENTITY)
1171     {
1172     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1173     }
1174     if (m_op==POS)
1175     {
1176     // this should be prevented earlier
1177     // operation is meaningless for lazy
1178     throw DataException("Programmer error - POS not supported for lazy data.");
1179     }
1180 jfenwick 6518
1181     roffset=m_samplesize*tid;
1182     DataTypes::cplx_t* result=&(m_samples_c[roffset]);
1183     if (m_op==PROM)
1184     {
1185     const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1186     const DataTypes::real_t* left=&((*leftres)[roffset]);
1187    
1188     tensor_unary_promote(m_samplesize, left, result);
1189     }
1190     else
1191     {
1192     const DataTypes::CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, roffset);
1193     const DataTypes::cplx_t* left=&((*leftres)[roffset]);
1194     tensor_unary_array_operation(m_samplesize,
1195     left,
1196     result,
1197     m_op,
1198     m_tol);
1199     }
1200 jfenwick 6517 return &(m_samples_c);
1201     }
1202 jfenwick 2500
1203 jfenwick 5938 const DataTypes::RealVectorType*
1204 jfenwick 4621 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1205 jfenwick 2721 {
1206 caltinay 5972 // we assume that any collapsing has been done before we get here
1207     // since we only have one argument we don't need to think about only
1208     // processing single points.
1209     // we will also know we won't get identity nodes
1210 jfenwick 2721 if (m_readytype!='E')
1211     {
1212     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1213     }
1214     if (m_op==IDENTITY)
1215     {
1216     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1217     }
1218     size_t loffset=0;
1219 jfenwick 5938 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1220 jfenwick 2721
1221     roffset=m_samplesize*tid;
1222 jfenwick 2734 unsigned int ndpps=getNumDPPSample();
1223 jfenwick 3917 unsigned int psize=DataTypes::noValues(m_left->getShape());
1224 jfenwick 6512 double* result=&(m_samples_r[roffset]);
1225 jfenwick 2721 switch (m_op)
1226     {
1227     case MINVAL:
1228 caltinay 5972 {
1229     for (unsigned int z=0;z<ndpps;++z)
1230     {
1231     FMin op;
1232 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1233 caltinay 5972 loffset+=psize;
1234     result++;
1235     }
1236     }
1237     break;
1238 jfenwick 2721 case MAXVAL:
1239 caltinay 5972 {
1240     for (unsigned int z=0;z<ndpps;++z)
1241     {
1242     FMax op;
1243 jfenwick 6083 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1244 caltinay 5972 loffset+=psize;
1245     result++;
1246     }
1247     }
1248     break;
1249 jfenwick 2721 default:
1250 caltinay 5972 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1251 jfenwick 2721 }
1252 jfenwick 6512 return &(m_samples_r);
1253 jfenwick 2721 }
1254    
1255 jfenwick 6517 const DataTypes::CplxVectorType*
1256     DataLazy::resolveNodeReductionCplx(int tid, int sampleNo, size_t& roffset) const
1257     {
1258     // we assume that any collapsing has been done before we get here
1259     // since we only have one argument we don't need to think about only
1260     // processing single points.
1261     // we will also know we won't get identity nodes
1262     if (m_readytype!='E')
1263     {
1264     throw DataException("Programmer error - resolveReductionCplx should only be called on expanded Data.");
1265     }
1266     if (m_op==IDENTITY)
1267     {
1268     throw DataException("Programmer error - resolveNodeReductionCplx should not be called on identity nodes.");
1269     }
1270     throw DataException("Programmer error - reduction operations MIN and MAX not supported for complex values.");
1271     }
1272    
1273    
1274 jfenwick 5938 const DataTypes::RealVectorType*
1275 jfenwick 4621 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1276 jfenwick 2500 {
1277 caltinay 5972 // we assume that any collapsing has been done before we get here
1278     // since we only have one argument we don't need to think about only
1279     // processing single points.
1280 jfenwick 2500 if (m_readytype!='E')
1281     {
1282     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1283     }
1284     if (m_op==IDENTITY)
1285     {
1286     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1287     }
1288     size_t subroffset;
1289 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1290 jfenwick 2500 roffset=m_samplesize*tid;
1291     size_t loop=0;
1292     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1293     size_t step=getNoValues();
1294     size_t offset=roffset;
1295     switch (m_op)
1296     {
1297     case SYM:
1298 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1299     {
1300 jfenwick 6512 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1301 caltinay 5972 subroffset+=step;
1302     offset+=step;
1303     }
1304     break;
1305 jfenwick 2500 case NSYM:
1306 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1307     {
1308 jfenwick 6512 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1309 caltinay 5972 subroffset+=step;
1310     offset+=step;
1311     }
1312     break;
1313 jfenwick 2500 default:
1314 caltinay 5972 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1315 jfenwick 2500 }
1316 jfenwick 6512 return &m_samples_r;
1317 jfenwick 2500 }
1318    
1319 jfenwick 6517
1320     const DataTypes::CplxVectorType*
1321     DataLazy::resolveNodeNP1OUTCplx(int tid, int sampleNo, size_t& roffset) const
1322     {
1323     // we assume that any collapsing has been done before we get here
1324     // since we only have one argument we don't need to think about only
1325     // processing single points.
1326     if (m_readytype!='E')
1327     {
1328     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1329     }
1330     if (m_op==IDENTITY)
1331     {
1332     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1333     }
1334     size_t subroffset;
1335     const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1336     roffset=m_samplesize*tid;
1337     size_t loop=0;
1338     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1339     size_t step=getNoValues();
1340     size_t offset=roffset;
1341     switch (m_op)
1342     {
1343     case SYM:
1344     for (loop=0;loop<numsteps;++loop)
1345     {
1346     escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(), offset);
1347     subroffset+=step;
1348     offset+=step;
1349     }
1350     break;
1351     case NSYM:
1352     for (loop=0;loop<numsteps;++loop)
1353     {
1354     escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(), offset);
1355     subroffset+=step;
1356     offset+=step;
1357     }
1358     break;
1359     default:
1360     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1361     }
1362     return &m_samples_c;
1363     }
1364    
1365 jfenwick 5938 const DataTypes::RealVectorType*
1366 jfenwick 4621 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1367 jfenwick 2500 {
1368 caltinay 5972 // we assume that any collapsing has been done before we get here
1369     // since we only have one argument we don't need to think about only
1370     // processing single points.
1371 jfenwick 2500 if (m_readytype!='E')
1372     {
1373     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1374     }
1375     if (m_op==IDENTITY)
1376     {
1377     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1378     }
1379     size_t subroffset;
1380     size_t offset;
1381 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1382 jfenwick 2500 roffset=m_samplesize*tid;
1383     offset=roffset;
1384     size_t loop=0;
1385     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1386     size_t outstep=getNoValues();
1387     size_t instep=m_left->getNoValues();
1388     switch (m_op)
1389     {
1390     case TRACE:
1391 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1392     {
1393 jfenwick 6512 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_r ,getShape(),offset,m_axis_offset);
1394 caltinay 5972 subroffset+=instep;
1395     offset+=outstep;
1396     }
1397     break;
1398 jfenwick 2500 case TRANS:
1399 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1400     {
1401 jfenwick 6512 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset,m_axis_offset);
1402 caltinay 5972 subroffset+=instep;
1403     offset+=outstep;
1404     }
1405     break;
1406 jfenwick 2500 default:
1407 caltinay 5972 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1408 jfenwick 2500 }
1409 jfenwick 6512 return &m_samples_r;
1410 jfenwick 2500 }
1411    
1412 jfenwick 6517 const DataTypes::CplxVectorType*
1413     DataLazy::resolveNodeNP1OUT_PCplx(int tid, int sampleNo, size_t& roffset) const
1414     {
1415     // we assume that any collapsing has been done before we get here
1416     // since we only have one argument we don't need to think about only
1417     // processing single points.
1418     if (m_readytype!='E')
1419     {
1420     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1421     }
1422     if (m_op==IDENTITY)
1423     {
1424     throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1425     }
1426     size_t subroffset;
1427     size_t offset;
1428     const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1429     roffset=m_samplesize*tid;
1430     offset=roffset;
1431     size_t loop=0;
1432     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1433     size_t outstep=getNoValues();
1434     size_t instep=m_left->getNoValues();
1435     switch (m_op)
1436     {
1437     case TRACE:
1438     for (loop=0;loop<numsteps;++loop)
1439     {
1440     escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_c ,getShape(),offset,m_axis_offset);
1441     subroffset+=instep;
1442     offset+=outstep;
1443     }
1444     break;
1445     case TRANS:
1446     for (loop=0;loop<numsteps;++loop)
1447     {
1448     escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(),offset,m_axis_offset);
1449     subroffset+=instep;
1450     offset+=outstep;
1451     }
1452     break;
1453     default:
1454     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1455     }
1456     return &m_samples_c;
1457     }
1458 jfenwick 2500
1459 jfenwick 5938 const DataTypes::RealVectorType*
1460 jfenwick 4621 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1461 jfenwick 2500 {
1462     if (m_readytype!='E')
1463     {
1464     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1465     }
1466     if (m_op==IDENTITY)
1467     {
1468     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1469     }
1470     size_t subroffset;
1471     size_t offset;
1472 jfenwick 6042 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1473 jfenwick 2500 roffset=m_samplesize*tid;
1474     offset=roffset;
1475     size_t loop=0;
1476     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1477     size_t outstep=getNoValues();
1478     size_t instep=m_left->getNoValues();
1479     switch (m_op)
1480     {
1481     case SWAP:
1482 caltinay 5972 for (loop=0;loop<numsteps;++loop)
1483     {
1484 jfenwick 6512 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset, m_axis_offset, m_transpose);
1485 caltinay 5972 subroffset+=instep;
1486     offset+=outstep;
1487     }
1488     break;
1489 jfenwick 2500 default:
1490 caltinay 5972 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1491 jfenwick 2500 }
1492 jfenwick 6512 return &m_samples_r;
1493 jfenwick 2500 }
1494    
1495 jfenwick 6517
1496     const DataTypes::CplxVectorType*
1497     DataLazy::resolveNodeNP1OUT_2PCplx(int tid, int sampleNo, size_t& roffset) const
1498     {
1499     if (m_readytype!='E')
1500     {
1501     throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1502     }
1503     if (m_op==IDENTITY)
1504     {
1505     throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1506     }
1507     size_t subroffset;
1508     size_t offset;
1509     const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1510     roffset=m_samplesize*tid;
1511     offset=roffset;
1512     size_t loop=0;
1513     size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1514     size_t outstep=getNoValues();
1515     size_t instep=m_left->getNoValues();
1516     switch (m_op)
1517     {
1518     case SWAP:
1519     for (loop=0;loop<numsteps;++loop)
1520     {
1521     escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(),offset, m_axis_offset, m_transpose);
1522     subroffset+=instep;
1523     offset+=outstep;
1524     }
1525     break;
1526     default:
1527     throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1528     }
1529     return &m_samples_c;
1530     }
1531    
1532 jfenwick 5938 const DataTypes::RealVectorType*
1533 jfenwick 4621 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1534 jfenwick 3035 {
1535     if (m_readytype!='E')
1536     {
1537     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1538     }
1539     if (m_op!=CONDEVAL)
1540     {
1541     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1542     }
1543     size_t subroffset;
1544 jfenwick 2500
1545 jfenwick 6042 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1546     const RealVectorType* srcres=0;
1547 jfenwick 3035 if ((*maskres)[subroffset]>0)
1548     {
1549 caltinay 5972 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1550 jfenwick 3035 }
1551     else
1552     {
1553 caltinay 5972 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1554 jfenwick 3035 }
1555    
1556     // Now we need to copy the result
1557    
1558     roffset=m_samplesize*tid;
1559     for (int i=0;i<m_samplesize;++i)
1560     {
1561 jfenwick 6512 m_samples_r[roffset+i]=(*srcres)[subroffset+i];
1562 jfenwick 3035 }
1563    
1564 jfenwick 6512 return &m_samples_r;
1565 jfenwick 3035 }
1566    
1567 jfenwick 6517 const DataTypes::CplxVectorType*
1568     DataLazy::resolveNodeCondEvalCplx(int tid, int sampleNo, size_t& roffset) const
1569     {
1570     if (m_readytype!='E')
1571     {
1572     throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1573     }
1574     if (m_op!=CONDEVAL)
1575     {
1576     throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1577     }
1578     size_t subroffset;
1579    
1580     const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1581     const CplxVectorType* srcres=0;
1582     if ((*maskres)[subroffset]>0)
1583     {
1584     srcres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1585     }
1586     else
1587     {
1588     srcres=m_right->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1589     }
1590    
1591     // Now we need to copy the result
1592    
1593     roffset=m_samplesize*tid;
1594     for (int i=0;i<m_samplesize;++i)
1595     {
1596     m_samples_c[roffset+i]=(*srcres)[subroffset+i];
1597     }
1598    
1599     return &m_samples_c;
1600     }
1601    
1602    
1603 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1604     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1605     // If both children are expanded, then we can process them in a single operation (we treat
1606     // the whole sample as one big datapoint.
1607     // If one of the children is not expanded, then we need to treat each point in the sample
1608     // individually.
1609     // There is an additional complication when scalar operations are considered.
1610     // For example, 2+Vector.
1611     // In this case each double within the point is treated individually
1612 jfenwick 5938 const DataTypes::RealVectorType*
1613 jfenwick 4621 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1614 jfenwick 2500 {
1615     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1616    
1617 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1618     // first work out which of the children are expanded
1619 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
1620     bool rightExp=(m_right->m_readytype=='E');
1621     if (!leftExp && !rightExp)
1622     {
1623 caltinay 5972 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1624 jfenwick 2500 }
1625     bool leftScalar=(m_left->getRank()==0);
1626     bool rightScalar=(m_right->getRank()==0);
1627     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1628     {
1629 caltinay 5972 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1630 jfenwick 2500 }
1631     size_t leftsize=m_left->getNoValues();
1632     size_t rightsize=m_right->getNoValues();
1633 caltinay 5972 size_t chunksize=1; // how many doubles will be processed in one go
1634     int leftstep=0; // how far should the left offset advance after each step
1635 jfenwick 2500 int rightstep=0;
1636 caltinay 5972 int numsteps=0; // total number of steps for the inner loop
1637     int oleftstep=0; // the o variables refer to the outer loop
1638     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1639 jfenwick 2500 int onumsteps=1;
1640    
1641 caltinay 5972 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1642 jfenwick 2500 bool RES=(rightExp && rightScalar);
1643 caltinay 5972 bool LS=(!leftExp && leftScalar); // left is a single scalar
1644 jfenwick 2500 bool RS=(!rightExp && rightScalar);
1645 caltinay 5972 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1646 jfenwick 2500 bool RN=(!rightExp && !rightScalar);
1647 caltinay 5972 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1648 jfenwick 2500 bool REN=(rightExp && !rightScalar);
1649    
1650 caltinay 5972 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1651 jfenwick 2500 {
1652 caltinay 5972 chunksize=m_left->getNumDPPSample()*leftsize;
1653     leftstep=0;
1654     rightstep=0;
1655     numsteps=1;
1656 jfenwick 2500 }
1657     else if (LES || RES)
1658     {
1659 caltinay 5972 chunksize=1;
1660     if (LES) // left is an expanded scalar
1661     {
1662     if (RS)
1663     {
1664     leftstep=1;
1665     rightstep=0;
1666     numsteps=m_left->getNumDPPSample();
1667     }
1668     else // RN or REN
1669     {
1670     leftstep=0;
1671     oleftstep=1;
1672     rightstep=1;
1673     orightstep=(RN ? -(int)rightsize : 0);
1674     numsteps=rightsize;
1675     onumsteps=m_left->getNumDPPSample();
1676     }
1677     }
1678     else // right is an expanded scalar
1679     {
1680     if (LS)
1681     {
1682     rightstep=1;
1683     leftstep=0;
1684     numsteps=m_right->getNumDPPSample();
1685     }
1686     else
1687     {
1688     rightstep=0;
1689     orightstep=1;
1690     leftstep=1;
1691     oleftstep=(LN ? -(int)leftsize : 0);
1692     numsteps=leftsize;
1693     onumsteps=m_right->getNumDPPSample();
1694     }
1695     }
1696 jfenwick 2500 }
1697 caltinay 5972 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1698 jfenwick 2500 {
1699 caltinay 5972 if (LEN) // and Right will be a single value
1700     {
1701     chunksize=rightsize;
1702     leftstep=rightsize;
1703     rightstep=0;
1704     numsteps=m_left->getNumDPPSample();
1705     if (RS)
1706     {
1707     numsteps*=leftsize;
1708     }
1709     }
1710     else // REN
1711     {
1712     chunksize=leftsize;
1713     rightstep=leftsize;
1714     leftstep=0;
1715     numsteps=m_right->getNumDPPSample();
1716     if (LS)
1717     {
1718     numsteps*=rightsize;
1719     }
1720     }
1721 jfenwick 2500 }
1722    
1723 caltinay 5972 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1724     // Get the values of sub-expressions
1725 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1726     const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1727 jfenwick 2500 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1728     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1729     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1730     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1731     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1732     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1733     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1734    
1735     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1736     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1737    
1738    
1739     roffset=m_samplesize*tid;
1740 jfenwick 6512 double* resultp=&(m_samples_r[roffset]); // results are stored at the vector offset we received
1741 jfenwick 2500 switch(m_op)
1742     {
1743     case ADD:
1744 jfenwick 6042 //PROC_OP(NO_ARG,plus<double>());
1745 jfenwick 6517 escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1746 jfenwick 6042 &(*left)[0],
1747     &(*right)[0],
1748     chunksize,
1749     onumsteps,
1750     numsteps,
1751     resultStep,
1752     leftstep,
1753     rightstep,
1754     oleftstep,
1755     orightstep,
1756 jfenwick 6056 lroffset,
1757     rroffset,
1758 jfenwick 6168 escript::ES_optype::ADD);
1759 caltinay 5972 break;
1760 jfenwick 2500 case SUB:
1761 jfenwick 6517 escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1762 jfenwick 6057 &(*left)[0],
1763     &(*right)[0],
1764     chunksize,
1765     onumsteps,
1766     numsteps,
1767     resultStep,
1768     leftstep,
1769     rightstep,
1770     oleftstep,
1771     orightstep,
1772     lroffset,
1773     rroffset,
1774 jfenwick 6168 escript::ES_optype::SUB);
1775 jfenwick 6057 //PROC_OP(NO_ARG,minus<double>());
1776 caltinay 5972 break;
1777 jfenwick 2500 case MUL:
1778 jfenwick 6057 //PROC_OP(NO_ARG,multiplies<double>());
1779 jfenwick 6517 escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1780 jfenwick 6057 &(*left)[0],
1781     &(*right)[0],
1782     chunksize,
1783     onumsteps,
1784     numsteps,
1785     resultStep,
1786     leftstep,
1787     rightstep,
1788     oleftstep,
1789     orightstep,
1790     lroffset,
1791     rroffset,
1792 jfenwick 6168 escript::ES_optype::MUL);
1793 caltinay 5972 break;
1794 jfenwick 2500 case DIV:
1795 jfenwick 6057 //PROC_OP(NO_ARG,divides<double>());
1796 jfenwick 6517 escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1797 jfenwick 6057 &(*left)[0],
1798     &(*right)[0],
1799     chunksize,
1800     onumsteps,
1801     numsteps,
1802     resultStep,
1803     leftstep,
1804     rightstep,
1805     oleftstep,
1806     orightstep,
1807     lroffset,
1808     rroffset,
1809 jfenwick 6168 escript::ES_optype::DIV);
1810 caltinay 5972 break;
1811 jfenwick 2500 case POW:
1812 jfenwick 6057 //PROC_OP(double (double,double),::pow);
1813 jfenwick 6517 escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1814 jfenwick 6057 &(*left)[0],
1815     &(*right)[0],
1816     chunksize,
1817     onumsteps,
1818     numsteps,
1819     resultStep,
1820     leftstep,
1821     rightstep,
1822     oleftstep,
1823     orightstep,
1824     lroffset,
1825     rroffset,
1826 jfenwick 6168 escript::ES_optype::POW);
1827 caltinay 5972 break;
1828 jfenwick 2500 default:
1829 caltinay 5972 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1830 jfenwick 2500 }
1831 jfenwick 6512 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_r[roffset] << endl;)
1832     return &m_samples_r;
1833 jfenwick 2500 }
1834    
1835 jfenwick 6517 const DataTypes::CplxVectorType*
1836     DataLazy::resolveNodeBinaryCplx(int tid, int sampleNo, size_t& roffset) const
1837     {
1838     LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1839 jfenwick 2500
1840 jfenwick 6517 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1841     // first work out which of the children are expanded
1842     bool leftExp=(m_left->m_readytype=='E');
1843     bool rightExp=(m_right->m_readytype=='E');
1844     if (!leftExp && !rightExp)
1845     {
1846     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1847     }
1848     bool leftScalar=(m_left->getRank()==0);
1849     bool rightScalar=(m_right->getRank()==0);
1850     if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1851     {
1852     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1853     }
1854     size_t leftsize=m_left->getNoValues();
1855     size_t rightsize=m_right->getNoValues();
1856     size_t chunksize=1; // how many doubles will be processed in one go
1857     int leftstep=0; // how far should the left offset advance after each step
1858     int rightstep=0;
1859     int numsteps=0; // total number of steps for the inner loop
1860     int oleftstep=0; // the o variables refer to the outer loop
1861     int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1862     int onumsteps=1;
1863    
1864     bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1865     bool RES=(rightExp && rightScalar);
1866     bool LS=(!leftExp && leftScalar); // left is a single scalar
1867     bool RS=(!rightExp && rightScalar);
1868     bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1869     bool RN=(!rightExp && !rightScalar);
1870     bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1871     bool REN=(rightExp && !rightScalar);
1872    
1873     if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1874     {
1875     chunksize=m_left->getNumDPPSample()*leftsize;
1876     leftstep=0;
1877     rightstep=0;
1878     numsteps=1;
1879     }
1880     else if (LES || RES)
1881     {
1882     chunksize=1;
1883     if (LES) // left is an expanded scalar
1884     {
1885     if (RS)
1886     {
1887     leftstep=1;
1888     rightstep=0;
1889     numsteps=m_left->getNumDPPSample();
1890     }
1891     else // RN or REN
1892     {
1893     leftstep=0;
1894     oleftstep=1;
1895     rightstep=1;
1896     orightstep=(RN ? -(int)rightsize : 0);
1897     numsteps=rightsize;
1898     onumsteps=m_left->getNumDPPSample();
1899     }
1900     }
1901     else // right is an expanded scalar
1902     {
1903     if (LS)
1904     {
1905     rightstep=1;
1906     leftstep=0;
1907     numsteps=m_right->getNumDPPSample();
1908     }
1909     else
1910     {
1911     rightstep=0;
1912     orightstep=1;
1913     leftstep=1;
1914     oleftstep=(LN ? -(int)leftsize : 0);
1915     numsteps=leftsize;
1916     onumsteps=m_right->getNumDPPSample();
1917     }
1918     }
1919     }
1920     else // this leaves (LEN, RS), (LEN, RN) and their transposes
1921     {
1922     if (LEN) // and Right will be a single value
1923     {
1924     chunksize=rightsize;
1925     leftstep=rightsize;
1926     rightstep=0;
1927     numsteps=m_left->getNumDPPSample();
1928     if (RS)
1929     {
1930     numsteps*=leftsize;
1931     }
1932     }
1933     else // REN
1934     {
1935     chunksize=leftsize;
1936     rightstep=leftsize;
1937     leftstep=0;
1938     numsteps=m_right->getNumDPPSample();
1939     if (LS)
1940     {
1941     numsteps*=rightsize;
1942     }
1943     }
1944     }
1945    
1946     int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1947     // Get the values of sub-expressions
1948     const CplxVectorType* left=m_left->resolveNodeSampleCplx(tid,sampleNo,lroffset);
1949     const CplxVectorType* right=m_right->resolveNodeSampleCplx(tid,sampleNo,rroffset);
1950     LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1951     LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1952     LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1953     LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1954     LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1955     LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1956     LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1957    
1958     LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1959     LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1960    
1961    
1962     roffset=m_samplesize*tid;
1963     cplx_t* resultp=&(m_samples_c[roffset]); // results are stored at the vector offset we received
1964     switch(m_op)
1965     {
1966     case ADD:
1967     //PROC_OP(NO_ARG,plus<double>());
1968     escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
1969     &(*left)[0],
1970     &(*right)[0],
1971     chunksize,
1972     onumsteps,
1973     numsteps,
1974     resultStep,
1975     leftstep,
1976     rightstep,
1977     oleftstep,
1978     orightstep,
1979     lroffset,
1980     rroffset,
1981     escript::ES_optype::ADD);
1982     break;
1983     case SUB:
1984     escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
1985     &(*left)[0],
1986     &(*right)[0],
1987     chunksize,
1988     onumsteps,
1989     numsteps,
1990     resultStep,
1991     leftstep,
1992     rightstep,
1993     oleftstep,
1994     orightstep,
1995     lroffset,
1996     rroffset,
1997     escript::ES_optype::SUB);
1998     //PROC_OP(NO_ARG,minus<double>());
1999     break;
2000     case MUL:
2001     //PROC_OP(NO_ARG,multiplies<double>());
2002     escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
2003     &(*left)[0],
2004     &(*right)[0],
2005     chunksize,
2006     onumsteps,
2007     numsteps,
2008     resultStep,
2009     leftstep,
2010     rightstep,
2011     oleftstep,
2012     orightstep,
2013     lroffset,
2014     rroffset,
2015     escript::ES_optype::MUL);
2016     break;
2017     case DIV:
2018     //PROC_OP(NO_ARG,divides<double>());
2019     escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
2020     &(*left)[0],
2021     &(*right)[0],
2022     chunksize,
2023     onumsteps,
2024     numsteps,
2025     resultStep,
2026     leftstep,
2027     rightstep,
2028     oleftstep,
2029     orightstep,
2030     lroffset,
2031     rroffset,
2032     escript::ES_optype::DIV);
2033     break;
2034     case POW:
2035     //PROC_OP(double (double,double),::pow);
2036     escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
2037     &(*left)[0],
2038     &(*right)[0],
2039     chunksize,
2040     onumsteps,
2041     numsteps,
2042     resultStep,
2043     leftstep,
2044     rightstep,
2045     oleftstep,
2046     orightstep,
2047     lroffset,
2048     rroffset,
2049     escript::ES_optype::POW);
2050     break;
2051     default:
2052     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2053     }
2054     LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_c[roffset] << endl;)
2055     return &m_samples_c;
2056     }
2057    
2058    
2059 jfenwick 2500 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2060     // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2061     // unlike the other resolve helpers, we must treat these datapoints separately.
2062 jfenwick 5938 const DataTypes::RealVectorType*
2063 jfenwick 4621 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
2064 jfenwick 2500 {
2065     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2066    
2067 caltinay 5972 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2068     // first work out which of the children are expanded
2069 jfenwick 2500 bool leftExp=(m_left->m_readytype=='E');
2070     bool rightExp=(m_right->m_readytype=='E');
2071     int steps=getNumDPPSample();
2072 caltinay 5972 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2073 jfenwick 2500 int rightStep=(rightExp?m_right->getNoValues() : 0);
2074    
2075     int resultStep=getNoValues();
2076     roffset=m_samplesize*tid;
2077     size_t offset=roffset;
2078    
2079 jfenwick 6042 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2080 jfenwick 2500
2081 jfenwick 6042 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2082 jfenwick 2500
2083     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2084     cout << getNoValues() << endl;)
2085    
2086    
2087     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2088     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2089     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2090     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2091     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2092     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2093     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2094    
2095 jfenwick 6512 double* resultp=&(m_samples_r[offset]); // results are stored at the vector offset we received
2096 jfenwick 2500 switch(m_op)
2097     {
2098     case PROD:
2099 caltinay 5972 for (int i=0;i<steps;++i,resultp+=resultStep)
2100     {
2101     const double *ptr_0 = &((*left)[lroffset]);
2102     const double *ptr_1 = &((*right)[rroffset]);
2103 jfenwick 2500
2104     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2105     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2106    
2107 caltinay 5972 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2108 jfenwick 2500
2109 caltinay 5972 lroffset+=leftStep;
2110     rroffset+=rightStep;
2111     }
2112     break;
2113 jfenwick 2500 default:
2114 caltinay 5972 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2115 jfenwick 2500 }
2116     roffset=offset;
2117 jfenwick 6512 return &m_samples_r;
2118 jfenwick 2500 }
2119    
2120 jfenwick 6517 const DataTypes::CplxVectorType*
2121     DataLazy::resolveNodeTProdCplx(int tid, int sampleNo, size_t& roffset) const
2122     {
2123     LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2124 jfenwick 1898
2125 jfenwick 6517 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
2126     // first work out which of the children are expanded
2127     bool leftExp=(m_left->m_readytype=='E');
2128     bool rightExp=(m_right->m_readytype=='E');
2129     int steps=getNumDPPSample();
2130     int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
2131     int rightStep=(rightExp?m_right->getNoValues() : 0);
2132    
2133     int resultStep=getNoValues();
2134     roffset=m_samplesize*tid;
2135     size_t offset=roffset;
2136    
2137     const CplxVectorType* left=m_left->resolveNodeSampleCplx(tid, sampleNo, lroffset);
2138    
2139     const CplxVectorType* right=m_right->resolveNodeSampleCplx(tid, sampleNo, rroffset);
2140    
2141     LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
2142     cout << getNoValues() << endl;)
2143    
2144    
2145     LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2146     LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2147     LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2148     LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2149     LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2150     LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2151     LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2152    
2153     cplx_t* resultp=&(m_samples_c[offset]); // results are stored at the vector offset we received
2154     switch(m_op)
2155     {
2156     case PROD:
2157     for (int i=0;i<steps;++i,resultp+=resultStep)
2158     {
2159     const cplx_t *ptr_0 = &((*left)[lroffset]);
2160     const cplx_t *ptr_1 = &((*right)[rroffset]);
2161    
2162     LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2163     LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2164    
2165     matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2166    
2167     lroffset+=leftStep;
2168     rroffset+=rightStep;
2169     }
2170     break;
2171     default:
2172     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2173     }
2174     roffset=offset;
2175     return &m_samples_c;
2176     }
2177    
2178 jfenwick 5938 const DataTypes::RealVectorType*
2179 jfenwick 4621 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
2180 jfenwick 1879 {
2181 jfenwick 2271 #ifdef _OPENMP
2182 caltinay 5972 int tid=omp_get_thread_num();
2183 jfenwick 2271 #else
2184 caltinay 5972 int tid=0;
2185 jfenwick 2271 #endif
2186 jfenwick 2777
2187     #ifdef LAZY_STACK_PROF
2188 caltinay 5972 stackstart[tid]=&tid;
2189     stackend[tid]=&tid;
2190     const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
2191     size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
2192     #pragma omp critical
2193     if (d>maxstackuse)
2194     {
2195 jfenwick 2777 cout << "Max resolve Stack use " << d << endl;
2196 caltinay 5972 maxstackuse=d;
2197     }
2198     return r;
2199 jfenwick 2777 #else
2200 caltinay 5972 return resolveNodeSample(tid, sampleNo, roffset);
2201 jfenwick 2777 #endif
2202 jfenwick 2271 }
2203    
2204    
2205 jfenwick 2497 // This needs to do the work of the identity constructor
2206 jfenwick 2177 void
2207     DataLazy::resolveToIdentity()
2208     {
2209     if (m_op==IDENTITY)
2210 caltinay 5972 return;
2211 jfenwick 6517 if (isComplex())
2212     {
2213     DataReady_ptr p=resolveNodeWorkerCplx();
2214     makeIdentity(p);
2215     }
2216     else
2217     {
2218     DataReady_ptr p=resolveNodeWorker();
2219     makeIdentity(p);
2220     }
2221 jfenwick 2177 }
2222 jfenwick 1889
2223 jfenwick 2177 void DataLazy::makeIdentity(const DataReady_ptr& p)
2224     {
2225     m_op=IDENTITY;
2226     m_axis_offset=0;
2227     m_transpose=0;
2228     m_SL=m_SM=m_SR=0;
2229     m_children=m_height=0;
2230     m_id=p;
2231     if(p->isConstant()) {m_readytype='C';}
2232     else if(p->isExpanded()) {m_readytype='E';}
2233     else if (p->isTagged()) {m_readytype='T';}
2234     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2235     m_samplesize=p->getNumDPPSample()*p->getNoValues();
2236     m_left.reset();
2237     m_right.reset();
2238 jfenwick 6512 m_iscompl=p->isComplex();
2239 jfenwick 2177 }
2240    
2241 jfenwick 2497
2242     DataReady_ptr
2243     DataLazy::resolve()
2244     {
2245     resolveToIdentity();
2246     return m_id;
2247     }
2248    
2249 jfenwick 2799
2250     /* This is really a static method but I think that caused problems in windows */
2251     void
2252     DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
2253     {
2254     if (dats.empty())
2255     {
2256 caltinay 5972 return;
2257 jfenwick 2799 }
2258     vector<DataLazy*> work;
2259     FunctionSpace fs=dats[0]->getFunctionSpace();
2260     bool match=true;
2261 jfenwick 2824 for (int i=dats.size()-1;i>=0;--i)
2262 jfenwick 2799 {
2263 caltinay 5972 if (dats[i]->m_readytype!='E')
2264     {
2265     dats[i]->collapse();
2266     }
2267     if (dats[i]->m_op!=IDENTITY)
2268     {
2269     work.push_back(dats[i]);
2270     if (fs!=dats[i]->getFunctionSpace())
2271     {
2272     match=false;
2273     }
2274     }
2275 jfenwick 2799 }
2276     if (work.empty())
2277     {
2278 caltinay 5972 return; // no work to do
2279 jfenwick 2799 }
2280 caltinay 5972 if (match) // all functionspaces match. Yes I realise this is overly strict
2281     { // it is possible that dats[0] is one of the objects which we discarded and
2282     // all the other functionspaces match.
2283     vector<DataExpanded*> dep;
2284 jfenwick 6042 vector<RealVectorType*> vecs;
2285 caltinay 5972 for (int i=0;i<work.size();++i)
2286     {
2287 jfenwick 6042 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
2288 caltinay 5972 vecs.push_back(&(dep[i]->getVectorRW()));
2289     }
2290     int totalsamples=work[0]->getNumSamples();
2291 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
2292 caltinay 5972 int sample;
2293     #pragma omp parallel private(sample, res)
2294     {
2295     size_t roffset=0;
2296     #pragma omp for schedule(static)
2297     for (sample=0;sample<totalsamples;++sample)
2298     {
2299     roffset=0;
2300     int j;
2301     for (j=work.size()-1;j>=0;--j)
2302     {
2303 jfenwick 2799 #ifdef _OPENMP
2304 caltinay 5972 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
2305 jfenwick 2799 #else
2306 caltinay 5972 res=work[j]->resolveNodeSample(0,sample,roffset);
2307 jfenwick 2799 #endif
2308 caltinay 5972 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
2309     memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
2310     }
2311     }
2312     }
2313     // Now we need to load the new results as identity ops into the lazy nodes
2314     for (int i=work.size()-1;i>=0;--i)
2315     {
2316 jfenwick 5985 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
2317 caltinay 5972 }
2318 jfenwick 2799 }
2319 caltinay 5972 else // functionspaces do not match
2320 jfenwick 2799 {
2321 caltinay 5972 for (int i=0;i<work.size();++i)
2322     {
2323     work[i]->resolveToIdentity();
2324     }
2325 jfenwick 2799 }
2326     }
2327    
2328    
2329    
2330 jfenwick 2500 // This version of resolve uses storage in each node to hold results
2331     DataReady_ptr
2332     DataLazy::resolveNodeWorker()
2333     {
2334 caltinay 5972 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2335 jfenwick 2500 {
2336     collapse();
2337     }
2338 caltinay 5972 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
2339 jfenwick 2500 {
2340     return m_id;
2341     }
2342 caltinay 5972 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2343 jfenwick 6042 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
2344     RealVectorType& resvec=result->getVectorRW();
2345 jfenwick 2500 DataReady_ptr resptr=DataReady_ptr(result);
2346    
2347     int sample;
2348     int totalsamples=getNumSamples();
2349 jfenwick 6042 const RealVectorType* res=0; // Storage for answer
2350 jfenwick 2500 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2351 jfenwick 2777 #pragma omp parallel private(sample,res)
2352 jfenwick 2500 {
2353 caltinay 5972 size_t roffset=0;
2354 jfenwick 2777 #ifdef LAZY_STACK_PROF
2355 caltinay 5972 stackstart[omp_get_thread_num()]=&roffset;
2356     stackend[omp_get_thread_num()]=&roffset;
2357 jfenwick 2777 #endif
2358 caltinay 5972 #pragma omp for schedule(static)
2359     for (sample=0;sample<totalsamples;++sample)
2360     {
2361     roffset=0;
2362 jfenwick 2500 #ifdef _OPENMP
2363 caltinay 5972 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2364 jfenwick 2500 #else
2365 caltinay 5972 res=resolveNodeSample(0,sample,roffset);
2366 jfenwick 2500 #endif
2367     LAZYDEBUG(cout << "Sample #" << sample << endl;)
2368     LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2369 caltinay 5972 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
2370     memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
2371     }
2372 jfenwick 2500 }
2373 jfenwick 2777 #ifdef LAZY_STACK_PROF
2374     for (int i=0;i<getNumberOfThreads();++i)
2375     {
2376 caltinay 5972 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
2377     // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
2378     if (r>maxstackuse)
2379     {
2380     maxstackuse=r;
2381     }
2382 jfenwick 2777 }
2383     cout << "Max resolve Stack use=" << maxstackus