Contents of /branches/clazy/escriptcore/src/DataLazy.cpp

Revision 2795
Wed Dec 2 02:39:23 2009 UTC by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 55927 byte(s)
Removed node limit on lazy expressions.
The parameter LAZY_STR_FMT {0,1,2} controls how lazy expressions are printed:
0 gives default output,
1 gives a tree form,
2 just prints depth.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2009 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 #include "EscriptParams.h"
32
33 #include <iomanip> // for some fancy formatting in debug
34
35 // #define LAZYDEBUG(X) if (privdebug){X;}
36 #define LAZYDEBUG(X)
37 namespace
38 {
39 bool privdebug=false;
40
41 #define ENABLEDEBUG privdebug=true;
42 #define DISABLEDEBUG privdebug=false;
43 }
44
45 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46
47 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48
49
50 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51
52 /*
53 How does DataLazy work?
54 ~~~~~~~~~~~~~~~~~~~~~~~
55
56 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57 denoted left and right.
58
59 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60 This means that all "internal" nodes in the structure are instances of DataLazy.
61
62 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63 Note that IDENTITY is not considered a unary operation.
64
65 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66 It must however form a DAG (directed acyclic graph).
67 I will refer to individual DataLazy objects within the structure as nodes.
68
69 Each node also stores:
70 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71 evaluated.
72 - m_children ~ the number of DataLazy nodes in the expression below this one.
73 - m_height ~ the depth of the expression below this one (used by SIZELIMIT to force very deep expressions to resolve).
74 - m_samplesize ~ the number of doubles stored in a sample.
75
76 When a new node is created, the above values are computed from the values in the child nodes.
77 Eg: for left+right, m_height=max(left.m_height, right.m_height)+1.
78
79 The resolve method, which produces a DataReady from a DataLazy, does the following:
80 1) Create a DataExpanded to hold the new result.
81 2) For each sample, call resolveNodeSample to compute its values and copy them into the result object.
82
83 (In the case of OpenMP, samples are resolved in parallel; each node's m_samples buffer holds one sample per thread.)
84
85 resolveNodeSample returns a ValueType* and sets roffset to the position within that vector where the result starts.
86 Normally this is the node's own m_samples buffer, but for identity nodes the internal vector of the wrapped
87 DataReady is returned instead.
88 The convention used is that resolve methods store their results starting at the offset they report via roffset.
89 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90
91 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93
94 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95 1) Add to the ES_optype.
96 2) Determine what opgroup your operation belongs to (X).
97 3) Add a string for the op to the end of ES_opstrings.
98 4) Increase ES_opcount.
99 5) Add an entry (X) to opgroups.
100 6) Add an entry to the switch in collapseToReady.
101 7) Add an entry to resolveX.
102 */
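/*
Editorial illustration (not part of the original source): a minimal sketch of how a small
expression DAG could be assembled and resolved using the constructors below. In practice
these nodes are created by the lazy operators on Data rather than by hand, and
"someReadyData" is a placeholder for any existing non-lazy DataAbstract_ptr.

    DataLazy_ptr a(new DataLazy(someReadyData));    // IDENTITY node wrapping a DataReady
    DataLazy_ptr c(new DataLazy(a, a, ADD));        // c = a + a : a DAG, not a tree
    DataReady_ptr r = c->resolve();                 // evaluate; c becomes an IDENTITY holding r
*/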
103
104
105 using namespace std;
106 using namespace boost;
107
108 namespace escript
109 {
110
111 namespace
112 {
113
114
115 // enabling this will print out whenever the maximum stacksize used by resolve increases
116 // it assumes _OPENMP is also in use
117 //#define LAZY_STACK_PROF
118
119
120
121 #ifndef _OPENMP
122 #ifdef LAZY_STACK_PROF
123 #undef LAZY_STACK_PROF
124 #endif
125 #endif
126
127
128 #ifdef LAZY_STACK_PROF
129 std::vector<void*> stackstart(getNumberOfThreads());
130 std::vector<void*> stackend(getNumberOfThreads());
131 size_t maxstackuse=0;
132 #endif
133
134 enum ES_opgroup
135 {
136 G_UNKNOWN,
137 G_IDENTITY,
138 G_BINARY, // pointwise operations with two arguments
139 G_UNARY, // pointwise operations with one argument
140 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
141 G_NP1OUT, // non-pointwise op with one output
142 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
143 G_TENSORPROD, // general tensor product
144 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
145 G_REDUCTION // non-pointwise unary op with a scalar output
146 };
147
148
149
150
151 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
152 "sin","cos","tan",
153 "asin","acos","atan","sinh","cosh","tanh","erf",
154 "asinh","acosh","atanh",
155 "log10","log","sign","abs","neg","pos","exp","sqrt",
156 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
157 "symmetric","nonsymmetric",
158 "prod",
159 "transpose", "trace",
160 "swapaxes",
161 "minval", "maxval"};
162 int ES_opcount=43;
163 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
164 G_UNARY,G_UNARY,G_UNARY, //10
165 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
166 G_UNARY,G_UNARY,G_UNARY, // 20
167 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
168 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
169 G_NP1OUT,G_NP1OUT,
170 G_TENSORPROD,
171 G_NP1OUT_P, G_NP1OUT_P,
172 G_NP1OUT_2P,
173 G_REDUCTION, G_REDUCTION};
174 inline
175 ES_opgroup
176 getOpgroup(ES_optype op)
177 {
178 return opgroups[op];
179 }
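/*
Editorial note: ES_opstrings and opgroups are parallel arrays indexed by the ES_optype value,
so a single enum constant selects both the printable name and the dispatch group. Assuming
the enum follows the table order above:

    opToString(PROD);   // "prod"
    getOpgroup(PROD);   // G_TENSORPROD, so resolveNodeSample dispatches to resolveNodeTProd

Adding an operator therefore means appending to ES_opstrings and opgroups at the same index
and bumping ES_opcount, as listed in the comment block at the top of this file.
*/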
180
181 // return the FunctionSpace of the result of "left op right"
182 FunctionSpace
183 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
184 {
185 // perhaps this should call interpolate and throw or something?
186 // maybe we need an interpolate node -
187 // that way, if interpolate is required in any other op we can just throw a
188 // programming error exception.
189
190 FunctionSpace l=left->getFunctionSpace();
191 FunctionSpace r=right->getFunctionSpace();
192 if (l!=r)
193 {
194 if (r.probeInterpolation(l))
195 {
196 return l;
197 }
198 if (l.probeInterpolation(r))
199 {
200 return r;
201 }
202 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
203 }
204 return l;
205 }
206
207 // return the shape of the result of "left op right"
208 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
209 DataTypes::ShapeType
210 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
211 {
212 if (left->getShape()!=right->getShape())
213 {
214 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
215 {
216 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
217 }
218
219 if (left->getRank()==0) // we need to allow scalar * anything
220 {
221 return right->getShape();
222 }
223 if (right->getRank()==0)
224 {
225 return left->getShape();
226 }
227 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
228 }
229 return left->getShape();
230 }
231
232 // return the shape for "op left"
233
234 DataTypes::ShapeType
235 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
236 {
237 switch(op)
238 {
239 case TRANS:
240 { // for the scoping of variables
241 const DataTypes::ShapeType& s=left->getShape();
242 DataTypes::ShapeType sh;
243 int rank=left->getRank();
244 if (axis_offset<0 || axis_offset>rank)
245 {
246 stringstream e;
247 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
248 throw DataException(e.str());
249 }
250 for (int i=0; i<rank; i++)
251 {
252 int index = (axis_offset+i)%rank;
253 sh.push_back(s[index]); // Append to new shape
254 }
255 return sh;
256 }
257 break;
258 case TRACE:
259 {
260 int rank=left->getRank();
261 if (rank<2)
262 {
263 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
264 }
265 if ((axis_offset>rank-2) || (axis_offset<0))
266 {
267 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
268 }
269 if (rank==2)
270 {
271 return DataTypes::scalarShape;
272 }
273 else if (rank==3)
274 {
275 DataTypes::ShapeType sh;
276 if (axis_offset==0)
277 {
278 sh.push_back(left->getShape()[2]);
279 }
280 else // offset==1
281 {
282 sh.push_back(left->getShape()[0]);
283 }
284 return sh;
285 }
286 else if (rank==4)
287 {
288 DataTypes::ShapeType sh;
289 const DataTypes::ShapeType& s=left->getShape();
290 if (axis_offset==0)
291 {
292 sh.push_back(s[2]);
293 sh.push_back(s[3]);
294 }
295 else if (axis_offset==1)
296 {
297 sh.push_back(s[0]);
298 sh.push_back(s[3]);
299 }
300 else // offset==2
301 {
302 sh.push_back(s[0]);
303 sh.push_back(s[1]);
304 }
305 return sh;
306 }
307 else // unknown rank
308 {
309 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
310 }
311 }
312 break;
313 default:
314 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
315 }
316 }
317
318 DataTypes::ShapeType
319 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
320 {
321 // This code taken from the Data.cpp swapaxes() method
322 // Some of the checks are probably redundant here
323 int axis0_tmp,axis1_tmp;
324 const DataTypes::ShapeType& s=left->getShape();
325 DataTypes::ShapeType out_shape;
326 // Build the output shape by exchanging the shape entries at positions axis0 and axis1;
327 // all other dimensions are copied across unchanged.
328 int rank=left->getRank();
329 if (rank<2) {
330 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
331 }
332 if (axis0<0 || axis0>rank-1) {
333 stringstream e;
334 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
335 throw DataException(e.str());
336 }
337 if (axis1<0 || axis1>rank-1) {
338 stringstream e;
339 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
340 throw DataException(e.str());
341 }
342 if (axis0 == axis1) {
343 throw DataException("Error - Data::swapaxes: axis indices must be different.");
344 }
345 if (axis0 > axis1) {
346 axis0_tmp=axis1;
347 axis1_tmp=axis0;
348 } else {
349 axis0_tmp=axis0;
350 axis1_tmp=axis1;
351 }
352 for (int i=0; i<rank; i++) {
353 if (i == axis0_tmp) {
354 out_shape.push_back(s[axis1_tmp]);
355 } else if (i == axis1_tmp) {
356 out_shape.push_back(s[axis0_tmp]);
357 } else {
358 out_shape.push_back(s[i]);
359 }
360 }
361 return out_shape;
362 }
363
364
365 // determine the output shape for the general tensor product operation
366 // the additional parameters return information required later for the product
367 // the majority of this code is copy pasted from C_General_Tensor_Product
368 DataTypes::ShapeType
369 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
370 {
371
372 // Get rank and shape of inputs
373 int rank0 = left->getRank();
374 int rank1 = right->getRank();
375 const DataTypes::ShapeType& shape0 = left->getShape();
376 const DataTypes::ShapeType& shape1 = right->getShape();
377
378 // Prepare for the loops of the product and verify compatibility of shapes
379 int start0=0, start1=0;
380 if (transpose == 0) {}
381 else if (transpose == 1) { start0 = axis_offset; }
382 else if (transpose == 2) { start1 = rank1-axis_offset; }
383 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
384
385 if (rank0<axis_offset)
386 {
387 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
388 }
389
390 // Adjust the shapes for transpose
391 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
392 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
393 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
394 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
395
396 // Prepare for the loops of the product
397 SL=1, SM=1, SR=1;
398 for (int i=0; i<rank0-axis_offset; i++) {
399 SL *= tmpShape0[i];
400 }
401 for (int i=rank0-axis_offset; i<rank0; i++) {
402 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
403 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
404 }
405 SM *= tmpShape0[i];
406 }
407 for (int i=axis_offset; i<rank1; i++) {
408 SR *= tmpShape1[i];
409 }
410
411 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
412 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
413 { // block to limit the scope of out_index
414 int out_index=0;
415 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
416 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
417 }
418
419 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
420 {
421 ostringstream os;
422 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
423 throw DataException(os.str());
424 }
425
426 return shape2;
427 }
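/*
Worked example (editorial note): for a plain matrix-matrix product, i.e. transpose=0,
axis_offset=1, shape0=(2,3) and shape1=(3,4), the code above yields

    SL = 2          // product of the leading (uncontracted) dims of the left argument
    SM = 3          // product of the contracted dims, which must match on both arguments
    SR = 4          // product of the trailing (uncontracted) dims of the right argument
    shape2 = (2,4)  // leading dims of left followed by trailing dims of right

resolveNodeTProd later passes SL, SM and SR to matrix_matrix_product, which multiplies an
SLxSM block by an SMxSR block for each datapoint.
*/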
428
429 } // end anonymous namespace
430
431
432
433 // Return a string representing the operation
434 const std::string&
435 opToString(ES_optype op)
436 {
437 if (op<0 || op>=ES_opcount)
438 {
439 op=UNKNOWNOP;
440 }
441 return ES_opstrings[op];
442 }
443
444 void DataLazy::LazyNodeSetup()
445 {
446 #ifdef _OPENMP
447 int numthreads=omp_get_max_threads();
448 m_samples.resize(numthreads*m_samplesize);
449 m_sampleids=new int[numthreads];
450 for (int i=0;i<numthreads;++i)
451 {
452 m_sampleids[i]=-1;
453 }
454 #else
455 m_samples.resize(m_samplesize);
456 m_sampleids=new int[1];
457 m_sampleids[0]=-1;
458 #endif // _OPENMP
459 }
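/*
Editorial note: m_samples is laid out as one sample-sized slot per thread and m_sampleids
records which sample currently occupies each slot. A resolve method working on behalf of
thread tid writes into its own slot only:

    roffset = tid * m_samplesize;              // this thread's slot
    double* result = &(m_samples[roffset]);    // fill m_samplesize doubles starting here

Because the structure is a DAG, a shared subexpression can be visited twice for the same
sample; resolveNodeSample then finds m_sampleids[tid]==sampleNo and returns the cached slot
instead of recomputing it.
*/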
460
461
462 // Creates an identity node
463 DataLazy::DataLazy(DataAbstract_ptr p)
464 : parent(p->getFunctionSpace(),p->getShape())
465 ,m_sampleids(0),
466 m_samples(1)
467 {
468 if (p->isLazy())
469 {
470 // I don't want identity of Lazy.
471 // Question: Why would that be so bad?
472 // Answer: We assume that the child of ID is something we can call getVector on
473 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
474 }
475 else
476 {
477 p->makeLazyShared();
478 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
479 makeIdentity(dr);
480 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
481 }
482 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
483 }
484
485 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
486 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
487 m_op(op),
488 m_axis_offset(0),
489 m_transpose(0),
490 m_SL(0), m_SM(0), m_SR(0)
491 {
492 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
493 {
494 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
495 }
496
497 DataLazy_ptr lleft;
498 if (!left->isLazy())
499 {
500 lleft=DataLazy_ptr(new DataLazy(left));
501 }
502 else
503 {
504 lleft=dynamic_pointer_cast<DataLazy>(left);
505 }
506 m_readytype=lleft->m_readytype;
507 m_left=lleft;
508 m_samplesize=getNumDPPSample()*getNoValues();
509 m_children=m_left->m_children+1;
510 m_height=m_left->m_height+1;
511 LazyNodeSetup();
512 SIZELIMIT
513 }
514
515
516 // In this constructor we need to consider interpolation
517 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
518 : parent(resultFS(left,right,op), resultShape(left,right,op)),
519 m_op(op),
520 m_SL(0), m_SM(0), m_SR(0)
521 {
522 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
523 if ((getOpgroup(op)!=G_BINARY))
524 {
525 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
526 }
527
528 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
529 {
530 FunctionSpace fs=getFunctionSpace();
531 Data ltemp(left);
532 Data tmp(ltemp,fs);
533 left=tmp.borrowDataPtr();
534 }
535 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
536 {
537 Data tmp(Data(right),getFunctionSpace());
538 right=tmp.borrowDataPtr();
539 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
540 }
541 left->operandCheck(*right);
542
543 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
544 {
545 m_left=dynamic_pointer_cast<DataLazy>(left);
546 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
547 }
548 else
549 {
550 m_left=DataLazy_ptr(new DataLazy(left));
551 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
552 }
553 if (right->isLazy())
554 {
555 m_right=dynamic_pointer_cast<DataLazy>(right);
556 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
557 }
558 else
559 {
560 m_right=DataLazy_ptr(new DataLazy(right));
561 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
562 }
563 char lt=m_left->m_readytype;
564 char rt=m_right->m_readytype;
565 if (lt=='E' || rt=='E')
566 {
567 m_readytype='E';
568 }
569 else if (lt=='T' || rt=='T')
570 {
571 m_readytype='T';
572 }
573 else
574 {
575 m_readytype='C';
576 }
577 m_samplesize=getNumDPPSample()*getNoValues();
578 m_children=m_left->m_children+m_right->m_children+2;
579 m_height=max(m_left->m_height,m_right->m_height)+1;
580 LazyNodeSetup();
581 SIZELIMIT
582 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
583 }
584
585 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
586 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
587 m_op(op),
588 m_axis_offset(axis_offset),
589 m_transpose(transpose)
590 {
591 if ((getOpgroup(op)!=G_TENSORPROD))
592 {
593 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
594 }
595 if ((transpose>2) || (transpose<0))
596 {
597 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
598 }
599 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
600 {
601 FunctionSpace fs=getFunctionSpace();
602 Data ltemp(left);
603 Data tmp(ltemp,fs);
604 left=tmp.borrowDataPtr();
605 }
606 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
607 {
608 Data tmp(Data(right),getFunctionSpace());
609 right=tmp.borrowDataPtr();
610 }
611 // left->operandCheck(*right);
612
613 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
614 {
615 m_left=dynamic_pointer_cast<DataLazy>(left);
616 }
617 else
618 {
619 m_left=DataLazy_ptr(new DataLazy(left));
620 }
621 if (right->isLazy())
622 {
623 m_right=dynamic_pointer_cast<DataLazy>(right);
624 }
625 else
626 {
627 m_right=DataLazy_ptr(new DataLazy(right));
628 }
629 char lt=m_left->m_readytype;
630 char rt=m_right->m_readytype;
631 if (lt=='E' || rt=='E')
632 {
633 m_readytype='E';
634 }
635 else if (lt=='T' || rt=='T')
636 {
637 m_readytype='T';
638 }
639 else
640 {
641 m_readytype='C';
642 }
643 m_samplesize=getNumDPPSample()*getNoValues();
644 m_children=m_left->m_children+m_right->m_children+2;
645 m_height=max(m_left->m_height,m_right->m_height)+1;
646 LazyNodeSetup();
647 SIZELIMIT
648 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
649 }
650
651
652 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
653 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
654 m_op(op),
655 m_axis_offset(axis_offset),
656 m_transpose(0),
657 m_tol(0)
658 {
659 if ((getOpgroup(op)!=G_NP1OUT_P))
660 {
661 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
662 }
663 DataLazy_ptr lleft;
664 if (!left->isLazy())
665 {
666 lleft=DataLazy_ptr(new DataLazy(left));
667 }
668 else
669 {
670 lleft=dynamic_pointer_cast<DataLazy>(left);
671 }
672 m_readytype=lleft->m_readytype;
673 m_left=lleft;
674 m_samplesize=getNumDPPSample()*getNoValues();
675 m_children=m_left->m_children+1;
676 m_height=m_left->m_height+1;
677 LazyNodeSetup();
678 SIZELIMIT
679 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
680 }
681
682 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
683 : parent(left->getFunctionSpace(), left->getShape()),
684 m_op(op),
685 m_axis_offset(0),
686 m_transpose(0),
687 m_tol(tol)
688 {
689 if ((getOpgroup(op)!=G_UNARY_P))
690 {
691 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
692 }
693 DataLazy_ptr lleft;
694 if (!left->isLazy())
695 {
696 lleft=DataLazy_ptr(new DataLazy(left));
697 }
698 else
699 {
700 lleft=dynamic_pointer_cast<DataLazy>(left);
701 }
702 m_readytype=lleft->m_readytype;
703 m_left=lleft;
704 m_samplesize=getNumDPPSample()*getNoValues();
705 m_children=m_left->m_children+1;
706 m_height=m_left->m_height+1;
707 LazyNodeSetup();
708 SIZELIMIT
709 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
710 }
711
712
713 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
714 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
715 m_op(op),
716 m_axis_offset(axis0),
717 m_transpose(axis1),
718 m_tol(0)
719 {
720 if ((getOpgroup(op)!=G_NP1OUT_2P))
721 {
722 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
723 }
724 DataLazy_ptr lleft;
725 if (!left->isLazy())
726 {
727 lleft=DataLazy_ptr(new DataLazy(left));
728 }
729 else
730 {
731 lleft=dynamic_pointer_cast<DataLazy>(left);
732 }
733 m_readytype=lleft->m_readytype;
734 m_left=lleft;
735 m_samplesize=getNumDPPSample()*getNoValues();
736 m_children=m_left->m_children+1;
737 m_height=m_left->m_height+1;
738 LazyNodeSetup();
739 SIZELIMIT
740 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
741 }
742
743 DataLazy::~DataLazy()
744 {
745 delete[] m_sampleids;
746 }
747
748
749 /*
750 \brief Evaluates the expression using methods on Data.
751 This does the work for the collapse method.
752 For reasons of efficiency do not call this method on DataExpanded nodes.
753 */
754 DataReady_ptr
755 DataLazy::collapseToReady()
756 {
757 if (m_readytype=='E')
758 { // this is more an efficiency concern than anything else
759 throw DataException("Programmer Error - do not use collapse on Expanded data.");
760 }
761 if (m_op==IDENTITY)
762 {
763 return m_id;
764 }
765 DataReady_ptr pleft=m_left->collapseToReady();
766 Data left(pleft);
767 Data right;
768 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
769 {
770 right=Data(m_right->collapseToReady());
771 }
772 Data result;
773 switch(m_op)
774 {
775 case ADD:
776 result=left+right;
777 break;
778 case SUB:
779 result=left-right;
780 break;
781 case MUL:
782 result=left*right;
783 break;
784 case DIV:
785 result=left/right;
786 break;
787 case SIN:
788 result=left.sin();
789 break;
790 case COS:
791 result=left.cos();
792 break;
793 case TAN:
794 result=left.tan();
795 break;
796 case ASIN:
797 result=left.asin();
798 break;
799 case ACOS:
800 result=left.acos();
801 break;
802 case ATAN:
803 result=left.atan();
804 break;
805 case SINH:
806 result=left.sinh();
807 break;
808 case COSH:
809 result=left.cosh();
810 break;
811 case TANH:
812 result=left.tanh();
813 break;
814 case ERF:
815 result=left.erf();
816 break;
817 case ASINH:
818 result=left.asinh();
819 break;
820 case ACOSH:
821 result=left.acosh();
822 break;
823 case ATANH:
824 result=left.atanh();
825 break;
826 case LOG10:
827 result=left.log10();
828 break;
829 case LOG:
830 result=left.log();
831 break;
832 case SIGN:
833 result=left.sign();
834 break;
835 case ABS:
836 result=left.abs();
837 break;
838 case NEG:
839 result=left.neg();
840 break;
841 case POS:
842 // it doesn't mean anything for delayed.
843 // it will just trigger a deep copy of the lazy object
844 throw DataException("Programmer error - POS not supported for lazy data.");
845 break;
846 case EXP:
847 result=left.exp();
848 break;
849 case SQRT:
850 result=left.sqrt();
851 break;
852 case RECIP:
853 result=left.oneOver();
854 break;
855 case GZ:
856 result=left.wherePositive();
857 break;
858 case LZ:
859 result=left.whereNegative();
860 break;
861 case GEZ:
862 result=left.whereNonNegative();
863 break;
864 case LEZ:
865 result=left.whereNonPositive();
866 break;
867 case NEZ:
868 result=left.whereNonZero(m_tol);
869 break;
870 case EZ:
871 result=left.whereZero(m_tol);
872 break;
873 case SYM:
874 result=left.symmetric();
875 break;
876 case NSYM:
877 result=left.nonsymmetric();
878 break;
879 case PROD:
880 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
881 break;
882 case TRANS:
883 result=left.transpose(m_axis_offset);
884 break;
885 case TRACE:
886 result=left.trace(m_axis_offset);
887 break;
888 case SWAP:
889 result=left.swapaxes(m_axis_offset, m_transpose);
890 break;
891 case MINVAL:
892 result=left.minval();
893 break;
894 case MAXVAL:
895 result=left.maxval();
896 break;
897 default:
898 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
899 }
900 return result.borrowReadyPtr();
901 }
902
903 /*
904 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
905 This method uses the original methods on the Data class to evaluate the expressions.
906 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
907 the purpose of using DataLazy in the first place).
908 */
909 void
910 DataLazy::collapse()
911 {
912 if (m_op==IDENTITY)
913 {
914 return;
915 }
916 if (m_readytype=='E')
917 { // this is more an efficiency concern than anything else
918 throw DataException("Programmer Error - do not use collapse on Expanded data.");
919 }
920 m_id=collapseToReady();
921 m_op=IDENTITY;
922 }
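/*
Editorial note: collapse() is the eager fallback for subexpressions that are not Expanded.
For example, if every leaf of the expression is Constant (m_readytype=='C'),
resolveNodeWorker calls collapse(), which evaluates the whole tree once through the
ordinary Data operators and rewrites this node in place:

    m_id = collapseToReady();   // e.g. sin(C + C) evaluated eagerly via operator+ and Data::sin
    m_op = IDENTITY;

From then on resolveNodeSample simply hands out m_id's vector for every sample.
*/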
923
924
925
926
927
928
929 #define PROC_OP(TYPE,X) \
930 for (int j=0;j<onumsteps;++j)\
931 {\
932 for (int i=0;i<numsteps;++i,resultp+=resultStep) \
933 { \
934 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
935 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
936 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
937 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
938 lroffset+=leftstep; \
939 rroffset+=rightstep; \
940 }\
941 lroffset+=oleftstep;\
942 rroffset+=orightstep;\
943 }
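/*
Editorial note: PROC_OP expands into the double loop used by resolveNodeBinary, which
invokes it as, for example,

    PROC_OP(NO_ARG, plus<double>());

The step variables computed in resolveNodeBinary control the walk. Two concrete cases:
  - both children Expanded with the same (non-scalar) shape: chunksize spans the whole
    sample and numsteps==onumsteps==1, so one tensor_binary_operation call does the job;
  - Expanded non-scalar + constant scalar: chunksize==1, leftstep==1, rightstep==0 and
    numsteps==getNumDPPSample()*leftsize, so each double of the left sample is combined
    with the same scalar value.
*/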
944
945
946 // The result will be stored in m_samples
947 // The return value is a pointer to the DataVector, offset is the offset within the return value
948 const DataTypes::ValueType*
949 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
950 {
951 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
952 // collapse so we have an 'E' node or an IDENTITY for some other type
953 if (m_readytype!='E' && m_op!=IDENTITY)
954 {
955 collapse();
956 }
957 if (m_op==IDENTITY)
958 {
959 const ValueType& vec=m_id->getVectorRO();
960 roffset=m_id->getPointOffset(sampleNo, 0);
961 #ifdef LAZY_STACK_PROF
962 int x;
963 if (&x<stackend[omp_get_thread_num()])
964 {
965 stackend[omp_get_thread_num()]=&x;
966 }
967 #endif
968 return &(vec);
969 }
970 if (m_readytype!='E')
971 {
972 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
973 }
974 if (m_sampleids[tid]==sampleNo)
975 {
976 roffset=tid*m_samplesize;
977 return &(m_samples); // sample is already resolved
978 }
979 m_sampleids[tid]=sampleNo;
980 switch (getOpgroup(m_op))
981 {
982 case G_UNARY:
983 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
984 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
985 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
986 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
987 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
988 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
989 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
990 default:
991 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
992 }
993 }
994
995 const DataTypes::ValueType*
996 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
997 {
998 // we assume that any collapsing has been done before we get here
999 // since we only have one argument we don't need to think about only
1000 // processing single points.
1001 // we will also know we won't get identity nodes
1002 if (m_readytype!='E')
1003 {
1004 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1005 }
1006 if (m_op==IDENTITY)
1007 {
1008 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1009 }
1010 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1011 const double* left=&((*leftres)[roffset]);
1012 roffset=m_samplesize*tid;
1013 double* result=&(m_samples[roffset]);
1014 switch (m_op)
1015 {
1016 case SIN:
1017 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1018 break;
1019 case COS:
1020 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1021 break;
1022 case TAN:
1023 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1024 break;
1025 case ASIN:
1026 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1027 break;
1028 case ACOS:
1029 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1030 break;
1031 case ATAN:
1032 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1033 break;
1034 case SINH:
1035 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1036 break;
1037 case COSH:
1038 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1039 break;
1040 case TANH:
1041 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1042 break;
1043 case ERF:
1044 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1046 #else
1047 tensor_unary_operation(m_samplesize, left, result, ::erf);
1048 break;
1049 #endif
1050 case ASINH:
1051 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1053 #else
1054 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1055 #endif
1056 break;
1057 case ACOSH:
1058 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1059 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1060 #else
1061 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1062 #endif
1063 break;
1064 case ATANH:
1065 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1066 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1067 #else
1068 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1069 #endif
1070 break;
1071 case LOG10:
1072 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1073 break;
1074 case LOG:
1075 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1076 break;
1077 case SIGN:
1078 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1079 break;
1080 case ABS:
1081 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1082 break;
1083 case NEG:
1084 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1085 break;
1086 case POS:
1087 // it doesn't mean anything for delayed.
1088 // it will just trigger a deep copy of the lazy object
1089 throw DataException("Programmer error - POS not supported for lazy data.");
1090 break;
1091 case EXP:
1092 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1093 break;
1094 case SQRT:
1095 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1096 break;
1097 case RECIP:
1098 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1099 break;
1100 case GZ:
1101 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1102 break;
1103 case LZ:
1104 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1105 break;
1106 case GEZ:
1107 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1108 break;
1109 case LEZ:
1110 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1111 break;
1112 // These are actually G_UNARY_P operations, but I don't see a compelling reason to treat them differently
1113 case NEZ:
1114 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1115 break;
1116 case EZ:
1117 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1118 break;
1119
1120 default:
1121 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1122 }
1123 return &(m_samples);
1124 }
1125
1126
1127 const DataTypes::ValueType*
1128 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1129 {
1130 // we assume that any collapsing has been done before we get here
1131 // since we only have one argument we don't need to think about only
1132 // processing single points.
1133 // we will also know we won't get identity nodes
1134 if (m_readytype!='E')
1135 {
1136 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1137 }
1138 if (m_op==IDENTITY)
1139 {
1140 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1141 }
1142 size_t loffset=0;
1143 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1144
1145 roffset=m_samplesize*tid;
1146 unsigned int ndpps=getNumDPPSample();
1147 unsigned int psize=DataTypes::noValues(getShape());
1148 double* result=&(m_samples[roffset]);
1149 switch (m_op)
1150 {
1151 case MINVAL:
1152 {
1153 for (unsigned int z=0;z<ndpps;++z)
1154 {
1155 FMin op;
1156 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1157 loffset+=psize;
1158 result++;
1159 }
1160 }
1161 break;
1162 case MAXVAL:
1163 {
1164 for (unsigned int z=0;z<ndpps;++z)
1165 {
1166 FMax op;
1167 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1168 loffset+=psize;
1169 result++;
1170 }
1171 }
1172 break;
1173 default:
1174 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1175 }
1176 return &(m_samples);
1177 }
1178
1179 const DataTypes::ValueType*
1180 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1181 {
1182 // we assume that any collapsing has been done before we get here
1183 // since we only have one argument we don't need to think about only
1184 // processing single points.
1185 if (m_readytype!='E')
1186 {
1187 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1188 }
1189 if (m_op==IDENTITY)
1190 {
1191 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1192 }
1193 size_t subroffset;
1194 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1195 roffset=m_samplesize*tid;
1196 size_t loop=0;
1197 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1198 size_t step=getNoValues();
1199 size_t offset=roffset;
1200 switch (m_op)
1201 {
1202 case SYM:
1203 for (loop=0;loop<numsteps;++loop)
1204 {
1205 DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1206 subroffset+=step;
1207 offset+=step;
1208 }
1209 break;
1210 case NSYM:
1211 for (loop=0;loop<numsteps;++loop)
1212 {
1213 DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1214 subroffset+=step;
1215 offset+=step;
1216 }
1217 break;
1218 default:
1219 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1220 }
1221 return &m_samples;
1222 }
1223
1224 const DataTypes::ValueType*
1225 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1226 {
1227 // we assume that any collapsing has been done before we get here
1228 // since we only have one argument we don't need to think about only
1229 // processing single points.
1230 if (m_readytype!='E')
1231 {
1232 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1233 }
1234 if (m_op==IDENTITY)
1235 {
1236 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1237 }
1238 size_t subroffset;
1239 size_t offset;
1240 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1241 roffset=m_samplesize*tid;
1242 offset=roffset;
1243 size_t loop=0;
1244 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1245 size_t outstep=getNoValues();
1246 size_t instep=m_left->getNoValues();
1247 switch (m_op)
1248 {
1249 case TRACE:
1250 for (loop=0;loop<numsteps;++loop)
1251 {
1252 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1253 subroffset+=instep;
1254 offset+=outstep;
1255 }
1256 break;
1257 case TRANS:
1258 for (loop=0;loop<numsteps;++loop)
1259 {
1260 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1261 subroffset+=instep;
1262 offset+=outstep;
1263 }
1264 break;
1265 default:
1266 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1267 }
1268 return &m_samples;
1269 }
1270
1271
1272 const DataTypes::ValueType*
1273 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1274 {
1275 if (m_readytype!='E')
1276 {
1277 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1278 }
1279 if (m_op==IDENTITY)
1280 {
1281 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1282 }
1283 size_t subroffset;
1284 size_t offset;
1285 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1286 roffset=m_samplesize*tid;
1287 offset=roffset;
1288 size_t loop=0;
1289 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1290 size_t outstep=getNoValues();
1291 size_t instep=m_left->getNoValues();
1292 switch (m_op)
1293 {
1294 case SWAP:
1295 for (loop=0;loop<numsteps;++loop)
1296 {
1297 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1298 subroffset+=instep;
1299 offset+=outstep;
1300 }
1301 break;
1302 default:
1303 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1304 }
1305 return &m_samples;
1306 }
1307
1308
1309
1310 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1311 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1312 // If both children are expanded, then we can process them in a single operation (we treat
1313 // the whole sample as one big datapoint).
1314 // If one of the children is not expanded, then we need to treat each point in the sample
1315 // individually.
1316 // There is an additional complication when scalar operations are considered.
1317 // For example, 2+Vector.
1318 // In this case each double within the point is treated individually
1319 const DataTypes::ValueType*
1320 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1321 {
1322 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1323
1324 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1325 // first work out which of the children are expanded
1326 bool leftExp=(m_left->m_readytype=='E');
1327 bool rightExp=(m_right->m_readytype=='E');
1328 if (!leftExp && !rightExp)
1329 {
1330 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1331 }
1332 bool leftScalar=(m_left->getRank()==0);
1333 bool rightScalar=(m_right->getRank()==0);
1334 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1335 {
1336 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1337 }
1338 size_t leftsize=m_left->getNoValues();
1339 size_t rightsize=m_right->getNoValues();
1340 size_t chunksize=1; // how many doubles will be processed in one go
1341 int leftstep=0; // how far should the left offset advance after each step
1342 int rightstep=0;
1343 int numsteps=0; // total number of steps for the inner loop
1344 int oleftstep=0; // the o variables refer to the outer loop
1345 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1346 int onumsteps=1;
1347
1348 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1349 bool RES=(rightExp && rightScalar);
1350 bool LS=(!leftExp && leftScalar); // left is a single scalar
1351 bool RS=(!rightExp && rightScalar);
1352 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1353 bool RN=(!rightExp && !rightScalar);
1354 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1355 bool REN=(rightExp && !rightScalar);
1356
1357 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1358 {
1359 chunksize=m_left->getNumDPPSample()*leftsize;
1360 leftstep=0;
1361 rightstep=0;
1362 numsteps=1;
1363 }
1364 else if (LES || RES)
1365 {
1366 chunksize=1;
1367 if (LES) // left is an expanded scalar
1368 {
1369 if (RS)
1370 {
1371 leftstep=1;
1372 rightstep=0;
1373 numsteps=m_left->getNumDPPSample();
1374 }
1375 else // RN or REN
1376 {
1377 leftstep=0;
1378 oleftstep=1;
1379 rightstep=1;
1380 orightstep=(RN ? -(int)rightsize : 0);
1381 numsteps=rightsize;
1382 onumsteps=m_left->getNumDPPSample();
1383 }
1384 }
1385 else // right is an expanded scalar
1386 {
1387 if (LS)
1388 {
1389 rightstep=1;
1390 leftstep=0;
1391 numsteps=m_right->getNumDPPSample();
1392 }
1393 else
1394 {
1395 rightstep=0;
1396 orightstep=1;
1397 leftstep=1;
1398 oleftstep=(LN ? -(int)leftsize : 0);
1399 numsteps=leftsize;
1400 onumsteps=m_right->getNumDPPSample();
1401 }
1402 }
1403 }
1404 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1405 {
1406 if (LEN) // and Right will be a single value
1407 {
1408 chunksize=rightsize;
1409 leftstep=rightsize;
1410 rightstep=0;
1411 numsteps=m_left->getNumDPPSample();
1412 if (RS)
1413 {
1414 numsteps*=leftsize;
1415 }
1416 }
1417 else // REN
1418 {
1419 chunksize=leftsize;
1420 rightstep=leftsize;
1421 leftstep=0;
1422 numsteps=m_right->getNumDPPSample();
1423 if (LS)
1424 {
1425 numsteps*=rightsize;
1426 }
1427 }
1428 }
1429
1430 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1431 // Get the values of sub-expressions
1432 const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1433 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1434 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1435 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1436 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1437 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1438 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1439 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1440 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1441
1442 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1443 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1444
1445
1446 roffset=m_samplesize*tid;
1447 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1448 switch(m_op)
1449 {
1450 case ADD:
1451 PROC_OP(NO_ARG,plus<double>());
1452 break;
1453 case SUB:
1454 PROC_OP(NO_ARG,minus<double>());
1455 break;
1456 case MUL:
1457 PROC_OP(NO_ARG,multiplies<double>());
1458 break;
1459 case DIV:
1460 PROC_OP(NO_ARG,divides<double>());
1461 break;
1462 case POW:
1463 PROC_OP(double (double,double),::pow);
1464 break;
1465 default:
1466 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1467 }
1468 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1469 return &m_samples;
1470 }
1471
1472
1473 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1474 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1475 // unlike the other resolve helpers, we must treat these datapoints separately.
1476 const DataTypes::ValueType*
1477 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1478 {
1479 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1480
1481 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1482 // first work out which of the children are expanded
1483 bool leftExp=(m_left->m_readytype=='E');
1484 bool rightExp=(m_right->m_readytype=='E');
1485 int steps=getNumDPPSample();
1486 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1487 int rightStep=(rightExp?m_right->getNoValues() : 0);
1488
1489 int resultStep=getNoValues();
1490 roffset=m_samplesize*tid;
1491 size_t offset=roffset;
1492
1493 const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1494
1495 const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1496
1497 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1498 cout << getNoValues() << endl;)
1499
1500
1501 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1502 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1503 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1504 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1505 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1506 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1507 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1508
1509 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1510 switch(m_op)
1511 {
1512 case PROD:
1513 for (int i=0;i<steps;++i,resultp+=resultStep)
1514 {
1515 const double *ptr_0 = &((*left)[lroffset]);
1516 const double *ptr_1 = &((*right)[rroffset]);
1517
1518 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1519 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1520
1521 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1522
1523 lroffset+=leftStep;
1524 rroffset+=rightStep;
1525 }
1526 break;
1527 default:
1528 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1529 }
1530 roffset=offset;
1531 return &m_samples;
1532 }
1533
1534
1535 const DataTypes::ValueType*
1536 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1537 {
1538 #ifdef _OPENMP
1539 int tid=omp_get_thread_num();
1540 #else
1541 int tid=0;
1542 #endif
1543
1544 #ifdef LAZY_STACK_PROF
1545 stackstart[tid]=&tid;
1546 stackend[tid]=&tid;
1547 const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1548 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1549 #pragma omp critical
1550 if (d>maxstackuse)
1551 {
1552 cout << "Max resolve Stack use " << d << endl;
1553 maxstackuse=d;
1554 }
1555 return r;
1556 #else
1557 return resolveNodeSample(tid, sampleNo, roffset);
1558 #endif
1559 }
1560
1561
1562 // This needs to do the work of the identity constructor
1563 void
1564 DataLazy::resolveToIdentity()
1565 {
1566 if (m_op==IDENTITY)
1567 return;
1568 DataReady_ptr p=resolveNodeWorker();
1569 makeIdentity(p);
1570 }
1571
1572 void DataLazy::makeIdentity(const DataReady_ptr& p)
1573 {
1574 m_op=IDENTITY;
1575 m_axis_offset=0;
1576 m_transpose=0;
1577 m_SL=m_SM=m_SR=0;
1578 m_children=m_height=0;
1579 m_id=p;
1580 if(p->isConstant()) {m_readytype='C';}
1581 else if(p->isExpanded()) {m_readytype='E';}
1582 else if (p->isTagged()) {m_readytype='T';}
1583 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1584 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1585 m_left.reset();
1586 m_right.reset();
1587 }
1588
1589
1590 DataReady_ptr
1591 DataLazy::resolve()
1592 {
1593 resolveToIdentity();
1594 return m_id;
1595 }
1596
1597 // This version of resolve uses storage in each node to hold results
1598 DataReady_ptr
1599 DataLazy::resolveNodeWorker()
1600 {
1601 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1602 {
1603 collapse();
1604 }
1605 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1606 {
1607 return m_id;
1608 }
1609 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1610 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1611 ValueType& resvec=result->getVectorRW();
1612 DataReady_ptr resptr=DataReady_ptr(result);
1613
1614 int sample;
1615 int totalsamples=getNumSamples();
1616 const ValueType* res=0; // Storage for answer
1617 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1618 #pragma omp parallel private(sample,res)
1619 {
1620 size_t roffset=0;
1621 #ifdef LAZY_STACK_PROF
1622 stackstart[omp_get_thread_num()]=&roffset;
1623 stackend[omp_get_thread_num()]=&roffset;
1624 #endif
1625 #pragma omp for schedule(static)
1626 for (sample=0;sample<totalsamples;++sample)
1627 {
1628 roffset=0;
1629 #ifdef _OPENMP
1630 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1631 #else
1632 res=resolveNodeSample(0,sample,roffset);
1633 #endif
1634 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1635 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << endl; )
1636 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1637 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1638 }
1639 }
1640 #ifdef LAZY_STACK_PROF
1641 for (int i=0;i<getNumberOfThreads();++i)
1642 {
1643 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1644 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1645 if (r>maxstackuse)
1646 {
1647 maxstackuse=r;
1648 }
1649 }
1650 cout << "Max resolve Stack use=" << maxstackuse << endl;
1651 #endif
1652 return resptr;
1653 }
1654
1655 std::string
1656 DataLazy::toString() const
1657 {
1658 ostringstream oss;
1659 oss << "Lazy Data: [depth=" << m_height<< "] ";
1660 switch (escriptParams.getLAZY_STR_FMT())
1661 {
1662 case 1: // tree format
1663 oss << endl;
1664 intoTreeString(oss,"");
1665 break;
1666 case 2: // just the depth
1667 break;
1668 default:
1669 intoString(oss);
1670 break;
1671 }
1672 return oss.str();
1673 }
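/*
Editorial illustration: LAZY_STR_FMT (see the revision log at the top of this file) selects
between the branches above. For an expression like sin(C)*E the three formats produce
roughly the following (addresses and sizes are made up):

    format 0 (default):  Lazy Data: [depth=2] (sin(C@0x1a2b) * E@0x3c4d)
    format 1 (tree):     Lazy Data: [depth=2]
                         [0:  1] *
                         [0:  1] .sin
                         [0:  1] ..C@0x1a2b
                         [0:  1] .E@0x3c4d
    format 2 (depth):    Lazy Data: [depth=2]
*/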
1674
1675
1676 void
1677 DataLazy::intoString(ostringstream& oss) const
1678 {
1679 // oss << "[" << m_children <<";"<<m_height <<"]";
1680 switch (getOpgroup(m_op))
1681 {
1682 case G_IDENTITY:
1683 if (m_id->isExpanded())
1684 {
1685 oss << "E";
1686 }
1687 else if (m_id->isTagged())
1688 {
1689 oss << "T";
1690 }
1691 else if (m_id->isConstant())
1692 {
1693 oss << "C";
1694 }
1695 else
1696 {
1697 oss << "?";
1698 }
1699 oss << '@' << m_id.get();
1700 break;
1701 case G_BINARY:
1702 oss << '(';
1703 m_left->intoString(oss);
1704 oss << ' ' << opToString(m_op) << ' ';
1705 m_right->intoString(oss);
1706 oss << ')';
1707 break;
1708 case G_UNARY:
1709 case G_UNARY_P:
1710 case G_NP1OUT:
1711 case G_NP1OUT_P:
1712 case G_REDUCTION:
1713 oss << opToString(m_op) << '(';
1714 m_left->intoString(oss);
1715 oss << ')';
1716 break;
1717 case G_TENSORPROD:
1718 oss << opToString(m_op) << '(';
1719 m_left->intoString(oss);
1720 oss << ", ";
1721 m_right->intoString(oss);
1722 oss << ')';
1723 break;
1724 case G_NP1OUT_2P:
1725 oss << opToString(m_op) << '(';
1726 m_left->intoString(oss);
1727 oss << ", " << m_axis_offset << ", " << m_transpose;
1728 oss << ')';
1729 break;
1730 default:
1731 oss << "UNKNOWN";
1732 }
1733 }
1734
1735
1736 void
1737 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1738 {
1739 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1740 switch (getOpgroup(m_op))
1741 {
1742 case G_IDENTITY:
1743 if (m_id->isExpanded())
1744 {
1745 oss << "E";
1746 }
1747 else if (m_id->isTagged())
1748 {
1749 oss << "T";
1750 }
1751 else if (m_id->isConstant())
1752 {
1753 oss << "C";
1754 }
1755 else
1756 {
1757 oss << "?";
1758 }
1759 oss << '@' << m_id.get() << endl;
1760 break;
1761 case G_BINARY:
1762 oss << opToString(m_op) << endl;
1763 indent+='.';
1764 m_left->intoTreeString(oss, indent);
1765 m_right->intoTreeString(oss, indent);
1766 break;
1767 case G_UNARY:
1768 case G_UNARY_P:
1769 case G_NP1OUT:
1770 case G_NP1OUT_P:
1771 case G_REDUCTION:
1772 oss << opToString(m_op) << endl;
1773 indent+='.';
1774 m_left->intoTreeString(oss, indent);
1775 break;
1776 case G_TENSORPROD:
1777 oss << opToString(m_op) << endl;
1778 indent+='.';
1779 m_left->intoTreeString(oss, indent);
1780 m_right->intoTreeString(oss, indent);
1781 break;
1782 case G_NP1OUT_2P:
1783 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1784 indent+='.';
1785 m_left->intoTreeString(oss, indent);
1786 break;
1787 default:
1788 oss << "UNKNOWN";
1789 }
1790 }
1791
1792
1793 DataAbstract*
1794 DataLazy::deepCopy()
1795 {
1796 switch (getOpgroup(m_op))
1797 {
1798 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1799 case G_UNARY:
1800 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1801 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1802 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1803 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1804 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1805 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1806 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1807 default:
1808 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1809 }
1810 }
1811
1812
1813
1814 // There is no single, natural interpretation of getLength on DataLazy.
1815 // Instances of DataReady can look at the size of their vectors.
1816 // For lazy though, it could be the size the data would be if it were resolved;
1817 // or it could be some function of the lengths of the DataReady instances which
1818 // form part of the expression.
1819 // Rather than have people making assumptions, I have disabled the method.
1820 DataTypes::ValueType::size_type
1821 DataLazy::getLength() const
1822 {
1823 throw DataException("getLength() does not make sense for lazy data.");
1824 }
1825
1826
1827 DataAbstract*
1828 DataLazy::getSlice(const DataTypes::RegionType& region) const
1829 {
1830 throw DataException("getSlice - not implemented for Lazy objects.");
1831 }
1832
1833
1834 // To do this we need to rely on our child nodes
1835 DataTypes::ValueType::size_type
1836 DataLazy::getPointOffset(int sampleNo,
1837 int dataPointNo)
1838 {
1839 if (m_op==IDENTITY)
1840 {
1841 return m_id->getPointOffset(sampleNo,dataPointNo);
1842 }
1843 if (m_readytype!='E')
1844 {
1845 collapse();
1846 return m_id->getPointOffset(sampleNo,dataPointNo);
1847 }
1848 // at this point we do not have an identity node and the expression will be Expanded
1849 // so we only need to know which child to ask
1850 if (m_left->m_readytype=='E')
1851 {
1852 return m_left->getPointOffset(sampleNo,dataPointNo);
1853 }
1854 else
1855 {
1856 return m_right->getPointOffset(sampleNo,dataPointNo);
1857 }
1858 }
1859
1860 // To do this we need to rely on our child nodes
1861 DataTypes::ValueType::size_type
1862 DataLazy::getPointOffset(int sampleNo,
1863 int dataPointNo) const
1864 {
1865 if (m_op==IDENTITY)
1866 {
1867 return m_id->getPointOffset(sampleNo,dataPointNo);
1868 }
1869 if (m_readytype=='E')
1870 {
1871 // at this point we do not have an identity node and the expression will be Expanded
1872 // so we only need to know which child to ask
1873 if (m_left->m_readytype=='E')
1874 {
1875 return m_left->getPointOffset(sampleNo,dataPointNo);
1876 }
1877 else
1878 {
1879 return m_right->getPointOffset(sampleNo,dataPointNo);
1880 }
1881 }
1882 if (m_readytype=='C')
1883 {
1884 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1885 }
1886 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1887 }
1888
1889
1890 // I have decided to let Data:: handle this issue.
1891 void
1892 DataLazy::setToZero()
1893 {
1894 // DataTypes::ValueType v(getNoValues(),0);
1895 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1896 // m_op=IDENTITY;
1897 // m_right.reset();
1898 // m_left.reset();
1899 // m_readytype='C';
1900 // m_buffsRequired=1;
1901
1902 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
1903 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1904 }
1905
1906 bool
1907 DataLazy::actsExpanded() const
1908 {
1909 return (m_readytype=='E');
1910 }
1911
1912 } // end namespace
