
Contents of /trunk/escript/src/DataLazy.cpp



Revision 2791
Tue Dec 1 03:36:36 2009 UTC by jfenwick
File size: 55600 byte(s)
Added expression depth to toString for LazyData.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2009 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 #include "EscriptParams.h"
32
33 #include <iomanip> // for some fancy formatting in debug
34
35 // #define LAZYDEBUG(X) if (privdebug){X;}
36 #define LAZYDEBUG(X)
37 namespace
38 {
39 bool privdebug=false;
40
41 #define ENABLEDEBUG privdebug=true;
42 #define DISABLEDEBUG privdebug=false;
43 }
44
45 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46
47 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48
49
50 /*
51 How does DataLazy work?
52 ~~~~~~~~~~~~~~~~~~~~~~~
53
54 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55 denoted left and right.
56
57 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58 This means that all "internal" nodes in the structure are instances of DataLazy.
59
60 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61 Note that IDENTITY is not considered a unary operation.
62
63 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64 It must however form a DAG (directed acyclic graph).
65 I will refer to individual DataLazy objects within the structure as nodes.
66
67 Each node also stores:
68 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69 evaluated.
70 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
71 evaluate the expression.
72 - m_samplesize ~ the number of doubles stored in a sample.
73
74 When a new node is created, the above values are computed based on the values in the child nodes.
75 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76
77 The resolve method, which produces a DataReady from a DataLazy, does the following:
78 1) Create a DataReady to hold the new result.
79 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81
82 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83
84 resolveSample returns a Vector* and an offset within that vector where the result is stored.
85 Normally, this would be v, but for identity nodes their internal vector is returned instead.
86
87 The convention I use is that the resolve methods should store their results starting at the offset they are passed.
88
89 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91
92 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example); a sketch of these steps follows this comment:
93 1) Add to the ES_optype.
94 2) determine what opgroup your operation belongs to (X)
95 3) add a string for the op to the end of ES_opstrings
96 4) increase ES_opcount
97 5) add an entry (X) to opgroups
98 6) add an entry to the switch in collapseToReady
99 7) add an entry to resolveX
100 */
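/*
  Illustrative sketch only (not compiled, and the names below are assumptions):
  what steps 1-7 above might look like for a hypothetical unary operation "cbrt".

  1) ES_optype (declared elsewhere) gains a CBRT entry after the existing unary ops.
  2) Its opgroup is G_UNARY.
  3) ES_opstrings[] below gains "cbrt" at the matching position.
  4) ES_opcount becomes 44.
  5) opgroups[] below gains a G_UNARY entry at the matching position.
  6) collapseToReady() gains:
        case CBRT:
            result=left.cbrt();    // assumes Data grows a matching cbrt() method
            break;
  7) resolveNodeUnary() gains:
        case CBRT:
            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cbrt);
            break;
*/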
101
102
103 using namespace std;
104 using namespace boost;
105
106 namespace escript
107 {
108
109 namespace
110 {
111
112
113 // enabling this will print out whenever the maximum stacksize used by resolve increases
114 // it assumes _OPENMP is also in use
115 //#define LAZY_STACK_PROF
116
117
118
119 #ifndef _OPENMP
120 #ifdef LAZY_STACK_PROF
121 #undef LAZY_STACK_PROF
122 #endif
123 #endif
124
125
126 #ifdef LAZY_STACK_PROF
127 std::vector<void*> stackstart(getNumberOfThreads());
128 std::vector<void*> stackend(getNumberOfThreads());
129 size_t maxstackuse=0;
130 #endif
131
132 enum ES_opgroup
133 {
134 G_UNKNOWN,
135 G_IDENTITY,
136 G_BINARY, // pointwise operations with two arguments
137 G_UNARY, // pointwise operations with one argument
138 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
139 G_NP1OUT, // non-pointwise op with one output
140 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
141 G_TENSORPROD, // general tensor product
142 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
143 G_REDUCTION // non-pointwise unary op with a scalar output
144 };
145
146
147
148
149 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
150 "sin","cos","tan",
151 "asin","acos","atan","sinh","cosh","tanh","erf",
152 "asinh","acosh","atanh",
153 "log10","log","sign","abs","neg","pos","exp","sqrt",
154 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155 "symmetric","nonsymmetric",
156 "prod",
157 "transpose", "trace",
158 "swapaxes",
159 "minval", "maxval"};
160 int ES_opcount=43;
161 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162 G_UNARY,G_UNARY,G_UNARY, //10
163 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
164 G_UNARY,G_UNARY,G_UNARY, // 20
165 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
166 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
167 G_NP1OUT,G_NP1OUT,
168 G_TENSORPROD,
169 G_NP1OUT_P, G_NP1OUT_P,
170 G_NP1OUT_2P,
171 G_REDUCTION, G_REDUCTION};
172 inline
173 ES_opgroup
174 getOpgroup(ES_optype op)
175 {
176 return opgroups[op];
177 }
178
179 // return the FunctionSpace of the result of "left op right"
180 FunctionSpace
181 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
182 {
183 // perhaps this should call interpolate and throw or something?
184 // maybe we need an interpolate node -
185 // that way, if interpolate is required in any other op we can just throw a
186 // programming error exception.
187
188 FunctionSpace l=left->getFunctionSpace();
189 FunctionSpace r=right->getFunctionSpace();
190 if (l!=r)
191 {
192 if (r.probeInterpolation(l))
193 {
194 return l;
195 }
196 if (l.probeInterpolation(r))
197 {
198 return r;
199 }
200 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201 }
202 return l;
203 }
204
205 // return the shape of the result of "left op right"
206 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207 DataTypes::ShapeType
208 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209 {
210 if (left->getShape()!=right->getShape())
211 {
212 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213 {
214 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215 }
216
217 if (left->getRank()==0) // we need to allow scalar * anything
218 {
219 return right->getShape();
220 }
221 if (right->getRank()==0)
222 {
223 return left->getShape();
224 }
225 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
226 }
227 return left->getShape();
228 }
229
230 // return the shape for "op left"
231
232 DataTypes::ShapeType
233 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234 {
235 switch(op)
236 {
237 case TRANS:
238 { // for the scoping of variables
239 const DataTypes::ShapeType& s=left->getShape();
240 DataTypes::ShapeType sh;
241 int rank=left->getRank();
242 if (axis_offset<0 || axis_offset>rank)
243 {
244 stringstream e;
245 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
246 throw DataException(e.str());
247 }
248 for (int i=0; i<rank; i++)
249 {
250 int index = (axis_offset+i)%rank;
251 sh.push_back(s[index]); // Append to new shape
252 }
253 return sh;
254 }
255 break;
256 case TRACE:
257 {
258 int rank=left->getRank();
259 if (rank<2)
260 {
261 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
262 }
263 if ((axis_offset>rank-2) || (axis_offset<0))
264 {
265 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
266 }
267 if (rank==2)
268 {
269 return DataTypes::scalarShape;
270 }
271 else if (rank==3)
272 {
273 DataTypes::ShapeType sh;
274 if (axis_offset==0)
275 {
276 sh.push_back(left->getShape()[2]);
277 }
278 else // offset==1
279 {
280 sh.push_back(left->getShape()[0]);
281 }
282 return sh;
283 }
284 else if (rank==4)
285 {
286 DataTypes::ShapeType sh;
287 const DataTypes::ShapeType& s=left->getShape();
288 if (axis_offset==0)
289 {
290 sh.push_back(s[2]);
291 sh.push_back(s[3]);
292 }
293 else if (axis_offset==1)
294 {
295 sh.push_back(s[0]);
296 sh.push_back(s[3]);
297 }
298 else // offset==2
299 {
300 sh.push_back(s[0]);
301 sh.push_back(s[1]);
302 }
303 return sh;
304 }
305 else // unknown rank
306 {
307 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
308 }
309 }
310 break;
311 default:
312 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
313 }
314 }
315
316 DataTypes::ShapeType
317 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
318 {
319 // This code taken from the Data.cpp swapaxes() method
320 // Some of the checks are probably redundant here
321 int axis0_tmp,axis1_tmp;
322 const DataTypes::ShapeType& s=left->getShape();
323 DataTypes::ShapeType out_shape;
324 // Build the output shape with the sizes at positions axis0 and axis1 swapped;
325 // all other dimensions keep their original size.
326 int rank=left->getRank();
327 if (rank<2) {
328 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
329 }
330 if (axis0<0 || axis0>rank-1) {
331 stringstream e;
332 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
333 throw DataException(e.str());
334 }
335 if (axis1<0 || axis1>rank-1) {
336 stringstream e;
337 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
338 throw DataException(e.str());
339 }
340 if (axis0 == axis1) {
341 throw DataException("Error - Data::swapaxes: axis indices must be different.");
342 }
343 if (axis0 > axis1) {
344 axis0_tmp=axis1;
345 axis1_tmp=axis0;
346 } else {
347 axis0_tmp=axis0;
348 axis1_tmp=axis1;
349 }
350 for (int i=0; i<rank; i++) {
351 if (i == axis0_tmp) {
352 out_shape.push_back(s[axis1_tmp]);
353 } else if (i == axis1_tmp) {
354 out_shape.push_back(s[axis0_tmp]);
355 } else {
356 out_shape.push_back(s[i]);
357 }
358 }
359 return out_shape;
360 }
361
362
363 // determine the output shape for the general tensor product operation
364 // the additional parameters return information required later for the product
365 // the majority of this code is copy pasted from C_General_Tensor_Product
366 DataTypes::ShapeType
367 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
368 {
369
370 // Get rank and shape of inputs
371 int rank0 = left->getRank();
372 int rank1 = right->getRank();
373 const DataTypes::ShapeType& shape0 = left->getShape();
374 const DataTypes::ShapeType& shape1 = right->getShape();
375
376 // Prepare for the loops of the product and verify compatibility of shapes
377 int start0=0, start1=0;
378 if (transpose == 0) {}
379 else if (transpose == 1) { start0 = axis_offset; }
380 else if (transpose == 2) { start1 = rank1-axis_offset; }
381 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
382
383 if (rank0<axis_offset)
384 {
385 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
386 }
387
388 // Adjust the shapes for transpose
389 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
390 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
391 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
392 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
393
394 // Prepare for the loops of the product
395 SL=1, SM=1, SR=1;
396 for (int i=0; i<rank0-axis_offset; i++) {
397 SL *= tmpShape0[i];
398 }
399 for (int i=rank0-axis_offset; i<rank0; i++) {
400 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
401 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
402 }
403 SM *= tmpShape0[i];
404 }
405 for (int i=axis_offset; i<rank1; i++) {
406 SR *= tmpShape1[i];
407 }
408
409 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
410 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
411 { // block to limit the scope of out_index
412 int out_index=0;
413 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
414 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
415 }
416
417 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
418 {
419 ostringstream os;
420 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
421 throw DataException(os.str());
422 }
423
424 return shape2;
425 }
426
427 } // end anonymous namespace
428
429
430
431 // Return a string representing the operation
432 const std::string&
433 opToString(ES_optype op)
434 {
435 if (op<0 || op>=ES_opcount)
436 {
437 op=UNKNOWNOP;
438 }
439 return ES_opstrings[op];
440 }
441
442 void DataLazy::LazyNodeSetup()
443 {
444 #ifdef _OPENMP
445 int numthreads=omp_get_max_threads();
446 m_samples.resize(numthreads*m_samplesize);
447 m_sampleids=new int[numthreads];
448 for (int i=0;i<numthreads;++i)
449 {
450 m_sampleids[i]=-1;
451 }
452 #else
453 m_samples.resize(m_samplesize);
454 m_sampleids=new int[1];
455 m_sampleids[0]=-1;
456 #endif // _OPENMP
457 }
458
459
460 // Creates an identity node
461 DataLazy::DataLazy(DataAbstract_ptr p)
462 : parent(p->getFunctionSpace(),p->getShape())
463 ,m_sampleids(0),
464 m_samples(1)
465 {
466 if (p->isLazy())
467 {
468 // I don't want identity of Lazy.
469 // Question: Why would that be so bad?
470 // Answer: We assume that the child of ID is something we can call getVector on
471 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
472 }
473 else
474 {
475 p->makeLazyShared();
476 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
477 makeIdentity(dr);
478 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
479 }
480 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
481 }
482
483 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
484 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
485 m_op(op),
486 m_axis_offset(0),
487 m_transpose(0),
488 m_SL(0), m_SM(0), m_SR(0)
489 {
490 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
491 {
492 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
493 }
494
495 DataLazy_ptr lleft;
496 if (!left->isLazy())
497 {
498 lleft=DataLazy_ptr(new DataLazy(left));
499 }
500 else
501 {
502 lleft=dynamic_pointer_cast<DataLazy>(left);
503 }
504 m_readytype=lleft->m_readytype;
505 m_left=lleft;
506 m_samplesize=getNumDPPSample()*getNoValues();
507 m_children=m_left->m_children+1;
508 m_height=m_left->m_height+1;
509 LazyNodeSetup();
510 SIZELIMIT
511 }
512
513
514 // In this constructor we need to consider interpolation
515 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
516 : parent(resultFS(left,right,op), resultShape(left,right,op)),
517 m_op(op),
518 m_SL(0), m_SM(0), m_SR(0)
519 {
520 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
521 if ((getOpgroup(op)!=G_BINARY))
522 {
523 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
524 }
525
526 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
527 {
528 FunctionSpace fs=getFunctionSpace();
529 Data ltemp(left);
530 Data tmp(ltemp,fs);
531 left=tmp.borrowDataPtr();
532 }
533 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
534 {
535 Data tmp(Data(right),getFunctionSpace());
536 right=tmp.borrowDataPtr();
537 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
538 }
539 left->operandCheck(*right);
540
541 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
542 {
543 m_left=dynamic_pointer_cast<DataLazy>(left);
544 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
545 }
546 else
547 {
548 m_left=DataLazy_ptr(new DataLazy(left));
549 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
550 }
551 if (right->isLazy())
552 {
553 m_right=dynamic_pointer_cast<DataLazy>(right);
554 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
555 }
556 else
557 {
558 m_right=DataLazy_ptr(new DataLazy(right));
559 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
560 }
561 char lt=m_left->m_readytype;
562 char rt=m_right->m_readytype;
563 if (lt=='E' || rt=='E')
564 {
565 m_readytype='E';
566 }
567 else if (lt=='T' || rt=='T')
568 {
569 m_readytype='T';
570 }
571 else
572 {
573 m_readytype='C';
574 }
575 m_samplesize=getNumDPPSample()*getNoValues();
576 m_children=m_left->m_children+m_right->m_children+2;
577 m_height=max(m_left->m_height,m_right->m_height)+1;
578 LazyNodeSetup();
579 SIZELIMIT
580 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
581 }
582
583 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
584 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
585 m_op(op),
586 m_axis_offset(axis_offset),
587 m_transpose(transpose)
588 {
589 if ((getOpgroup(op)!=G_TENSORPROD))
590 {
591 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
592 }
593 if ((transpose>2) || (transpose<0))
594 {
595 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
596 }
597 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
598 {
599 FunctionSpace fs=getFunctionSpace();
600 Data ltemp(left);
601 Data tmp(ltemp,fs);
602 left=tmp.borrowDataPtr();
603 }
604 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
605 {
606 Data tmp(Data(right),getFunctionSpace());
607 right=tmp.borrowDataPtr();
608 }
609 // left->operandCheck(*right);
610
611 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
612 {
613 m_left=dynamic_pointer_cast<DataLazy>(left);
614 }
615 else
616 {
617 m_left=DataLazy_ptr(new DataLazy(left));
618 }
619 if (right->isLazy())
620 {
621 m_right=dynamic_pointer_cast<DataLazy>(right);
622 }
623 else
624 {
625 m_right=DataLazy_ptr(new DataLazy(right));
626 }
627 char lt=m_left->m_readytype;
628 char rt=m_right->m_readytype;
629 if (lt=='E' || rt=='E')
630 {
631 m_readytype='E';
632 }
633 else if (lt=='T' || rt=='T')
634 {
635 m_readytype='T';
636 }
637 else
638 {
639 m_readytype='C';
640 }
641 m_samplesize=getNumDPPSample()*getNoValues();
642 m_children=m_left->m_children+m_right->m_children+2;
643 m_height=max(m_left->m_height,m_right->m_height)+1;
644 LazyNodeSetup();
645 SIZELIMIT
646 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
647 }
648
649
650 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
651 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
652 m_op(op),
653 m_axis_offset(axis_offset),
654 m_transpose(0),
655 m_tol(0)
656 {
657 if ((getOpgroup(op)!=G_NP1OUT_P))
658 {
659 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
660 }
661 DataLazy_ptr lleft;
662 if (!left->isLazy())
663 {
664 lleft=DataLazy_ptr(new DataLazy(left));
665 }
666 else
667 {
668 lleft=dynamic_pointer_cast<DataLazy>(left);
669 }
670 m_readytype=lleft->m_readytype;
671 m_left=lleft;
672 m_samplesize=getNumDPPSample()*getNoValues();
673 m_children=m_left->m_children+1;
674 m_height=m_left->m_height+1;
675 LazyNodeSetup();
676 SIZELIMIT
677 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
678 }
679
680 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
681 : parent(left->getFunctionSpace(), left->getShape()),
682 m_op(op),
683 m_axis_offset(0),
684 m_transpose(0),
685 m_tol(tol)
686 {
687 if ((getOpgroup(op)!=G_UNARY_P))
688 {
689 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
690 }
691 DataLazy_ptr lleft;
692 if (!left->isLazy())
693 {
694 lleft=DataLazy_ptr(new DataLazy(left));
695 }
696 else
697 {
698 lleft=dynamic_pointer_cast<DataLazy>(left);
699 }
700 m_readytype=lleft->m_readytype;
701 m_left=lleft;
702 m_samplesize=getNumDPPSample()*getNoValues();
703 m_children=m_left->m_children+1;
704 m_height=m_left->m_height+1;
705 LazyNodeSetup();
706 SIZELIMIT
707 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
708 }
709
710
711 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
712 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
713 m_op(op),
714 m_axis_offset(axis0),
715 m_transpose(axis1),
716 m_tol(0)
717 {
718 if ((getOpgroup(op)!=G_NP1OUT_2P))
719 {
720 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
721 }
722 DataLazy_ptr lleft;
723 if (!left->isLazy())
724 {
725 lleft=DataLazy_ptr(new DataLazy(left));
726 }
727 else
728 {
729 lleft=dynamic_pointer_cast<DataLazy>(left);
730 }
731 m_readytype=lleft->m_readytype;
732 m_left=lleft;
733 m_samplesize=getNumDPPSample()*getNoValues();
734 m_children=m_left->m_children+1;
735 m_height=m_left->m_height+1;
736 LazyNodeSetup();
737 SIZELIMIT
738 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
739 }
740
741 DataLazy::~DataLazy()
742 {
743 delete[] m_sampleids;
744 }
745
746
747 /*
748 \brief Evaluates the expression using methods on Data.
749 This does the work for the collapse method.
750 For reasons of efficiency do not call this method on DataExpanded nodes.
751 */
752 DataReady_ptr
753 DataLazy::collapseToReady()
754 {
755 if (m_readytype=='E')
756 { // this is more an efficiency concern than anything else
757 throw DataException("Programmer Error - do not use collapse on Expanded data.");
758 }
759 if (m_op==IDENTITY)
760 {
761 return m_id;
762 }
763 DataReady_ptr pleft=m_left->collapseToReady();
764 Data left(pleft);
765 Data right;
766 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
767 {
768 right=Data(m_right->collapseToReady());
769 }
770 Data result;
771 switch(m_op)
772 {
773 case ADD:
774 result=left+right;
775 break;
776 case SUB:
777 result=left-right;
778 break;
779 case MUL:
780 result=left*right;
781 break;
782 case DIV:
783 result=left/right;
784 break;
785 case SIN:
786 result=left.sin();
787 break;
788 case COS:
789 result=left.cos();
790 break;
791 case TAN:
792 result=left.tan();
793 break;
794 case ASIN:
795 result=left.asin();
796 break;
797 case ACOS:
798 result=left.acos();
799 break;
800 case ATAN:
801 result=left.atan();
802 break;
803 case SINH:
804 result=left.sinh();
805 break;
806 case COSH:
807 result=left.cosh();
808 break;
809 case TANH:
810 result=left.tanh();
811 break;
812 case ERF:
813 result=left.erf();
814 break;
815 case ASINH:
816 result=left.asinh();
817 break;
818 case ACOSH:
819 result=left.acosh();
820 break;
821 case ATANH:
822 result=left.atanh();
823 break;
824 case LOG10:
825 result=left.log10();
826 break;
827 case LOG:
828 result=left.log();
829 break;
830 case SIGN:
831 result=left.sign();
832 break;
833 case ABS:
834 result=left.abs();
835 break;
836 case NEG:
837 result=left.neg();
838 break;
839 case POS:
840 // it doesn't mean anything for delayed.
841 // it will just trigger a deep copy of the lazy object
842 throw DataException("Programmer error - POS not supported for lazy data.");
843 break;
844 case EXP:
845 result=left.exp();
846 break;
847 case SQRT:
848 result=left.sqrt();
849 break;
850 case RECIP:
851 result=left.oneOver();
852 break;
853 case GZ:
854 result=left.wherePositive();
855 break;
856 case LZ:
857 result=left.whereNegative();
858 break;
859 case GEZ:
860 result=left.whereNonNegative();
861 break;
862 case LEZ:
863 result=left.whereNonPositive();
864 break;
865 case NEZ:
866 result=left.whereNonZero(m_tol);
867 break;
868 case EZ:
869 result=left.whereZero(m_tol);
870 break;
871 case SYM:
872 result=left.symmetric();
873 break;
874 case NSYM:
875 result=left.nonsymmetric();
876 break;
877 case PROD:
878 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
879 break;
880 case TRANS:
881 result=left.transpose(m_axis_offset);
882 break;
883 case TRACE:
884 result=left.trace(m_axis_offset);
885 break;
886 case SWAP:
887 result=left.swapaxes(m_axis_offset, m_transpose);
888 break;
889 case MINVAL:
890 result=left.minval();
891 break;
892 case MAXVAL:
893 result=left.maxval();
894 break;
895 default:
896 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
897 }
898 return result.borrowReadyPtr();
899 }
900
901 /*
902 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
903 This method uses the original methods on the Data class to evaluate the expressions.
904 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
905 the purpose of using DataLazy in the first place).
906 */
907 void
908 DataLazy::collapse()
909 {
910 if (m_op==IDENTITY)
911 {
912 return;
913 }
914 if (m_readytype=='E')
915 { // this is more an efficiency concern than anything else
916 throw DataException("Programmer Error - do not use collapse on Expanded data.");
917 }
918 m_id=collapseToReady();
919 m_op=IDENTITY;
920 }
921
922
923
924
925
926
927 #define PROC_OP(TYPE,X) \
928 for (int j=0;j<onumsteps;++j)\
929 {\
930 for (int i=0;i<numsteps;++i,resultp+=resultStep) \
931 { \
932 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
933 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
934 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
935 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
936 lroffset+=leftstep; \
937 rroffset+=rightstep; \
938 }\
939 lroffset+=oleftstep;\
940 rroffset+=orightstep;\
941 }
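// Usage sketch: PROC_OP is expanded in resolveNodeBinary below with a binary functor, e.g.
//    PROC_OP(NO_ARG,plus<double>());
// The inner loop processes numsteps chunks of chunksize doubles; the outer loop
// (onumsteps with oleftstep/orightstep) only does real work when an expanded
// scalar is combined with a non-scalar operand, otherwise onumsteps==1.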
942
943
944 // The result will be stored in m_samples
945 // The return value is a pointer to the DataVector, offset is the offset within the return value
946 const DataTypes::ValueType*
947 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
948 {
949 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
950 // collapse so we have a 'E' node or an IDENTITY for some other type
951 if (m_readytype!='E' && m_op!=IDENTITY)
952 {
953 collapse();
954 }
955 if (m_op==IDENTITY)
956 {
957 const ValueType& vec=m_id->getVectorRO();
958 roffset=m_id->getPointOffset(sampleNo, 0);
959 #ifdef LAZY_STACK_PROF
960 int x;
961 if (&x<stackend[omp_get_thread_num()])
962 {
963 stackend[omp_get_thread_num()]=&x;
964 }
965 #endif
966 return &(vec);
967 }
968 if (m_readytype!='E')
969 {
970 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
971 }
972 if (m_sampleids[tid]==sampleNo)
973 {
974 roffset=tid*m_samplesize;
975 return &(m_samples); // sample is already resolved
976 }
977 m_sampleids[tid]=sampleNo;
978 switch (getOpgroup(m_op))
979 {
980 case G_UNARY:
981 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
982 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
983 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
984 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
985 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
986 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
987 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
988 default:
989 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
990 }
991 }
992
993 const DataTypes::ValueType*
994 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
995 {
996 // we assume that any collapsing has been done before we get here
997 // since we only have one argument we don't need to think about only
998 // processing single points.
999 // we will also know we won't get identity nodes
1000 if (m_readytype!='E')
1001 {
1002 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1003 }
1004 if (m_op==IDENTITY)
1005 {
1006 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1007 }
1008 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1009 const double* left=&((*leftres)[roffset]);
1010 roffset=m_samplesize*tid;
1011 double* result=&(m_samples[roffset]);
1012 switch (m_op)
1013 {
1014 case SIN:
1015 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1016 break;
1017 case COS:
1018 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1019 break;
1020 case TAN:
1021 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1022 break;
1023 case ASIN:
1024 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1025 break;
1026 case ACOS:
1027 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1028 break;
1029 case ATAN:
1030 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1031 break;
1032 case SINH:
1033 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1034 break;
1035 case COSH:
1036 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1037 break;
1038 case TANH:
1039 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1040 break;
1041 case ERF:
1042 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1043 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1044 #else
1045 tensor_unary_operation(m_samplesize, left, result, ::erf);
1046 break;
1047 #endif
1048 case ASINH:
1049 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1050 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1051 #else
1052 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1053 #endif
1054 break;
1055 case ACOSH:
1056 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1057 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1058 #else
1059 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1060 #endif
1061 break;
1062 case ATANH:
1063 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1064 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1065 #else
1066 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1067 #endif
1068 break;
1069 case LOG10:
1070 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1071 break;
1072 case LOG:
1073 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1074 break;
1075 case SIGN:
1076 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1077 break;
1078 case ABS:
1079 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1080 break;
1081 case NEG:
1082 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1083 break;
1084 case POS:
1085 // it doesn't mean anything for delayed.
1086 // it will just trigger a deep copy of the lazy object
1087 throw DataException("Programmer error - POS not supported for lazy data.");
1088 break;
1089 case EXP:
1090 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1091 break;
1092 case SQRT:
1093 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1094 break;
1095 case RECIP:
1096 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1097 break;
1098 case GZ:
1099 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1100 break;
1101 case LZ:
1102 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1103 break;
1104 case GEZ:
1105 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1106 break;
1107 case LEZ:
1108 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1109 break;
1110 // These are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1111 case NEZ:
1112 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1113 break;
1114 case EZ:
1115 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1116 break;
1117
1118 default:
1119 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1120 }
1121 return &(m_samples);
1122 }
1123
1124
1125 const DataTypes::ValueType*
1126 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1127 {
1128 // we assume that any collapsing has been done before we get here
1129 // since we only have one argument we don't need to think about only
1130 // processing single points.
1131 // we will also know we won't get identity nodes
1132 if (m_readytype!='E')
1133 {
1134 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1135 }
1136 if (m_op==IDENTITY)
1137 {
1138 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1139 }
1140 size_t loffset=0;
1141 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1142
1143 roffset=m_samplesize*tid;
1144 unsigned int ndpps=getNumDPPSample();
1145 unsigned int psize=DataTypes::noValues(getShape());
1146 double* result=&(m_samples[roffset]);
1147 switch (m_op)
1148 {
1149 case MINVAL:
1150 {
1151 for (unsigned int z=0;z<ndpps;++z)
1152 {
1153 FMin op;
1154 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1155 loffset+=psize;
1156 result++;
1157 }
1158 }
1159 break;
1160 case MAXVAL:
1161 {
1162 for (unsigned int z=0;z<ndpps;++z)
1163 {
1164 FMax op;
1165 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1166 loffset+=psize;
1167 result++;
1168 }
1169 }
1170 break;
1171 default:
1172 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1173 }
1174 return &(m_samples);
1175 }
1176
1177 const DataTypes::ValueType*
1178 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1179 {
1180 // we assume that any collapsing has been done before we get here
1181 // since we only have one argument we don't need to think about only
1182 // processing single points.
1183 if (m_readytype!='E')
1184 {
1185 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1186 }
1187 if (m_op==IDENTITY)
1188 {
1189 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1190 }
1191 size_t subroffset;
1192 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1193 roffset=m_samplesize*tid;
1194 size_t loop=0;
1195 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1196 size_t step=getNoValues();
1197 size_t offset=roffset;
1198 switch (m_op)
1199 {
1200 case SYM:
1201 for (loop=0;loop<numsteps;++loop)
1202 {
1203 DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1204 subroffset+=step;
1205 offset+=step;
1206 }
1207 break;
1208 case NSYM:
1209 for (loop=0;loop<numsteps;++loop)
1210 {
1211 DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1212 subroffset+=step;
1213 offset+=step;
1214 }
1215 break;
1216 default:
1217 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1218 }
1219 return &m_samples;
1220 }
1221
1222 const DataTypes::ValueType*
1223 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1224 {
1225 // we assume that any collapsing has been done before we get here
1226 // since we only have one argument we don't need to think about only
1227 // processing single points.
1228 if (m_readytype!='E')
1229 {
1230 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1231 }
1232 if (m_op==IDENTITY)
1233 {
1234 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1235 }
1236 size_t subroffset;
1237 size_t offset;
1238 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1239 roffset=m_samplesize*tid;
1240 offset=roffset;
1241 size_t loop=0;
1242 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1243 size_t outstep=getNoValues();
1244 size_t instep=m_left->getNoValues();
1245 switch (m_op)
1246 {
1247 case TRACE:
1248 for (loop=0;loop<numsteps;++loop)
1249 {
1250 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1251 subroffset+=instep;
1252 offset+=outstep;
1253 }
1254 break;
1255 case TRANS:
1256 for (loop=0;loop<numsteps;++loop)
1257 {
1258 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1259 subroffset+=instep;
1260 offset+=outstep;
1261 }
1262 break;
1263 default:
1264 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1265 }
1266 return &m_samples;
1267 }
1268
1269
1270 const DataTypes::ValueType*
1271 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1272 {
1273 if (m_readytype!='E')
1274 {
1275 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1276 }
1277 if (m_op==IDENTITY)
1278 {
1279 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1280 }
1281 size_t subroffset;
1282 size_t offset;
1283 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1284 roffset=m_samplesize*tid;
1285 offset=roffset;
1286 size_t loop=0;
1287 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1288 size_t outstep=getNoValues();
1289 size_t instep=m_left->getNoValues();
1290 switch (m_op)
1291 {
1292 case SWAP:
1293 for (loop=0;loop<numsteps;++loop)
1294 {
1295 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1296 subroffset+=instep;
1297 offset+=outstep;
1298 }
1299 break;
1300 default:
1301 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1302 }
1303 return &m_samples;
1304 }
1305
1306
1307
1308 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1309 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1310 // If both children are expanded, then we can process them in a single operation (we treat
1311 // the whole sample as one big datapoint).
1312 // If one of the children is not expanded, then we need to treat each point in the sample
1313 // individually.
1314 // There is an additional complication when scalar operations are considered.
1315 // For example, 2+Vector.
1316 // In this case each double within the point is treated individually
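// Worked example (reading the stepping logic below): for 2+Vector where the
// vector is expanded and has, say, 3 components per point, we take the REN
// branch: chunksize=1 (each double is handled individually), leftstep=0 so the
// scalar operand is reused, rightstep=1, and numsteps becomes
// getNumDPPSample()*3; the outer loop is not needed, so onumsteps stays 1.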
1317 const DataTypes::ValueType*
1318 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1319 {
1320 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1321
1322 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1323 // first work out which of the children are expanded
1324 bool leftExp=(m_left->m_readytype=='E');
1325 bool rightExp=(m_right->m_readytype=='E');
1326 if (!leftExp && !rightExp)
1327 {
1328 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1329 }
1330 bool leftScalar=(m_left->getRank()==0);
1331 bool rightScalar=(m_right->getRank()==0);
1332 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1333 {
1334 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1335 }
1336 size_t leftsize=m_left->getNoValues();
1337 size_t rightsize=m_right->getNoValues();
1338 size_t chunksize=1; // how many doubles will be processed in one go
1339 int leftstep=0; // how far should the left offset advance after each step
1340 int rightstep=0;
1341 int numsteps=0; // total number of steps for the inner loop
1342 int oleftstep=0; // the o variables refer to the outer loop
1343 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1344 int onumsteps=1;
1345
1346 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1347 bool RES=(rightExp && rightScalar);
1348 bool LS=(!leftExp && leftScalar); // left is a single scalar
1349 bool RS=(!rightExp && rightScalar);
1350 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1351 bool RN=(!rightExp && !rightScalar);
1352 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1353 bool REN=(rightExp && !rightScalar);
1354
1355 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1356 {
1357 chunksize=m_left->getNumDPPSample()*leftsize;
1358 leftstep=0;
1359 rightstep=0;
1360 numsteps=1;
1361 }
1362 else if (LES || RES)
1363 {
1364 chunksize=1;
1365 if (LES) // left is an expanded scalar
1366 {
1367 if (RS)
1368 {
1369 leftstep=1;
1370 rightstep=0;
1371 numsteps=m_left->getNumDPPSample();
1372 }
1373 else // RN or REN
1374 {
1375 leftstep=0;
1376 oleftstep=1;
1377 rightstep=1;
1378 orightstep=(RN ? -(int)rightsize : 0);
1379 numsteps=rightsize;
1380 onumsteps=m_left->getNumDPPSample();
1381 }
1382 }
1383 else // right is an expanded scalar
1384 {
1385 if (LS)
1386 {
1387 rightstep=1;
1388 leftstep=0;
1389 numsteps=m_right->getNumDPPSample();
1390 }
1391 else
1392 {
1393 rightstep=0;
1394 orightstep=1;
1395 leftstep=1;
1396 oleftstep=(LN ? -(int)leftsize : 0);
1397 numsteps=leftsize;
1398 onumsteps=m_right->getNumDPPSample();
1399 }
1400 }
1401 }
1402 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1403 {
1404 if (LEN) // and Right will be a single value
1405 {
1406 chunksize=rightsize;
1407 leftstep=rightsize;
1408 rightstep=0;
1409 numsteps=m_left->getNumDPPSample();
1410 if (RS)
1411 {
1412 numsteps*=leftsize;
1413 }
1414 }
1415 else // REN
1416 {
1417 chunksize=leftsize;
1418 rightstep=leftsize;
1419 leftstep=0;
1420 numsteps=m_right->getNumDPPSample();
1421 if (LS)
1422 {
1423 numsteps*=rightsize;
1424 }
1425 }
1426 }
1427
1428 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1429 // Get the values of sub-expressions
1430 const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1431 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1432 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1433 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1434 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1435 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1436 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1437 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1438 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1439
1440 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1441 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1442
1443
1444 roffset=m_samplesize*tid;
1445 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1446 switch(m_op)
1447 {
1448 case ADD:
1449 PROC_OP(NO_ARG,plus<double>());
1450 break;
1451 case SUB:
1452 PROC_OP(NO_ARG,minus<double>());
1453 break;
1454 case MUL:
1455 PROC_OP(NO_ARG,multiplies<double>());
1456 break;
1457 case DIV:
1458 PROC_OP(NO_ARG,divides<double>());
1459 break;
1460 case POW:
1461 PROC_OP(double (double,double),::pow);
1462 break;
1463 default:
1464 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1465 }
1466 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1467 return &m_samples;
1468 }
1469
1470
1471 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1472 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1473 // unlike the other resolve helpers, we must treat these datapoints separately.
1474 const DataTypes::ValueType*
1475 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1476 {
1477 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1478
1479 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1480 // first work out which of the children are expanded
1481 bool leftExp=(m_left->m_readytype=='E');
1482 bool rightExp=(m_right->m_readytype=='E');
1483 int steps=getNumDPPSample();
1484 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1485 int rightStep=(rightExp?m_right->getNoValues() : 0);
1486
1487 int resultStep=getNoValues();
1488 roffset=m_samplesize*tid;
1489 size_t offset=roffset;
1490
1491 const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1492
1493 const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1494
1495 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1496 cout << getNoValues() << endl;)
1497
1498
1499 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1500 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1501 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1502 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1503 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1504 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1505 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1506
1507 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1508 switch(m_op)
1509 {
1510 case PROD:
1511 for (int i=0;i<steps;++i,resultp+=resultStep)
1512 {
1513 const double *ptr_0 = &((*left)[lroffset]);
1514 const double *ptr_1 = &((*right)[rroffset]);
1515
1516 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1517 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1518
1519 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1520
1521 lroffset+=leftStep;
1522 rroffset+=rightStep;
1523 }
1524 break;
1525 default:
1526 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1527 }
1528 roffset=offset;
1529 return &m_samples;
1530 }
1531
1532
1533 const DataTypes::ValueType*
1534 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1535 {
1536 #ifdef _OPENMP
1537 int tid=omp_get_thread_num();
1538 #else
1539 int tid=0;
1540 #endif
1541
1542 #ifdef LAZY_STACK_PROF
1543 stackstart[tid]=&tid;
1544 stackend[tid]=&tid;
1545 const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1546 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1547 #pragma omp critical
1548 if (d>maxstackuse)
1549 {
1550 cout << "Max resolve Stack use " << d << endl;
1551 maxstackuse=d;
1552 }
1553 return r;
1554 #else
1555 return resolveNodeSample(tid, sampleNo, roffset);
1556 #endif
1557 }
1558
1559
1560 // This needs to do the work of the identity constructor
1561 void
1562 DataLazy::resolveToIdentity()
1563 {
1564 if (m_op==IDENTITY)
1565 return;
1566 DataReady_ptr p=resolveNodeWorker();
1567 makeIdentity(p);
1568 }
1569
1570 void DataLazy::makeIdentity(const DataReady_ptr& p)
1571 {
1572 m_op=IDENTITY;
1573 m_axis_offset=0;
1574 m_transpose=0;
1575 m_SL=m_SM=m_SR=0;
1576 m_children=m_height=0;
1577 m_id=p;
1578 if(p->isConstant()) {m_readytype='C';}
1579 else if(p->isExpanded()) {m_readytype='E';}
1580 else if (p->isTagged()) {m_readytype='T';}
1581 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1582 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1583 m_left.reset();
1584 m_right.reset();
1585 }
1586
1587
1588 DataReady_ptr
1589 DataLazy::resolve()
1590 {
1591 resolveToIdentity();
1592 return m_id;
1593 }
1594
1595 // This version of resolve uses storage in each node to hold results
1596 DataReady_ptr
1597 DataLazy::resolveNodeWorker()
1598 {
1599 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1600 {
1601 collapse();
1602 }
1603 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1604 {
1605 return m_id;
1606 }
1607 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1608 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1609 ValueType& resvec=result->getVectorRW();
1610 DataReady_ptr resptr=DataReady_ptr(result);
1611
1612 int sample;
1613 int totalsamples=getNumSamples();
1614 const ValueType* res=0; // Storage for answer
1615 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1616 #pragma omp parallel private(sample,res)
1617 {
1618 size_t roffset=0;
1619 #ifdef LAZY_STACK_PROF
1620 stackstart[omp_get_thread_num()]=&roffset;
1621 stackend[omp_get_thread_num()]=&roffset;
1622 #endif
1623 #pragma omp for schedule(static)
1624 for (sample=0;sample<totalsamples;++sample)
1625 {
1626 roffset=0;
1627 #ifdef _OPENMP
1628 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1629 #else
1630 res=resolveNodeSample(0,sample,roffset);
1631 #endif
1632 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1633 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1634 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1635 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1636 }
1637 }
1638 #ifdef LAZY_STACK_PROF
1639 for (int i=0;i<getNumberOfThreads();++i)
1640 {
1641 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1642 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1643 if (r>maxstackuse)
1644 {
1645 maxstackuse=r;
1646 }
1647 }
1648 cout << "Max resolve Stack use=" << maxstackuse << endl;
1649 #endif
1650 return resptr;
1651 }
1652
1653 std::string
1654 DataLazy::toString() const
1655 {
1656 ostringstream oss;
1657 oss << "Lazy Data: [depth=" << m_height<< "] ";
1658 if (escriptParams.getPRINT_LAZY_TREE()==0)
1659 {
1660 intoString(oss);
1661 }
1662 else
1663 {
1664 oss << endl;
1665 intoTreeString(oss,"");
1666 }
1667 return oss.str();
1668 }
1669
1670
1671 void
1672 DataLazy::intoString(ostringstream& oss) const
1673 {
1674 // oss << "[" << m_children <<";"<<m_height <<"]";
1675 switch (getOpgroup(m_op))
1676 {
1677 case G_IDENTITY:
1678 if (m_id->isExpanded())
1679 {
1680 oss << "E";
1681 }
1682 else if (m_id->isTagged())
1683 {
1684 oss << "T";
1685 }
1686 else if (m_id->isConstant())
1687 {
1688 oss << "C";
1689 }
1690 else
1691 {
1692 oss << "?";
1693 }
1694 oss << '@' << m_id.get();
1695 break;
1696 case G_BINARY:
1697 oss << '(';
1698 m_left->intoString(oss);
1699 oss << ' ' << opToString(m_op) << ' ';
1700 m_right->intoString(oss);
1701 oss << ')';
1702 break;
1703 case G_UNARY:
1704 case G_UNARY_P:
1705 case G_NP1OUT:
1706 case G_NP1OUT_P:
1707 case G_REDUCTION:
1708 oss << opToString(m_op) << '(';
1709 m_left->intoString(oss);
1710 oss << ')';
1711 break;
1712 case G_TENSORPROD:
1713 oss << opToString(m_op) << '(';
1714 m_left->intoString(oss);
1715 oss << ", ";
1716 m_right->intoString(oss);
1717 oss << ')';
1718 break;
1719 case G_NP1OUT_2P:
1720 oss << opToString(m_op) << '(';
1721 m_left->intoString(oss);
1722 oss << ", " << m_axis_offset << ", " << m_transpose;
1723 oss << ')';
1724 break;
1725 default:
1726 oss << "UNKNOWN";
1727 }
1728 }
1729
1730
1731 void
1732 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1733 {
1734 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1735 switch (getOpgroup(m_op))
1736 {
1737 case G_IDENTITY:
1738 if (m_id->isExpanded())
1739 {
1740 oss << "E";
1741 }
1742 else if (m_id->isTagged())
1743 {
1744 oss << "T";
1745 }
1746 else if (m_id->isConstant())
1747 {
1748 oss << "C";
1749 }
1750 else
1751 {
1752 oss << "?";
1753 }
1754 oss << '@' << m_id.get() << endl;
1755 break;
1756 case G_BINARY:
1757 oss << opToString(m_op) << endl;
1758 indent+='.';
1759 m_left->intoTreeString(oss, indent);
1760 m_right->intoTreeString(oss, indent);
1761 break;
1762 case G_UNARY:
1763 case G_UNARY_P:
1764 case G_NP1OUT:
1765 case G_NP1OUT_P:
1766 case G_REDUCTION:
1767 oss << opToString(m_op) << endl;
1768 indent+='.';
1769 m_left->intoTreeString(oss, indent);
1770 break;
1771 case G_TENSORPROD:
1772 oss << opToString(m_op) << endl;
1773 indent+='.';
1774 m_left->intoTreeString(oss, indent);
1775 m_right->intoTreeString(oss, indent);
1776 break;
1777 case G_NP1OUT_2P:
1778 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1779 indent+='.';
1780 m_left->intoTreeString(oss, indent);
1781 break;
1782 default:
1783 oss << "UNKNOWN";
1784 }
1785 }
1786
1787
1788 DataAbstract*
1789 DataLazy::deepCopy()
1790 {
1791 switch (getOpgroup(m_op))
1792 {
1793 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1794 case G_UNARY:
1795 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1796 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1797 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1798 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1799 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1800 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1801 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1802 default:
1803 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1804 }
1805 }
1806
1807
1808
1809 // There is no single, natural interpretation of getLength on DataLazy.
1810 // Instances of DataReady can look at the size of their vectors.
1811 // For lazy though, it could be the size the data would be if it were resolved;
1812 // or it could be some function of the lengths of the DataReady instances which
1813 // form part of the expression.
1814 // Rather than have people making assumptions, I have disabled the method.
1815 DataTypes::ValueType::size_type
1816 DataLazy::getLength() const
1817 {
1818 throw DataException("getLength() does not make sense for lazy data.");
1819 }
1820
1821
1822 DataAbstract*
1823 DataLazy::getSlice(const DataTypes::RegionType& region) const
1824 {
1825 throw DataException("getSlice - not implemented for Lazy objects.");
1826 }
1827
1828
1829 // To do this we need to rely on our child nodes
1830 DataTypes::ValueType::size_type
1831 DataLazy::getPointOffset(int sampleNo,
1832 int dataPointNo)
1833 {
1834 if (m_op==IDENTITY)
1835 {
1836 return m_id->getPointOffset(sampleNo,dataPointNo);
1837 }
1838 if (m_readytype!='E')
1839 {
1840 collapse();
1841 return m_id->getPointOffset(sampleNo,dataPointNo);
1842 }
1843 // at this point we do not have an identity node and the expression will be Expanded
1844 // so we only need to know which child to ask
1845 if (m_left->m_readytype=='E')
1846 {
1847 return m_left->getPointOffset(sampleNo,dataPointNo);
1848 }
1849 else
1850 {
1851 return m_right->getPointOffset(sampleNo,dataPointNo);
1852 }
1853 }
1854
1855 // To do this we need to rely on our child nodes
1856 DataTypes::ValueType::size_type
1857 DataLazy::getPointOffset(int sampleNo,
1858 int dataPointNo) const
1859 {
1860 if (m_op==IDENTITY)
1861 {
1862 return m_id->getPointOffset(sampleNo,dataPointNo);
1863 }
1864 if (m_readytype=='E')
1865 {
1866 // at this point we do not have an identity node and the expression will be Expanded
1867 // so we only need to know which child to ask
1868 if (m_left->m_readytype=='E')
1869 {
1870 return m_left->getPointOffset(sampleNo,dataPointNo);
1871 }
1872 else
1873 {
1874 return m_right->getPointOffset(sampleNo,dataPointNo);
1875 }
1876 }
1877 if (m_readytype=='C')
1878 {
1879 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1880 }
1881 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1882 }
1883
1884
1885 // I have decided to let Data:: handle this issue.
1886 void
1887 DataLazy::setToZero()
1888 {
1889 // DataTypes::ValueType v(getNoValues(),0);
1890 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1891 // m_op=IDENTITY;
1892 // m_right.reset();
1893 // m_left.reset();
1894 // m_readytype='C';
1895 // m_buffsRequired=1;
1896
1897 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
1898 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1899 }
1900
1901 bool
1902 DataLazy::actsExpanded() const
1903 {
1904 return (m_readytype=='E');
1905 }
1906
1907 } // end namespace
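/*
  Usage sketch (illustrative only, not compiled; treat it as an assumption about
  how the pieces above fit together rather than canonical client code):

    using namespace escript;
    DataAbstract_ptr a = ...;                      // some existing non-lazy data
    DataAbstract_ptr b = ...;
    DataLazy_ptr la(new DataLazy(a));              // IDENTITY wrapper around a DataReady
    DataLazy_ptr lb(new DataLazy(b));
    DataLazy_ptr sum(new DataLazy(la, lb, ADD));   // binary node representing a+b
    DataLazy_ptr sn(new DataLazy(sum, SIN));       // unary node representing sin(a+b)
    DataReady_ptr res = sn->resolve();             // evaluates the DAG sample by sample
*/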
