
Contents of /branches/clazy/escriptcore/src/DataLazy.cpp



Revision 2777
Thu Nov 26 01:06:00 2009 UTC by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 55425 byte(s)
Added the LAZY_STACK_PROF #define for Lazy.
If enabled, lazy will print the (roughly) maximum stack used by any OpenMP
thread over the course of this session.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2009 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 #include "EscriptParams.h"
32
33 #include <iomanip> // for some fancy formatting in debug
34
35 // #define LAZYDEBUG(X) if (privdebug){X;}
36 #define LAZYDEBUG(X)
37 namespace
38 {
39 bool privdebug=false;
40
41 #define ENABLEDEBUG privdebug=true;
42 #define DISABLEDEBUG privdebug=false;
43 }
44
45 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46
47 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48
49
50 /*
51 How does DataLazy work?
52 ~~~~~~~~~~~~~~~~~~~~~~~
53
54 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55 denoted left and right.
56
57 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58 This means that all "internal" nodes in the structure are instances of DataLazy.
59
60 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61 Note that IDENTITY is not considered a unary operation.
62
63 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64 It must however form a DAG (directed acyclic graph).
65 I will refer to individual DataLazy objects within the structure as nodes.
66
67 Each node also stores:
68 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69 evaluated.
70 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
71 evaluate the expression.
72 - m_samplesize ~ the number of doubles stored in a sample.
73
74 When a new node is created, the above values are computed based on the values in the child nodes.
75 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76
77 The resolve method, which produces a DataReady from a DataLazy, does the following:
78 1) Create a DataReady to hold the new result.
79 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81
82 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83
84 resolveSample returns a Vector* and an offset within that vector where the result is stored.
85 Normally, this would be v, but for identity nodes their internal vector is returned instead.
86
87 The convention that I use is that the resolve methods should store their results starting at the offset they are passed.
88
89 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91
92 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93 1) Add to the ES_optype.
94 2) determine what opgroup your operation belongs to (X)
95 3) add a string for the op to the end of ES_opstrings
96 4) increase ES_opcount
97 5) add an entry (X) to opgroups
98 6) add an entry to the switch in collapseToReady
99 7) add an entry to resolveX
100 */
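/*
A rough usage sketch (not part of this file's build; the Data objects "a" and
"b" are assumed to already wrap non-lazy DataReady instances). It shows how the
constructors and resolve() defined below combine to build and evaluate a small DAG:

    DataAbstract_ptr pa=a.borrowDataPtr();        // non-lazy operands
    DataAbstract_ptr pb=b.borrowDataPtr();
    DataLazy_ptr sum(new DataLazy(pa, pb, ADD));  // node for a+b (operands wrapped in IDENTITY)
    DataLazy_ptr expr(new DataLazy(sum, SIN));    // node for sin(a+b)
    DataReady_ptr value=expr->resolve();          // evaluates the whole DAG
*/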
101
102
103 using namespace std;
104 using namespace boost;
105
106 namespace escript
107 {
108
109 namespace
110 {
111
112
113 // enabling this will print out whenever the maximum stacksize used by resolve increases
114 // it assumes _OPENMP is also in use
115 //#define LAZY_STACK_PROF
116
117
118
119 #ifndef _OPENMP
120 #ifdef LAZY_STACK_PROF
121 #undef LAZY_STACK_PROF
122 #endif
123 #endif
124
125
126 #ifdef LAZY_STACK_PROF
127 std::vector<void*> stackstart(getNumberOfThreads());
128 std::vector<void*> stackend(getNumberOfThreads());
129 size_t maxstackuse=0;
130 #endif
131
132 enum ES_opgroup
133 {
134 G_UNKNOWN,
135 G_IDENTITY,
136 G_BINARY, // pointwise operations with two arguments
137 G_UNARY, // pointwise operations with one argument
138 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
139 G_NP1OUT, // non-pointwise op with one output
140 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
141 G_TENSORPROD, // general tensor product
142 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
143 G_REDUCTION // non-pointwise unary op with a scalar output
144 };
145
146
147
148
149 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
150 "sin","cos","tan",
151 "asin","acos","atan","sinh","cosh","tanh","erf",
152 "asinh","acosh","atanh",
153 "log10","log","sign","abs","neg","pos","exp","sqrt",
154 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155 "symmetric","nonsymmetric",
156 "prod",
157 "transpose", "trace",
158 "swapaxes",
159 "minval", "maxval"};
160 int ES_opcount=43;
161 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162 G_UNARY,G_UNARY,G_UNARY, //10
163 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
164 G_UNARY,G_UNARY,G_UNARY, // 20
165 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
166 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
167 G_NP1OUT,G_NP1OUT,
168 G_TENSORPROD,
169 G_NP1OUT_P, G_NP1OUT_P,
170 G_NP1OUT_2P,
171 G_REDUCTION, G_REDUCTION};
172 inline
173 ES_opgroup
174 getOpgroup(ES_optype op)
175 {
176 return opgroups[op];
177 }
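// Example: getOpgroup(SIN) is G_UNARY, getOpgroup(PROD) is G_TENSORPROD and
// getOpgroup(MINVAL) is G_REDUCTION; the tables above must stay in step with
// the ES_optype enumeration for these lookups to remain correct.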
178
179 // return the FunctionSpace of the result of "left op right"
180 FunctionSpace
181 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
182 {
183 // perhaps this should call interpolate and throw or something?
184 // maybe we need an interpolate node -
185 // that way, if interpolate is required in any other op we can just throw a
186 // programming error exception.
187
188 FunctionSpace l=left->getFunctionSpace();
189 FunctionSpace r=right->getFunctionSpace();
190 if (l!=r)
191 {
192 if (r.probeInterpolation(l))
193 {
194 return l;
195 }
196 if (l.probeInterpolation(r))
197 {
198 return r;
199 }
200 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201 }
202 return l;
203 }
204
205 // return the shape of the result of "left op right"
206 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207 DataTypes::ShapeType
208 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209 {
210 if (left->getShape()!=right->getShape())
211 {
212 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213 {
214 throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
215 }
216
217 if (left->getRank()==0) // we need to allow scalar * anything
218 {
219 return right->getShape();
220 }
221 if (right->getRank()==0)
222 {
223 return left->getShape();
224 }
225 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
226 }
227 return left->getShape();
228 }
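// Example: ADD with a scalar (shape ()) left argument and a right argument of
// shape (3,2) yields shape (3,2); two non-scalar arguments must have identical
// shapes or a DataException is thrown.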
229
230 // return the shape for "op left"
231
232 DataTypes::ShapeType
233 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234 {
235 switch(op)
236 {
237 case TRANS:
238 { // for the scoping of variables
239 const DataTypes::ShapeType& s=left->getShape();
240 DataTypes::ShapeType sh;
241 int rank=left->getRank();
242 if (axis_offset<0 || axis_offset>rank)
243 {
244 ostringstream os; os << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank; throw DataException(os.str());
245 }
246 for (int i=0; i<rank; i++)
247 {
248 int index = (axis_offset+i)%rank;
249 sh.push_back(s[index]); // Append to new shape
250 }
251 return sh;
252 }
253 break;
254 case TRACE:
255 {
256 int rank=left->getRank();
257 if (rank<2)
258 {
259 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
260 }
261 if ((axis_offset>rank-2) || (axis_offset<0))
262 {
263 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
264 }
265 if (rank==2)
266 {
267 return DataTypes::scalarShape;
268 }
269 else if (rank==3)
270 {
271 DataTypes::ShapeType sh;
272 if (axis_offset==0)
273 {
274 sh.push_back(left->getShape()[2]);
275 }
276 else // offset==1
277 {
278 sh.push_back(left->getShape()[0]);
279 }
280 return sh;
281 }
282 else if (rank==4)
283 {
284 DataTypes::ShapeType sh;
285 const DataTypes::ShapeType& s=left->getShape();
286 if (axis_offset==0)
287 {
288 sh.push_back(s[2]);
289 sh.push_back(s[3]);
290 }
291 else if (axis_offset==1)
292 {
293 sh.push_back(s[0]);
294 sh.push_back(s[3]);
295 }
296 else // offset==2
297 {
298 sh.push_back(s[0]);
299 sh.push_back(s[1]);
300 }
301 return sh;
302 }
303 else // unknown rank
304 {
305 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
306 }
307 }
308 break;
309 default:
310 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
311 }
312 }
313
314 DataTypes::ShapeType
315 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
316 {
317 // This code was taken from the Data.cpp swapaxes() method
318 // Some of the checks are probably redundant here
319 int axis0_tmp,axis1_tmp;
320 const DataTypes::ShapeType& s=left->getShape();
321 DataTypes::ShapeType out_shape;
322 // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
323 // which goes through all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
324 int rank=left->getRank();
325 if (rank<2) {
326 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
327 }
328 if (axis0<0 || axis0>rank-1) {
329 ostringstream os; os << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << rank-1; throw DataException(os.str());
330 }
331 if (axis1<0 || axis1>rank-1) {
332 ostringstream os; os << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << rank-1; throw DataException(os.str());
333 }
334 if (axis0 == axis1) {
335 throw DataException("Error - Data::swapaxes: axis indices must be different.");
336 }
337 if (axis0 > axis1) {
338 axis0_tmp=axis1;
339 axis1_tmp=axis0;
340 } else {
341 axis0_tmp=axis0;
342 axis1_tmp=axis1;
343 }
344 for (int i=0; i<rank; i++) {
345 if (i == axis0_tmp) {
346 out_shape.push_back(s[axis1_tmp]);
347 } else if (i == axis1_tmp) {
348 out_shape.push_back(s[axis0_tmp]);
349 } else {
350 out_shape.push_back(s[i]);
351 }
352 }
353 return out_shape;
354 }
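// Example: for a left argument of shape (2,3,4), SwapShape(left,0,2) returns
// the shape (4,3,2); axis values outside [0,rank-1] or equal axes are rejected
// by the checks above.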
355
356
357 // determine the output shape for the general tensor product operation
358 // the additional parameters return information required later for the product
359 // the majority of this code is copied from C_General_Tensor_Product
360 DataTypes::ShapeType
361 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
362 {
363
364 // Get rank and shape of inputs
365 int rank0 = left->getRank();
366 int rank1 = right->getRank();
367 const DataTypes::ShapeType& shape0 = left->getShape();
368 const DataTypes::ShapeType& shape1 = right->getShape();
369
370 // Prepare for the loops of the product and verify compatibility of shapes
371 int start0=0, start1=0;
372 if (transpose == 0) {}
373 else if (transpose == 1) { start0 = axis_offset; }
374 else if (transpose == 2) { start1 = rank1-axis_offset; }
375 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
376
377 if (rank0<axis_offset)
378 {
379 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axis_offset");
380 }
381
382 // Adjust the shapes for transpose
383 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
384 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
385 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
386 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
387
388 // Prepare for the loops of the product
389 SL=1, SM=1, SR=1;
390 for (int i=0; i<rank0-axis_offset; i++) {
391 SL *= tmpShape0[i];
392 }
393 for (int i=rank0-axis_offset; i<rank0; i++) {
394 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
395 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
396 }
397 SM *= tmpShape0[i];
398 }
399 for (int i=axis_offset; i<rank1; i++) {
400 SR *= tmpShape1[i];
401 }
402
403 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
404 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
405 { // block to limit the scope of out_index
406 int out_index=0;
407 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
408 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
409 }
410
411 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
412 {
413 ostringstream os;
414 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
415 throw DataException(os.str());
416 }
417
418 return shape2;
419 }
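// Worked example (assuming axis_offset=1 and transpose=0): for a left argument
// of shape (4,3) and a right argument of shape (3,5), the summed dimension is 3,
// so SL=4, SM=3, SR=5 and the returned shape is (4,5) - an ordinary
// matrix-matrix product.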
420
421 } // end anonymous namespace
422
423
424
425 // Return a string representing the operation
426 const std::string&
427 opToString(ES_optype op)
428 {
429 if (op<0 || op>=ES_opcount)
430 {
431 op=UNKNOWNOP;
432 }
433 return ES_opstrings[op];
434 }
435
436 void DataLazy::LazyNodeSetup()
437 {
438 #ifdef _OPENMP
439 int numthreads=omp_get_max_threads();
440 m_samples.resize(numthreads*m_samplesize);
441 m_sampleids=new int[numthreads];
442 for (int i=0;i<numthreads;++i)
443 {
444 m_sampleids[i]=-1;
445 }
446 #else
447 m_samples.resize(m_samplesize);
448 m_sampleids=new int[1];
449 m_sampleids[0]=-1;
450 #endif // _OPENMP
451 }
452
453
454 // Creates an identity node
455 DataLazy::DataLazy(DataAbstract_ptr p)
456 : parent(p->getFunctionSpace(),p->getShape())
457 ,m_sampleids(0),
458 m_samples(1)
459 {
460 if (p->isLazy())
461 {
462 // I don't want identity of Lazy.
463 // Question: Why would that be so bad?
464 // Answer: We assume that the child of ID is something we can call getVector on
465 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
466 }
467 else
468 {
469 p->makeLazyShared();
470 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
471 makeIdentity(dr);
472 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
473 }
474 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
475 }
476
477 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
478 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
479 m_op(op),
480 m_axis_offset(0),
481 m_transpose(0),
482 m_SL(0), m_SM(0), m_SR(0)
483 {
484 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
485 {
486 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY, NP1OUT or REDUCTION operations.");
487 }
488
489 DataLazy_ptr lleft;
490 if (!left->isLazy())
491 {
492 lleft=DataLazy_ptr(new DataLazy(left));
493 }
494 else
495 {
496 lleft=dynamic_pointer_cast<DataLazy>(left);
497 }
498 m_readytype=lleft->m_readytype;
499 m_left=lleft;
500 m_samplesize=getNumDPPSample()*getNoValues();
501 m_children=m_left->m_children+1;
502 m_height=m_left->m_height+1;
503 LazyNodeSetup();
504 SIZELIMIT
505 }
506
507
508 // In this constructor we need to consider interpolation
509 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
510 : parent(resultFS(left,right,op), resultShape(left,right,op)),
511 m_op(op),
512 m_SL(0), m_SM(0), m_SR(0)
513 {
514 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
515 if ((getOpgroup(op)!=G_BINARY))
516 {
517 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
518 }
519
520 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
521 {
522 FunctionSpace fs=getFunctionSpace();
523 Data ltemp(left);
524 Data tmp(ltemp,fs);
525 left=tmp.borrowDataPtr();
526 }
527 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
528 {
529 Data tmp(Data(right),getFunctionSpace());
530 right=tmp.borrowDataPtr();
531 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
532 }
533 left->operandCheck(*right);
534
535 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
536 {
537 m_left=dynamic_pointer_cast<DataLazy>(left);
538 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
539 }
540 else
541 {
542 m_left=DataLazy_ptr(new DataLazy(left));
543 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
544 }
545 if (right->isLazy())
546 {
547 m_right=dynamic_pointer_cast<DataLazy>(right);
548 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
549 }
550 else
551 {
552 m_right=DataLazy_ptr(new DataLazy(right));
553 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
554 }
555 char lt=m_left->m_readytype;
556 char rt=m_right->m_readytype;
557 if (lt=='E' || rt=='E')
558 {
559 m_readytype='E';
560 }
561 else if (lt=='T' || rt=='T')
562 {
563 m_readytype='T';
564 }
565 else
566 {
567 m_readytype='C';
568 }
569 m_samplesize=getNumDPPSample()*getNoValues();
570 m_children=m_left->m_children+m_right->m_children+2;
571 m_height=max(m_left->m_height,m_right->m_height)+1;
572 LazyNodeSetup();
573 SIZELIMIT
574 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
575 }
576
577 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
578 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
579 m_op(op),
580 m_axis_offset(axis_offset),
581 m_transpose(transpose)
582 {
583 if ((getOpgroup(op)!=G_TENSORPROD))
584 {
585 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process general tensor product operations.");
586 }
587 if ((transpose>2) || (transpose<0))
588 {
589 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
590 }
591 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
592 {
593 FunctionSpace fs=getFunctionSpace();
594 Data ltemp(left);
595 Data tmp(ltemp,fs);
596 left=tmp.borrowDataPtr();
597 }
598 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
599 {
600 Data tmp(Data(right),getFunctionSpace());
601 right=tmp.borrowDataPtr();
602 }
603 // left->operandCheck(*right);
604
605 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
606 {
607 m_left=dynamic_pointer_cast<DataLazy>(left);
608 }
609 else
610 {
611 m_left=DataLazy_ptr(new DataLazy(left));
612 }
613 if (right->isLazy())
614 {
615 m_right=dynamic_pointer_cast<DataLazy>(right);
616 }
617 else
618 {
619 m_right=DataLazy_ptr(new DataLazy(right));
620 }
621 char lt=m_left->m_readytype;
622 char rt=m_right->m_readytype;
623 if (lt=='E' || rt=='E')
624 {
625 m_readytype='E';
626 }
627 else if (lt=='T' || rt=='T')
628 {
629 m_readytype='T';
630 }
631 else
632 {
633 m_readytype='C';
634 }
635 m_samplesize=getNumDPPSample()*getNoValues();
636 m_children=m_left->m_children+m_right->m_children+2;
637 m_height=max(m_left->m_height,m_right->m_height)+1;
638 LazyNodeSetup();
639 SIZELIMIT
640 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
641 }
642
643
644 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
645 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
646 m_op(op),
647 m_axis_offset(axis_offset),
648 m_transpose(0),
649 m_tol(0)
650 {
651 if ((getOpgroup(op)!=G_NP1OUT_P))
652 {
653 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
654 }
655 DataLazy_ptr lleft;
656 if (!left->isLazy())
657 {
658 lleft=DataLazy_ptr(new DataLazy(left));
659 }
660 else
661 {
662 lleft=dynamic_pointer_cast<DataLazy>(left);
663 }
664 m_readytype=lleft->m_readytype;
665 m_left=lleft;
666 m_samplesize=getNumDPPSample()*getNoValues();
667 m_children=m_left->m_children+1;
668 m_height=m_left->m_height+1;
669 LazyNodeSetup();
670 SIZELIMIT
671 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
672 }
673
674 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
675 : parent(left->getFunctionSpace(), left->getShape()),
676 m_op(op),
677 m_axis_offset(0),
678 m_transpose(0),
679 m_tol(tol)
680 {
681 if ((getOpgroup(op)!=G_UNARY_P))
682 {
683 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
684 }
685 DataLazy_ptr lleft;
686 if (!left->isLazy())
687 {
688 lleft=DataLazy_ptr(new DataLazy(left));
689 }
690 else
691 {
692 lleft=dynamic_pointer_cast<DataLazy>(left);
693 }
694 m_readytype=lleft->m_readytype;
695 m_left=lleft;
696 m_samplesize=getNumDPPSample()*getNoValues();
697 m_children=m_left->m_children+1;
698 m_height=m_left->m_height+1;
699 LazyNodeSetup();
700 SIZELIMIT
701 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
702 }
703
704
705 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
706 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
707 m_op(op),
708 m_axis_offset(axis0),
709 m_transpose(axis1),
710 m_tol(0)
711 {
712 if ((getOpgroup(op)!=G_NP1OUT_2P))
713 {
714 throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
715 }
716 DataLazy_ptr lleft;
717 if (!left->isLazy())
718 {
719 lleft=DataLazy_ptr(new DataLazy(left));
720 }
721 else
722 {
723 lleft=dynamic_pointer_cast<DataLazy>(left);
724 }
725 m_readytype=lleft->m_readytype;
726 m_left=lleft;
727 m_samplesize=getNumDPPSample()*getNoValues();
728 m_children=m_left->m_children+1;
729 m_height=m_left->m_height+1;
730 LazyNodeSetup();
731 SIZELIMIT
732 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
733 }
734
735 DataLazy::~DataLazy()
736 {
737 delete[] m_sampleids;
738 }
739
740
741 /*
742 \brief Evaluates the expression using methods on Data.
743 This does the work for the collapse method.
744 For reasons of efficiency do not call this method on DataExpanded nodes.
745 */
746 DataReady_ptr
747 DataLazy::collapseToReady()
748 {
749 if (m_readytype=='E')
750 { // this is more an efficiency concern than anything else
751 throw DataException("Programmer Error - do not use collapse on Expanded data.");
752 }
753 if (m_op==IDENTITY)
754 {
755 return m_id;
756 }
757 DataReady_ptr pleft=m_left->collapseToReady();
758 Data left(pleft);
759 Data right;
760 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
761 {
762 right=Data(m_right->collapseToReady());
763 }
764 Data result;
765 switch(m_op)
766 {
767 case ADD:
768 result=left+right;
769 break;
770 case SUB:
771 result=left-right;
772 break;
773 case MUL:
774 result=left*right;
775 break;
776 case DIV:
777 result=left/right;
778 break;
779 case SIN:
780 result=left.sin();
781 break;
782 case COS:
783 result=left.cos();
784 break;
785 case TAN:
786 result=left.tan();
787 break;
788 case ASIN:
789 result=left.asin();
790 break;
791 case ACOS:
792 result=left.acos();
793 break;
794 case ATAN:
795 result=left.atan();
796 break;
797 case SINH:
798 result=left.sinh();
799 break;
800 case COSH:
801 result=left.cosh();
802 break;
803 case TANH:
804 result=left.tanh();
805 break;
806 case ERF:
807 result=left.erf();
808 break;
809 case ASINH:
810 result=left.asinh();
811 break;
812 case ACOSH:
813 result=left.acosh();
814 break;
815 case ATANH:
816 result=left.atanh();
817 break;
818 case LOG10:
819 result=left.log10();
820 break;
821 case LOG:
822 result=left.log();
823 break;
824 case SIGN:
825 result=left.sign();
826 break;
827 case ABS:
828 result=left.abs();
829 break;
830 case NEG:
831 result=left.neg();
832 break;
833 case POS:
834 // it doesn't mean anything for delayed.
835 // it will just trigger a deep copy of the lazy object
836 throw DataException("Programmer error - POS not supported for lazy data.");
837 break;
838 case EXP:
839 result=left.exp();
840 break;
841 case SQRT:
842 result=left.sqrt();
843 break;
844 case RECIP:
845 result=left.oneOver();
846 break;
847 case GZ:
848 result=left.wherePositive();
849 break;
850 case LZ:
851 result=left.whereNegative();
852 break;
853 case GEZ:
854 result=left.whereNonNegative();
855 break;
856 case LEZ:
857 result=left.whereNonPositive();
858 break;
859 case NEZ:
860 result=left.whereNonZero(m_tol);
861 break;
862 case EZ:
863 result=left.whereZero(m_tol);
864 break;
865 case SYM:
866 result=left.symmetric();
867 break;
868 case NSYM:
869 result=left.nonsymmetric();
870 break;
871 case PROD:
872 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
873 break;
874 case TRANS:
875 result=left.transpose(m_axis_offset);
876 break;
877 case TRACE:
878 result=left.trace(m_axis_offset);
879 break;
880 case SWAP:
881 result=left.swapaxes(m_axis_offset, m_transpose);
882 break;
883 case MINVAL:
884 result=left.minval();
885 break;
886 case MAXVAL:
887 result=left.maxval();
888 break;
889 default:
890 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
891 }
892 return result.borrowReadyPtr();
893 }
894
895 /*
896 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
897 This method uses the original methods on the Data class to evaluate the expressions.
898 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
899 the purpose of using DataLazy in the first place).
900 */
901 void
902 DataLazy::collapse()
903 {
904 if (m_op==IDENTITY)
905 {
906 return;
907 }
908 if (m_readytype=='E')
909 { // this is more an efficiency concern than anything else
910 throw DataException("Programmer Error - do not use collapse on Expanded data.");
911 }
912 m_id=collapseToReady();
913 m_op=IDENTITY;
914 }
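// A small sketch of the intended use of collapse() (assuming the node's
// children are all Constant or Tagged, i.e. m_readytype!='E'):
//     collapse();        // evaluates eagerly via collapseToReady() above
//     // afterwards m_op==IDENTITY and m_id holds the computed DataReady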
915
916
917
918
919
920
921 #define PROC_OP(TYPE,X) \
922 for (int j=0;j<onumsteps;++j)\
923 {\
924 for (int i=0;i<numsteps;++i,resultp+=resultStep) \
925 { \
926 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
927 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
928 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
929 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
930 lroffset+=leftstep; \
931 rroffset+=rightstep; \
932 }\
933 lroffset+=oleftstep;\
934 rroffset+=orightstep;\
935 }
936
937
938 // The result will be stored in m_samples
939 // The return value is a pointer to the DataVector, offset is the offset within the return value
940 const DataTypes::ValueType*
941 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
942 {
943 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
944 // collapse so we have an 'E' node or an IDENTITY for some other type
945 if (m_readytype!='E' && m_op!=IDENTITY)
946 {
947 collapse();
948 }
949 if (m_op==IDENTITY)
950 {
951 const ValueType& vec=m_id->getVectorRO();
952 roffset=m_id->getPointOffset(sampleNo, 0);
953 #ifdef LAZY_STACK_PROF
954 int x;
955 if (&x<stackend[omp_get_thread_num()])
956 {
957 stackend[omp_get_thread_num()]=&x;
958 }
959 #endif
960 return &(vec);
961 }
962 if (m_readytype!='E')
963 {
964 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
965 }
966 if (m_sampleids[tid]==sampleNo)
967 {
968 roffset=tid*m_samplesize;
969 return &(m_samples); // sample is already resolved
970 }
971 m_sampleids[tid]=sampleNo;
972 switch (getOpgroup(m_op))
973 {
974 case G_UNARY:
975 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
976 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
977 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
978 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
979 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
980 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
981 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
982 default:
983 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
984 }
985 }
986
987 const DataTypes::ValueType*
988 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
989 {
990 // we assume that any collapsing has been done before we get here
991 // since we only have one argument we don't need to think about only
992 // processing single points.
993 // we will also know we won't get identity nodes
994 if (m_readytype!='E')
995 {
996 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
997 }
998 if (m_op==IDENTITY)
999 {
1000 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1001 }
1002 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1003 const double* left=&((*leftres)[roffset]);
1004 roffset=m_samplesize*tid;
1005 double* result=&(m_samples[roffset]);
1006 switch (m_op)
1007 {
1008 case SIN:
1009 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1010 break;
1011 case COS:
1012 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1013 break;
1014 case TAN:
1015 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1016 break;
1017 case ASIN:
1018 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1019 break;
1020 case ACOS:
1021 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1022 break;
1023 case ATAN:
1024 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1025 break;
1026 case SINH:
1027 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1028 break;
1029 case COSH:
1030 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1031 break;
1032 case TANH:
1033 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1034 break;
1035 case ERF:
1036 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1037 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1038 #else
1039 tensor_unary_operation(m_samplesize, left, result, ::erf);
1040 break;
1041 #endif
1042 case ASINH:
1043 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1044 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1045 #else
1046 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1047 #endif
1048 break;
1049 case ACOSH:
1050 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1051 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1052 #else
1053 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1054 #endif
1055 break;
1056 case ATANH:
1057 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1058 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1059 #else
1060 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1061 #endif
1062 break;
1063 case LOG10:
1064 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1065 break;
1066 case LOG:
1067 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1068 break;
1069 case SIGN:
1070 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1071 break;
1072 case ABS:
1073 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1074 break;
1075 case NEG:
1076 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1077 break;
1078 case POS:
1079 // it doesn't mean anything for delayed.
1080 // it will just trigger a deep copy of the lazy object
1081 throw DataException("Programmer error - POS not supported for lazy data.");
1082 break;
1083 case EXP:
1084 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1085 break;
1086 case SQRT:
1087 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1088 break;
1089 case RECIP:
1090 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1091 break;
1092 case GZ:
1093 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1094 break;
1095 case LZ:
1096 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1097 break;
1098 case GEZ:
1099 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1100 break;
1101 case LEZ:
1102 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1103 break;
1104 // These are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1105 case NEZ:
1106 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1107 break;
1108 case EZ:
1109 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1110 break;
1111
1112 default:
1113 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1114 }
1115 return &(m_samples);
1116 }
1117
1118
1119 const DataTypes::ValueType*
1120 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1121 {
1122 // we assume that any collapsing has been done before we get here
1123 // since we only have one argument we don't need to think about only
1124 // processing single points.
1125 // we will also know we won't get identity nodes
1126 if (m_readytype!='E')
1127 {
1128 throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1129 }
1130 if (m_op==IDENTITY)
1131 {
1132 throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1133 }
1134 size_t loffset=0;
1135 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1136
1137 roffset=m_samplesize*tid;
1138 unsigned int ndpps=getNumDPPSample();
1139 unsigned int psize=DataTypes::noValues(getShape());
1140 double* result=&(m_samples[roffset]);
1141 switch (m_op)
1142 {
1143 case MINVAL:
1144 {
1145 for (unsigned int z=0;z<ndpps;++z)
1146 {
1147 FMin op;
1148 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1149 loffset+=psize;
1150 result++;
1151 }
1152 }
1153 break;
1154 case MAXVAL:
1155 {
1156 for (unsigned int z=0;z<ndpps;++z)
1157 {
1158 FMax op;
1159 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1160 loffset+=psize;
1161 result++;
1162 }
1163 }
1164 break;
1165 default:
1166 throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1167 }
1168 return &(m_samples);
1169 }
1170
1171 const DataTypes::ValueType*
1172 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1173 {
1174 // we assume that any collapsing has been done before we get here
1175 // since we only have one argument we don't need to think about only
1176 // processing single points.
1177 if (m_readytype!='E')
1178 {
1179 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1180 }
1181 if (m_op==IDENTITY)
1182 {
1183 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1184 }
1185 size_t subroffset;
1186 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1187 roffset=m_samplesize*tid;
1188 size_t loop=0;
1189 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190 size_t step=getNoValues();
1191 size_t offset=roffset;
1192 switch (m_op)
1193 {
1194 case SYM:
1195 for (loop=0;loop<numsteps;++loop)
1196 {
1197 DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1198 subroffset+=step;
1199 offset+=step;
1200 }
1201 break;
1202 case NSYM:
1203 for (loop=0;loop<numsteps;++loop)
1204 {
1205 DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1206 subroffset+=step;
1207 offset+=step;
1208 }
1209 break;
1210 default:
1211 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1212 }
1213 return &m_samples;
1214 }
1215
1216 const DataTypes::ValueType*
1217 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1218 {
1219 // we assume that any collapsing has been done before we get here
1220 // since we only have one argument we don't need to think about only
1221 // processing single points.
1222 if (m_readytype!='E')
1223 {
1224 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1225 }
1226 if (m_op==IDENTITY)
1227 {
1228 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1229 }
1230 size_t subroffset;
1231 size_t offset;
1232 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233 roffset=m_samplesize*tid;
1234 offset=roffset;
1235 size_t loop=0;
1236 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1237 size_t outstep=getNoValues();
1238 size_t instep=m_left->getNoValues();
1239 switch (m_op)
1240 {
1241 case TRACE:
1242 for (loop=0;loop<numsteps;++loop)
1243 {
1244 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1245 subroffset+=instep;
1246 offset+=outstep;
1247 }
1248 break;
1249 case TRANS:
1250 for (loop=0;loop<numsteps;++loop)
1251 {
1252 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1253 subroffset+=instep;
1254 offset+=outstep;
1255 }
1256 break;
1257 default:
1258 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1259 }
1260 return &m_samples;
1261 }
1262
1263
1264 const DataTypes::ValueType*
1265 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1266 {
1267 if (m_readytype!='E')
1268 {
1269 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1270 }
1271 if (m_op==IDENTITY)
1272 {
1273 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1274 }
1275 size_t subroffset;
1276 size_t offset;
1277 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1278 roffset=m_samplesize*tid;
1279 offset=roffset;
1280 size_t loop=0;
1281 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1282 size_t outstep=getNoValues();
1283 size_t instep=m_left->getNoValues();
1284 switch (m_op)
1285 {
1286 case SWAP:
1287 for (loop=0;loop<numsteps;++loop)
1288 {
1289 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1290 subroffset+=instep;
1291 offset+=outstep;
1292 }
1293 break;
1294 default:
1295 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1296 }
1297 return &m_samples;
1298 }
1299
1300
1301
1302 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1303 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1304 // If both children are expanded, then we can process them in a single operation (we treat
1305 // the whole sample as one big datapoint).
1306 // If one of the children is not expanded, then we need to treat each point in the sample
1307 // individually.
1308 // There is an additional complication when scalar operations are considered.
1309 // For example, 2+Vector.
1310 // In this case each double within the point is treated individually
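// Example: for 2+Vector (a constant scalar plus an expanded vector, i.e. LS and
// REN in the code below) the loop parameters become chunksize=1, leftstep=0,
// rightstep=1 and numsteps=m_right->getNumDPPSample()*rightsize, so every double
// of the expanded sample is combined with the single scalar value.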
1311 const DataTypes::ValueType*
1312 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1313 {
1314 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1315
1316 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1317 // first work out which of the children are expanded
1318 bool leftExp=(m_left->m_readytype=='E');
1319 bool rightExp=(m_right->m_readytype=='E');
1320 if (!leftExp && !rightExp)
1321 {
1322 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1323 }
1324 bool leftScalar=(m_left->getRank()==0);
1325 bool rightScalar=(m_right->getRank()==0);
1326 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1327 {
1328 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1329 }
1330 size_t leftsize=m_left->getNoValues();
1331 size_t rightsize=m_right->getNoValues();
1332 size_t chunksize=1; // how many doubles will be processed in one go
1333 int leftstep=0; // how far should the left offset advance after each step
1334 int rightstep=0;
1335 int numsteps=0; // total number of steps for the inner loop
1336 int oleftstep=0; // the o variables refer to the outer loop
1337 int orightstep=0; // The outer loop is only required in cases where there is an expanded scalar
1338 int onumsteps=1;
1339
1340 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1341 bool RES=(rightExp && rightScalar);
1342 bool LS=(!leftExp && leftScalar); // left is a single scalar
1343 bool RS=(!rightExp && rightScalar);
1344 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1345 bool RN=(!rightExp && !rightScalar);
1346 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1347 bool REN=(rightExp && !rightScalar);
1348
1349 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1350 {
1351 chunksize=m_left->getNumDPPSample()*leftsize;
1352 leftstep=0;
1353 rightstep=0;
1354 numsteps=1;
1355 }
1356 else if (LES || RES)
1357 {
1358 chunksize=1;
1359 if (LES) // left is an expanded scalar
1360 {
1361 if (RS)
1362 {
1363 leftstep=1;
1364 rightstep=0;
1365 numsteps=m_left->getNumDPPSample();
1366 }
1367 else // RN or REN
1368 {
1369 leftstep=0;
1370 oleftstep=1;
1371 rightstep=1;
1372 orightstep=(RN ? -(int)rightsize : 0);
1373 numsteps=rightsize;
1374 onumsteps=m_left->getNumDPPSample();
1375 }
1376 }
1377 else // right is an expanded scalar
1378 {
1379 if (LS)
1380 {
1381 rightstep=1;
1382 leftstep=0;
1383 numsteps=m_right->getNumDPPSample();
1384 }
1385 else
1386 {
1387 rightstep=0;
1388 orightstep=1;
1389 leftstep=1;
1390 oleftstep=(LN ? -(int)leftsize : 0);
1391 numsteps=leftsize;
1392 onumsteps=m_right->getNumDPPSample();
1393 }
1394 }
1395 }
1396 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1397 {
1398 if (LEN) // and Right will be a single value
1399 {
1400 chunksize=rightsize;
1401 leftstep=rightsize;
1402 rightstep=0;
1403 numsteps=m_left->getNumDPPSample();
1404 if (RS)
1405 {
1406 numsteps*=leftsize;
1407 }
1408 }
1409 else // REN
1410 {
1411 chunksize=leftsize;
1412 rightstep=leftsize;
1413 leftstep=0;
1414 numsteps=m_right->getNumDPPSample();
1415 if (LS)
1416 {
1417 numsteps*=rightsize;
1418 }
1419 }
1420 }
1421
1422 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1423 // Get the values of sub-expressions
1424 const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1425 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1426 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1427 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1428 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1429 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1430 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1431 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1432 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1433
1434 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1435 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1436
1437
1438 roffset=m_samplesize*tid;
1439 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1440 switch(m_op)
1441 {
1442 case ADD:
1443 PROC_OP(NO_ARG,plus<double>());
1444 break;
1445 case SUB:
1446 PROC_OP(NO_ARG,minus<double>());
1447 break;
1448 case MUL:
1449 PROC_OP(NO_ARG,multiplies<double>());
1450 break;
1451 case DIV:
1452 PROC_OP(NO_ARG,divides<double>());
1453 break;
1454 case POW:
1455 PROC_OP(double (double,double),::pow);
1456 break;
1457 default:
1458 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1459 }
1460 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1461 return &m_samples;
1462 }
1463
1464
1465 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1466 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1467 // unlike the other resolve helpers, we must treat these datapoints separately.
1468 const DataTypes::ValueType*
1469 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1470 {
1471 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1472
1473 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1474 // first work out which of the children are expanded
1475 bool leftExp=(m_left->m_readytype=='E');
1476 bool rightExp=(m_right->m_readytype=='E');
1477 int steps=getNumDPPSample();
1478 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1479 int rightStep=(rightExp?m_right->getNoValues() : 0);
1480
1481 int resultStep=getNoValues();
1482 roffset=m_samplesize*tid;
1483 size_t offset=roffset;
1484
1485 const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1486
1487 const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1488
1489 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1490 cout << getNoValues() << endl;)
1491
1492
1493 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1494 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1495 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " RightR=" << m_right->getRank() << endl;)
1496 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1497 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1498 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1499 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1500
1501 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1502 switch(m_op)
1503 {
1504 case PROD:
1505 for (int i=0;i<steps;++i,resultp+=resultStep)
1506 {
1507 const double *ptr_0 = &((*left)[lroffset]);
1508 const double *ptr_1 = &((*right)[rroffset]);
1509
1510 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1511 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1512
1513 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1514
1515 lroffset+=leftStep;
1516 rroffset+=rightStep;
1517 }
1518 break;
1519 default:
1520 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1521 }
1522 roffset=offset;
1523 return &m_samples;
1524 }
1525
1526
1527 const DataTypes::ValueType*
1528 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1529 {
1530 #ifdef _OPENMP
1531 int tid=omp_get_thread_num();
1532 #else
1533 int tid=0;
1534 #endif
1535
1536 #ifdef LAZY_STACK_PROF
1537 stackstart[tid]=&tid;
1538 stackend[tid]=&tid;
1539 const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1540 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1541 #pragma omp critical
1542 if (d>maxstackuse)
1543 {
1544 cout << "Max resolve Stack use " << d << endl;
1545 maxstackuse=d;
1546 }
1547 return r;
1548 #else
1549 return resolveNodeSample(tid, sampleNo, roffset);
1550 #endif
1551 }
1552
1553
1554 // This needs to do the work of the identity constructor
1555 void
1556 DataLazy::resolveToIdentity()
1557 {
1558 if (m_op==IDENTITY)
1559 return;
1560 DataReady_ptr p=resolveNodeWorker();
1561 makeIdentity(p);
1562 }
1563
1564 void DataLazy::makeIdentity(const DataReady_ptr& p)
1565 {
1566 m_op=IDENTITY;
1567 m_axis_offset=0;
1568 m_transpose=0;
1569 m_SL=m_SM=m_SR=0;
1570 m_children=m_height=0;
1571 m_id=p;
1572 if(p->isConstant()) {m_readytype='C';}
1573 else if(p->isExpanded()) {m_readytype='E';}
1574 else if (p->isTagged()) {m_readytype='T';}
1575 else {throw DataException("Unknown DataReady instance in DataLazy::makeIdentity().");}
1576 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1577 m_left.reset();
1578 m_right.reset();
1579 }
1580
1581
1582 DataReady_ptr
1583 DataLazy::resolve()
1584 {
1585 resolveToIdentity();
1586 return m_id;
1587 }
1588
1589 // This version of resolve uses storage in each node to hold results
1590 DataReady_ptr
1591 DataLazy::resolveNodeWorker()
1592 {
1593 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1594 {
1595 collapse();
1596 }
1597 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1598 {
1599 return m_id;
1600 }
1601 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1602 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1603 ValueType& resvec=result->getVectorRW();
1604 DataReady_ptr resptr=DataReady_ptr(result);
1605
1606 int sample;
1607 int totalsamples=getNumSamples();
1608 const ValueType* res=0; // Storage for answer
1609 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1610 #pragma omp parallel private(sample,res)
1611 {
1612 size_t roffset=0;
1613 #ifdef LAZY_STACK_PROF
1614 stackstart[omp_get_thread_num()]=&roffset;
1615 stackend[omp_get_thread_num()]=&roffset;
1616 #endif
1617 #pragma omp for schedule(static)
1618 for (sample=0;sample<totalsamples;++sample)
1619 {
1620 roffset=0;
1621 #ifdef _OPENMP
1622 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1623 #else
1624 res=resolveNodeSample(0,sample,roffset);
1625 #endif
1626 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1627 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1628 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1629 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1630 }
1631 }
1632 #ifdef LAZY_STACK_PROF
1633 for (int i=0;i<getNumberOfThreads();++i)
1634 {
1635 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1636 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1637 if (r>maxstackuse)
1638 {
1639 maxstackuse=r;
1640 }
1641 }
1642 cout << "Max resolve Stack use=" << maxstackuse << endl;
1643 #endif
1644 return resptr;
1645 }
1646
1647 std::string
1648 DataLazy::toString() const
1649 {
1650 ostringstream oss;
1651 oss << "Lazy Data:";
1652 if (escriptParams.getPRINT_LAZY_TREE()==0)
1653 {
1654 intoString(oss);
1655 }
1656 else
1657 {
1658 oss << endl;
1659 intoTreeString(oss,"");
1660 }
1661 return oss.str();
1662 }
1663
1664
1665 void
1666 DataLazy::intoString(ostringstream& oss) const
1667 {
1668 // oss << "[" << m_children <<";"<<m_height <<"]";
1669 switch (getOpgroup(m_op))
1670 {
1671 case G_IDENTITY:
1672 if (m_id->isExpanded())
1673 {
1674 oss << "E";
1675 }
1676 else if (m_id->isTagged())
1677 {
1678 oss << "T";
1679 }
1680 else if (m_id->isConstant())
1681 {
1682 oss << "C";
1683 }
1684 else
1685 {
1686 oss << "?";
1687 }
1688 oss << '@' << m_id.get();
1689 break;
1690 case G_BINARY:
1691 oss << '(';
1692 m_left->intoString(oss);
1693 oss << ' ' << opToString(m_op) << ' ';
1694 m_right->intoString(oss);
1695 oss << ')';
1696 break;
1697 case G_UNARY:
1698 case G_UNARY_P:
1699 case G_NP1OUT:
1700 case G_NP1OUT_P:
1701 case G_REDUCTION:
1702 oss << opToString(m_op) << '(';
1703 m_left->intoString(oss);
1704 oss << ')';
1705 break;
1706 case G_TENSORPROD:
1707 oss << opToString(m_op) << '(';
1708 m_left->intoString(oss);
1709 oss << ", ";
1710 m_right->intoString(oss);
1711 oss << ')';
1712 break;
1713 case G_NP1OUT_2P:
1714 oss << opToString(m_op) << '(';
1715 m_left->intoString(oss);
1716 oss << ", " << m_axis_offset << ", " << m_transpose;
1717 oss << ')';
1718 break;
1719 default:
1720 oss << "UNKNOWN";
1721 }
1722 }
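// Example of the flat form produced above (pointer values are illustrative):
// for sin(a)+b with both operands expanded, toString() yields something like
//     Lazy Data:(sin(E@0x1a2b3c) + E@0x4d5e6f)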
1723
1724
1725 void
1726 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1727 {
1728 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1729 switch (getOpgroup(m_op))
1730 {
1731 case G_IDENTITY:
1732 if (m_id->isExpanded())
1733 {
1734 oss << "E";
1735 }
1736 else if (m_id->isTagged())
1737 {
1738 oss << "T";
1739 }
1740 else if (m_id->isConstant())
1741 {
1742 oss << "C";
1743 }
1744 else
1745 {
1746 oss << "?";
1747 }
1748 oss << '@' << m_id.get() << endl;
1749 break;
1750 case G_BINARY:
1751 oss << opToString(m_op) << endl;
1752 indent+='.';
1753 m_left->intoTreeString(oss, indent);
1754 m_right->intoTreeString(oss, indent);
1755 break;
1756 case G_UNARY:
1757 case G_UNARY_P:
1758 case G_NP1OUT:
1759 case G_NP1OUT_P:
1760 case G_REDUCTION:
1761 oss << opToString(m_op) << endl;
1762 indent+='.';
1763 m_left->intoTreeString(oss, indent);
1764 break;
1765 case G_TENSORPROD:
1766 oss << opToString(m_op) << endl;
1767 indent+='.';
1768 m_left->intoTreeString(oss, indent);
1769 m_right->intoTreeString(oss, indent);
1770 break;
1771 case G_NP1OUT_2P:
1772 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1773 indent+='.';
1774 m_left->intoTreeString(oss, indent);
1775 break;
1776 default:
1777 oss << "UNKNOWN";
1778 }
1779 }
1780
1781
1782 DataAbstract*
1783 DataLazy::deepCopy()
1784 {
1785 switch (getOpgroup(m_op))
1786 {
1787 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1788 case G_UNARY:
1789 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1790 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1791 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1792 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1793 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1794 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1795 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1796 default:
1797 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1798 }
1799 }
1800
1801
1802
1803 // There is no single, natural interpretation of getLength on DataLazy.
1804 // Instances of DataReady can look at the size of their vectors.
1805 // For lazy though, it could be the size the data would be if it were resolved;
1806 // or it could be some function of the lengths of the DataReady instances which
1807 // form part of the expression.
1808 // Rather than have people making assumptions, I have disabled the method.
1809 DataTypes::ValueType::size_type
1810 DataLazy::getLength() const
1811 {
1812 throw DataException("getLength() does not make sense for lazy data.");
1813 }
1814
1815
1816 DataAbstract*
1817 DataLazy::getSlice(const DataTypes::RegionType& region) const
1818 {
1819 throw DataException("getSlice - not implemented for Lazy objects.");
1820 }
1821
1822
1823 // To do this we need to rely on our child nodes
1824 DataTypes::ValueType::size_type
1825 DataLazy::getPointOffset(int sampleNo,
1826 int dataPointNo)
1827 {
1828 if (m_op==IDENTITY)
1829 {
1830 return m_id->getPointOffset(sampleNo,dataPointNo);
1831 }
1832 if (m_readytype!='E')
1833 {
1834 collapse();
1835 return m_id->getPointOffset(sampleNo,dataPointNo);
1836 }
1837 // at this point we do not have an identity node and the expression will be Expanded
1838 // so we only need to know which child to ask
1839 if (m_left->m_readytype=='E')
1840 {
1841 return m_left->getPointOffset(sampleNo,dataPointNo);
1842 }
1843 else
1844 {
1845 return m_right->getPointOffset(sampleNo,dataPointNo);
1846 }
1847 }
1848
1849 // To do this we need to rely on our child nodes
1850 DataTypes::ValueType::size_type
1851 DataLazy::getPointOffset(int sampleNo,
1852 int dataPointNo) const
1853 {
1854 if (m_op==IDENTITY)
1855 {
1856 return m_id->getPointOffset(sampleNo,dataPointNo);
1857 }
1858 if (m_readytype=='E')
1859 {
1860 // at this point we do not have an identity node and the expression will be Expanded
1861 // so we only need to know which child to ask
1862 if (m_left->m_readytype=='E')
1863 {
1864 return m_left->getPointOffset(sampleNo,dataPointNo);
1865 }
1866 else
1867 {
1868 return m_right->getPointOffset(sampleNo,dataPointNo);
1869 }
1870 }
1871 if (m_readytype=='C')
1872 {
1873 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1874 }
1875 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1876 }
1877
1878
1879 // I have decided to let Data:: handle this issue.
1880 void
1881 DataLazy::setToZero()
1882 {
1883 // DataTypes::ValueType v(getNoValues(),0);
1884 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1885 // m_op=IDENTITY;
1886 // m_right.reset();
1887 // m_left.reset();
1888 // m_readytype='C';
1889 // m_buffsRequired=1;
1890
1891 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
1892 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1893 }
1894
1895 bool
1896 DataLazy::actsExpanded() const
1897 {
1898 return (m_readytype=='E');
1899 }
1900
1901 } // end namespace
