/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Contents of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2779 - (show annotations)
Thu Nov 26 03:51:15 2009 UTC (9 years, 4 months ago) by caltinay
Original Path: trunk/escript/src/DataLazy.cpp
File size: 55549 byte(s)
Fixed more instances of garbage=cstring+integer errors (d'oh!)

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2009 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 #include "EscriptParams.h"
32
33 #include <iomanip> // for some fancy formatting in debug
34
35 // #define LAZYDEBUG(X) if (privdebug){X;}
36 #define LAZYDEBUG(X)
37 namespace
38 {
39 bool privdebug=false;
40
41 #define ENABLEDEBUG privdebug=true;
42 #define DISABLEDEBUG privdebug=false;
43 }
44
45 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46
47 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48
49
50 /*
51 How does DataLazy work?
52 ~~~~~~~~~~~~~~~~~~~~~~~
53
54 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55 denoted left and right.
56
57 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58 This means that all "internal" nodes in the structure are instances of DataLazy.
59
60 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61 Note that IDENITY is not considered a unary operation.
62
63 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64 It must however form a DAG (directed acyclic graph).
65 I will refer to individual DataLazy objects with the structure as nodes.
66
67 Each node also stores:
68 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69 evaluated.
70 - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71 evaluate the expression.
72 - m_samplesize ~ the number of doubles stored in a sample.
73
74 When a new node is created, the above values are computed based on the values in the child nodes.
75 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76
77 The resolve method, which produces a DataReady from a DataLazy, does the following:
78 1) Create a DataReady to hold the new result.
79 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81
82 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83
84 resolveSample returns a Vector* and an offset within that vector where the result is stored.
85 Normally, this would be v, but for identity nodes their internal vector is returned instead.
86
87 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
88
89 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91
92 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93 1) Add to the ES_optype.
94 2) determine what opgroup your operation belongs to (X)
95 3) add a string for the op to the end of ES_opstrings
96 4) increase ES_opcount
97 5) add an entry (X) to opgroups
98 6) add an entry to the switch in collapseToReady
99 7) add an entry to resolveX
100 */
101
102
103 using namespace std;
104 using namespace boost;
105
106 namespace escript
107 {
108
109 namespace
110 {
111
112
113 // enabling this will print out when ever the maximum stacksize used by resolve increases
114 // it assumes _OPENMP is also in use
115 //#define LAZY_STACK_PROF
116
117
118
119 #ifndef _OPENMP
120 #ifdef LAZY_STACK_PROF
121 #undef LAZY_STACK_PROF
122 #endif
123 #endif
124
125
126 #ifdef LAZY_STACK_PROF
127 std::vector<void*> stackstart(getNumberOfThreads());
128 std::vector<void*> stackend(getNumberOfThreads());
129 size_t maxstackuse=0;
130 #endif
131
132 enum ES_opgroup
133 {
134 G_UNKNOWN,
135 G_IDENTITY,
136 G_BINARY, // pointwise operations with two arguments
137 G_UNARY, // pointwise operations with one argument
138 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
139 G_NP1OUT, // non-pointwise op with one output
140 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
141 G_TENSORPROD, // general tensor product
142 G_NP1OUT_2P, // non-pointwise op with one output requiring two params
143 G_REDUCTION // non-pointwise unary op with a scalar output
144 };
145
146
147
148
149 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
150 "sin","cos","tan",
151 "asin","acos","atan","sinh","cosh","tanh","erf",
152 "asinh","acosh","atanh",
153 "log10","log","sign","abs","neg","pos","exp","sqrt",
154 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155 "symmetric","nonsymmetric",
156 "prod",
157 "transpose", "trace",
158 "swapaxes",
159 "minval", "maxval"};
160 int ES_opcount=43;
161 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162 G_UNARY,G_UNARY,G_UNARY, //10
163 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
164 G_UNARY,G_UNARY,G_UNARY, // 20
165 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
166 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
167 G_NP1OUT,G_NP1OUT,
168 G_TENSORPROD,
169 G_NP1OUT_P, G_NP1OUT_P,
170 G_NP1OUT_2P,
171 G_REDUCTION, G_REDUCTION};
172 inline
173 ES_opgroup
174 getOpgroup(ES_optype op)
175 {
176 return opgroups[op];
177 }
178
179 // return the FunctionSpace of the result of "left op right"
180 FunctionSpace
181 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
182 {
183 // perhaps this should call interpolate and throw or something?
184 // maybe we need an interpolate node -
185 // that way, if interpolate is required in any other op we can just throw a
186 // programming error exception.
187
188 FunctionSpace l=left->getFunctionSpace();
189 FunctionSpace r=right->getFunctionSpace();
190 if (l!=r)
191 {
192 if (r.probeInterpolation(l))
193 {
194 return l;
195 }
196 if (l.probeInterpolation(r))
197 {
198 return r;
199 }
200 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201 }
202 return l;
203 }
204
205 // return the shape of the result of "left op right"
206 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207 DataTypes::ShapeType
208 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209 {
210 if (left->getShape()!=right->getShape())
211 {
212 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213 {
214 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215 }
216
217 if (left->getRank()==0) // we need to allow scalar * anything
218 {
219 return right->getShape();
220 }
221 if (right->getRank()==0)
222 {
223 return left->getShape();
224 }
225 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
226 }
227 return left->getShape();
228 }
229
230 // return the shape for "op left"
231
232 DataTypes::ShapeType
233 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234 {
235 switch(op)
236 {
237 case TRANS:
238 { // for the scoping of variables
239 const DataTypes::ShapeType& s=left->getShape();
240 DataTypes::ShapeType sh;
241 int rank=left->getRank();
242 if (axis_offset<0 || axis_offset>rank)
243 {
244 stringstream e;
245 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
246 throw DataException(e.str());
247 }
248 for (int i=0; i<rank; i++)
249 {
250 int index = (axis_offset+i)%rank;
251 sh.push_back(s[index]); // Append to new shape
252 }
253 return sh;
254 }
255 break;
256 case TRACE:
257 {
258 int rank=left->getRank();
259 if (rank<2)
260 {
261 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
262 }
263 if ((axis_offset>rank-2) || (axis_offset<0))
264 {
265 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
266 }
267 if (rank==2)
268 {
269 return DataTypes::scalarShape;
270 }
271 else if (rank==3)
272 {
273 DataTypes::ShapeType sh;
274 if (axis_offset==0)
275 {
276 sh.push_back(left->getShape()[2]);
277 }
278 else // offset==1
279 {
280 sh.push_back(left->getShape()[0]);
281 }
282 return sh;
283 }
284 else if (rank==4)
285 {
286 DataTypes::ShapeType sh;
287 const DataTypes::ShapeType& s=left->getShape();
288 if (axis_offset==0)
289 {
290 sh.push_back(s[2]);
291 sh.push_back(s[3]);
292 }
293 else if (axis_offset==1)
294 {
295 sh.push_back(s[0]);
296 sh.push_back(s[3]);
297 }
298 else // offset==2
299 {
300 sh.push_back(s[0]);
301 sh.push_back(s[1]);
302 }
303 return sh;
304 }
305 else // unknown rank
306 {
307 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
308 }
309 }
310 break;
311 default:
312 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
313 }
314 }
315
316 DataTypes::ShapeType
317 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
318 {
319 // This code taken from the Data.cpp swapaxes() method
320 // Some of the checks are probably redundant here
321 int axis0_tmp,axis1_tmp;
322 const DataTypes::ShapeType& s=left->getShape();
323 DataTypes::ShapeType out_shape;
324 // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
325 // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
326 int rank=left->getRank();
327 if (rank<2) {
328 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
329 }
330 if (axis0<0 || axis0>rank-1) {
331 stringstream e;
332 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
333 throw DataException(e.str());
334 }
335 if (axis1<0 || axis1>rank-1) {
336 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
337 throw DataException(e.str());
338 }
339 if (axis0 == axis1) {
340 throw DataException("Error - Data::swapaxes: axis indices must be different.");
341 }
342 if (axis0 > axis1) {
343 axis0_tmp=axis1;
344 axis1_tmp=axis0;
345 } else {
346 axis0_tmp=axis0;
347 axis1_tmp=axis1;
348 }
349 for (int i=0; i<rank; i++) {
350 if (i == axis0_tmp) {
351 out_shape.push_back(s[axis1_tmp]);
352 } else if (i == axis1_tmp) {
353 out_shape.push_back(s[axis0_tmp]);
354 } else {
355 out_shape.push_back(s[i]);
356 }
357 }
358 return out_shape;
359 }
360
361
362 // determine the output shape for the general tensor product operation
363 // the additional parameters return information required later for the product
364 // the majority of this code is copy pasted from C_General_Tensor_Product
365 DataTypes::ShapeType
366 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
367 {
368
369 // Get rank and shape of inputs
370 int rank0 = left->getRank();
371 int rank1 = right->getRank();
372 const DataTypes::ShapeType& shape0 = left->getShape();
373 const DataTypes::ShapeType& shape1 = right->getShape();
374
375 // Prepare for the loops of the product and verify compatibility of shapes
376 int start0=0, start1=0;
377 if (transpose == 0) {}
378 else if (transpose == 1) { start0 = axis_offset; }
379 else if (transpose == 2) { start1 = rank1-axis_offset; }
380 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
381
382 if (rank0<axis_offset)
383 {
384 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
385 }
386
387 // Adjust the shapes for transpose
388 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
389 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
390 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
391 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
392
393 // Prepare for the loops of the product
394 SL=1, SM=1, SR=1;
395 for (int i=0; i<rank0-axis_offset; i++) {
396 SL *= tmpShape0[i];
397 }
398 for (int i=rank0-axis_offset; i<rank0; i++) {
399 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
400 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
401 }
402 SM *= tmpShape0[i];
403 }
404 for (int i=axis_offset; i<rank1; i++) {
405 SR *= tmpShape1[i];
406 }
407
408 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
409 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
410 { // block to limit the scope of out_index
411 int out_index=0;
412 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
413 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
414 }
415
416 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
417 {
418 ostringstream os;
419 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
420 throw DataException(os.str());
421 }
422
423 return shape2;
424 }
425
426 } // end anonymous namespace
427
428
429
430 // Return a string representing the operation
431 const std::string&
432 opToString(ES_optype op)
433 {
434 if (op<0 || op>=ES_opcount)
435 {
436 op=UNKNOWNOP;
437 }
438 return ES_opstrings[op];
439 }
440
441 void DataLazy::LazyNodeSetup()
442 {
443 #ifdef _OPENMP
444 int numthreads=omp_get_max_threads();
445 m_samples.resize(numthreads*m_samplesize);
446 m_sampleids=new int[numthreads];
447 for (int i=0;i<numthreads;++i)
448 {
449 m_sampleids[i]=-1;
450 }
451 #else
452 m_samples.resize(m_samplesize);
453 m_sampleids=new int[1];
454 m_sampleids[0]=-1;
455 #endif // _OPENMP
456 }
457
458
459 // Creates an identity node
460 DataLazy::DataLazy(DataAbstract_ptr p)
461 : parent(p->getFunctionSpace(),p->getShape())
462 ,m_sampleids(0),
463 m_samples(1)
464 {
465 if (p->isLazy())
466 {
467 // I don't want identity of Lazy.
468 // Question: Why would that be so bad?
469 // Answer: We assume that the child of ID is something we can call getVector on
470 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
471 }
472 else
473 {
474 p->makeLazyShared();
475 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
476 makeIdentity(dr);
477 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
478 }
479 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
480 }
481
482 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
484 m_op(op),
485 m_axis_offset(0),
486 m_transpose(0),
487 m_SL(0), m_SM(0), m_SR(0)
488 {
489 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
490 {
491 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
492 }
493
494 DataLazy_ptr lleft;
495 if (!left->isLazy())
496 {
497 lleft=DataLazy_ptr(new DataLazy(left));
498 }
499 else
500 {
501 lleft=dynamic_pointer_cast<DataLazy>(left);
502 }
503 m_readytype=lleft->m_readytype;
504 m_left=lleft;
505 m_samplesize=getNumDPPSample()*getNoValues();
506 m_children=m_left->m_children+1;
507 m_height=m_left->m_height+1;
508 LazyNodeSetup();
509 SIZELIMIT
510 }
511
512
513 // In this constructor we need to consider interpolation
514 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
515 : parent(resultFS(left,right,op), resultShape(left,right,op)),
516 m_op(op),
517 m_SL(0), m_SM(0), m_SR(0)
518 {
519 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
520 if ((getOpgroup(op)!=G_BINARY))
521 {
522 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
523 }
524
525 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
526 {
527 FunctionSpace fs=getFunctionSpace();
528 Data ltemp(left);
529 Data tmp(ltemp,fs);
530 left=tmp.borrowDataPtr();
531 }
532 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
533 {
534 Data tmp(Data(right),getFunctionSpace());
535 right=tmp.borrowDataPtr();
536 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
537 }
538 left->operandCheck(*right);
539
540 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
541 {
542 m_left=dynamic_pointer_cast<DataLazy>(left);
543 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
544 }
545 else
546 {
547 m_left=DataLazy_ptr(new DataLazy(left));
548 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
549 }
550 if (right->isLazy())
551 {
552 m_right=dynamic_pointer_cast<DataLazy>(right);
553 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
554 }
555 else
556 {
557 m_right=DataLazy_ptr(new DataLazy(right));
558 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
559 }
560 char lt=m_left->m_readytype;
561 char rt=m_right->m_readytype;
562 if (lt=='E' || rt=='E')
563 {
564 m_readytype='E';
565 }
566 else if (lt=='T' || rt=='T')
567 {
568 m_readytype='T';
569 }
570 else
571 {
572 m_readytype='C';
573 }
574 m_samplesize=getNumDPPSample()*getNoValues();
575 m_children=m_left->m_children+m_right->m_children+2;
576 m_height=max(m_left->m_height,m_right->m_height)+1;
577 LazyNodeSetup();
578 SIZELIMIT
579 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
580 }
581
582 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
583 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
584 m_op(op),
585 m_axis_offset(axis_offset),
586 m_transpose(transpose)
587 {
588 if ((getOpgroup(op)!=G_TENSORPROD))
589 {
590 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
591 }
592 if ((transpose>2) || (transpose<0))
593 {
594 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
595 }
596 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
597 {
598 FunctionSpace fs=getFunctionSpace();
599 Data ltemp(left);
600 Data tmp(ltemp,fs);
601 left=tmp.borrowDataPtr();
602 }
603 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
604 {
605 Data tmp(Data(right),getFunctionSpace());
606 right=tmp.borrowDataPtr();
607 }
608 // left->operandCheck(*right);
609
610 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
611 {
612 m_left=dynamic_pointer_cast<DataLazy>(left);
613 }
614 else
615 {
616 m_left=DataLazy_ptr(new DataLazy(left));
617 }
618 if (right->isLazy())
619 {
620 m_right=dynamic_pointer_cast<DataLazy>(right);
621 }
622 else
623 {
624 m_right=DataLazy_ptr(new DataLazy(right));
625 }
626 char lt=m_left->m_readytype;
627 char rt=m_right->m_readytype;
628 if (lt=='E' || rt=='E')
629 {
630 m_readytype='E';
631 }
632 else if (lt=='T' || rt=='T')
633 {
634 m_readytype='T';
635 }
636 else
637 {
638 m_readytype='C';
639 }
640 m_samplesize=getNumDPPSample()*getNoValues();
641 m_children=m_left->m_children+m_right->m_children+2;
642 m_height=max(m_left->m_height,m_right->m_height)+1;
643 LazyNodeSetup();
644 SIZELIMIT
645 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
646 }
647
648
649 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
650 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
651 m_op(op),
652 m_axis_offset(axis_offset),
653 m_transpose(0),
654 m_tol(0)
655 {
656 if ((getOpgroup(op)!=G_NP1OUT_P))
657 {
658 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
659 }
660 DataLazy_ptr lleft;
661 if (!left->isLazy())
662 {
663 lleft=DataLazy_ptr(new DataLazy(left));
664 }
665 else
666 {
667 lleft=dynamic_pointer_cast<DataLazy>(left);
668 }
669 m_readytype=lleft->m_readytype;
670 m_left=lleft;
671 m_samplesize=getNumDPPSample()*getNoValues();
672 m_children=m_left->m_children+1;
673 m_height=m_left->m_height+1;
674 LazyNodeSetup();
675 SIZELIMIT
676 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
677 }
678
679 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
680 : parent(left->getFunctionSpace(), left->getShape()),
681 m_op(op),
682 m_axis_offset(0),
683 m_transpose(0),
684 m_tol(tol)
685 {
686 if ((getOpgroup(op)!=G_UNARY_P))
687 {
688 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
689 }
690 DataLazy_ptr lleft;
691 if (!left->isLazy())
692 {
693 lleft=DataLazy_ptr(new DataLazy(left));
694 }
695 else
696 {
697 lleft=dynamic_pointer_cast<DataLazy>(left);
698 }
699 m_readytype=lleft->m_readytype;
700 m_left=lleft;
701 m_samplesize=getNumDPPSample()*getNoValues();
702 m_children=m_left->m_children+1;
703 m_height=m_left->m_height+1;
704 LazyNodeSetup();
705 SIZELIMIT
706 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
707 }
708
709
710 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
711 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
712 m_op(op),
713 m_axis_offset(axis0),
714 m_transpose(axis1),
715 m_tol(0)
716 {
717 if ((getOpgroup(op)!=G_NP1OUT_2P))
718 {
719 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
720 }
721 DataLazy_ptr lleft;
722 if (!left->isLazy())
723 {
724 lleft=DataLazy_ptr(new DataLazy(left));
725 }
726 else
727 {
728 lleft=dynamic_pointer_cast<DataLazy>(left);
729 }
730 m_readytype=lleft->m_readytype;
731 m_left=lleft;
732 m_samplesize=getNumDPPSample()*getNoValues();
733 m_children=m_left->m_children+1;
734 m_height=m_left->m_height+1;
735 LazyNodeSetup();
736 SIZELIMIT
737 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
738 }
739
740 DataLazy::~DataLazy()
741 {
742 delete[] m_sampleids;
743 }
744
745
746 /*
747 \brief Evaluates the expression using methods on Data.
748 This does the work for the collapse method.
749 For reasons of efficiency do not call this method on DataExpanded nodes.
750 */
751 DataReady_ptr
752 DataLazy::collapseToReady()
753 {
754 if (m_readytype=='E')
755 { // this is more an efficiency concern than anything else
756 throw DataException("Programmer Error - do not use collapse on Expanded data.");
757 }
758 if (m_op==IDENTITY)
759 {
760 return m_id;
761 }
762 DataReady_ptr pleft=m_left->collapseToReady();
763 Data left(pleft);
764 Data right;
765 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
766 {
767 right=Data(m_right->collapseToReady());
768 }
769 Data result;
770 switch(m_op)
771 {
772 case ADD:
773 result=left+right;
774 break;
775 case SUB:
776 result=left-right;
777 break;
778 case MUL:
779 result=left*right;
780 break;
781 case DIV:
782 result=left/right;
783 break;
784 case SIN:
785 result=left.sin();
786 break;
787 case COS:
788 result=left.cos();
789 break;
790 case TAN:
791 result=left.tan();
792 break;
793 case ASIN:
794 result=left.asin();
795 break;
796 case ACOS:
797 result=left.acos();
798 break;
799 case ATAN:
800 result=left.atan();
801 break;
802 case SINH:
803 result=left.sinh();
804 break;
805 case COSH:
806 result=left.cosh();
807 break;
808 case TANH:
809 result=left.tanh();
810 break;
811 case ERF:
812 result=left.erf();
813 break;
814 case ASINH:
815 result=left.asinh();
816 break;
817 case ACOSH:
818 result=left.acosh();
819 break;
820 case ATANH:
821 result=left.atanh();
822 break;
823 case LOG10:
824 result=left.log10();
825 break;
826 case LOG:
827 result=left.log();
828 break;
829 case SIGN:
830 result=left.sign();
831 break;
832 case ABS:
833 result=left.abs();
834 break;
835 case NEG:
836 result=left.neg();
837 break;
838 case POS:
839 // it doesn't mean anything for delayed.
840 // it will just trigger a deep copy of the lazy object
841 throw DataException("Programmer error - POS not supported for lazy data.");
842 break;
843 case EXP:
844 result=left.exp();
845 break;
846 case SQRT:
847 result=left.sqrt();
848 break;
849 case RECIP:
850 result=left.oneOver();
851 break;
852 case GZ:
853 result=left.wherePositive();
854 break;
855 case LZ:
856 result=left.whereNegative();
857 break;
858 case GEZ:
859 result=left.whereNonNegative();
860 break;
861 case LEZ:
862 result=left.whereNonPositive();
863 break;
864 case NEZ:
865 result=left.whereNonZero(m_tol);
866 break;
867 case EZ:
868 result=left.whereZero(m_tol);
869 break;
870 case SYM:
871 result=left.symmetric();
872 break;
873 case NSYM:
874 result=left.nonsymmetric();
875 break;
876 case PROD:
877 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
878 break;
879 case TRANS:
880 result=left.transpose(m_axis_offset);
881 break;
882 case TRACE:
883 result=left.trace(m_axis_offset);
884 break;
885 case SWAP:
886 result=left.swapaxes(m_axis_offset, m_transpose);
887 break;
888 case MINVAL:
889 result=left.minval();
890 break;
891 case MAXVAL:
892 result=left.minval();
893 break;
894 default:
895 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
896 }
897 return result.borrowReadyPtr();
898 }
899
900 /*
901 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
902 This method uses the original methods on the Data class to evaluate the expressions.
903 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
904 the purpose of using DataLazy in the first place).
905 */
906 void
907 DataLazy::collapse()
908 {
909 if (m_op==IDENTITY)
910 {
911 return;
912 }
913 if (m_readytype=='E')
914 { // this is more an efficiency concern than anything else
915 throw DataException("Programmer Error - do not use collapse on Expanded data.");
916 }
917 m_id=collapseToReady();
918 m_op=IDENTITY;
919 }
920
921
922
923
924
925
926 #define PROC_OP(TYPE,X) \
927 for (int j=0;j<onumsteps;++j)\
928 {\
929 for (int i=0;i<numsteps;++i,resultp+=resultStep) \
930 { \
931 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
932 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
933 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
934 LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
935 lroffset+=leftstep; \
936 rroffset+=rightstep; \
937 }\
938 lroffset+=oleftstep;\
939 rroffset+=orightstep;\
940 }
941
942
943 // The result will be stored in m_samples
944 // The return value is a pointer to the DataVector, offset is the offset within the return value
945 const DataTypes::ValueType*
946 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
947 {
948 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
949 // collapse so we have a 'E' node or an IDENTITY for some other type
950 if (m_readytype!='E' && m_op!=IDENTITY)
951 {
952 collapse();
953 }
954 if (m_op==IDENTITY)
955 {
956 const ValueType& vec=m_id->getVectorRO();
957 roffset=m_id->getPointOffset(sampleNo, 0);
958 #ifdef LAZY_STACK_PROF
959 int x;
960 if (&x<stackend[omp_get_thread_num()])
961 {
962 stackend[omp_get_thread_num()]=&x;
963 }
964 #endif
965 return &(vec);
966 }
967 if (m_readytype!='E')
968 {
969 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
970 }
971 if (m_sampleids[tid]==sampleNo)
972 {
973 roffset=tid*m_samplesize;
974 return &(m_samples); // sample is already resolved
975 }
976 m_sampleids[tid]=sampleNo;
977 switch (getOpgroup(m_op))
978 {
979 case G_UNARY:
980 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
981 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
982 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
983 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987 default:
988 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
989 }
990 }
991
992 const DataTypes::ValueType*
993 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
994 {
995 // we assume that any collapsing has been done before we get here
996 // since we only have one argument we don't need to think about only
997 // processing single points.
998 // we will also know we won't get identity nodes
999 if (m_readytype!='E')
1000 {
1001 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1002 }
1003 if (m_op==IDENTITY)
1004 {
1005 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1006 }
1007 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1008 const double* left=&((*leftres)[roffset]);
1009 roffset=m_samplesize*tid;
1010 double* result=&(m_samples[roffset]);
1011 switch (m_op)
1012 {
1013 case SIN:
1014 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1015 break;
1016 case COS:
1017 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1018 break;
1019 case TAN:
1020 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1021 break;
1022 case ASIN:
1023 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1024 break;
1025 case ACOS:
1026 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1027 break;
1028 case ATAN:
1029 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1030 break;
1031 case SINH:
1032 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1033 break;
1034 case COSH:
1035 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1036 break;
1037 case TANH:
1038 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1039 break;
1040 case ERF:
1041 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1042 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1043 #else
1044 tensor_unary_operation(m_samplesize, left, result, ::erf);
1045 break;
1046 #endif
1047 case ASINH:
1048 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1049 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1050 #else
1051 tensor_unary_operation(m_samplesize, left, result, ::asinh);
1052 #endif
1053 break;
1054 case ACOSH:
1055 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1056 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1057 #else
1058 tensor_unary_operation(m_samplesize, left, result, ::acosh);
1059 #endif
1060 break;
1061 case ATANH:
1062 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1063 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1064 #else
1065 tensor_unary_operation(m_samplesize, left, result, ::atanh);
1066 #endif
1067 break;
1068 case LOG10:
1069 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1070 break;
1071 case LOG:
1072 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1073 break;
1074 case SIGN:
1075 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1076 break;
1077 case ABS:
1078 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1079 break;
1080 case NEG:
1081 tensor_unary_operation(m_samplesize, left, result, negate<double>());
1082 break;
1083 case POS:
1084 // it doesn't mean anything for delayed.
1085 // it will just trigger a deep copy of the lazy object
1086 throw DataException("Programmer error - POS not supported for lazy data.");
1087 break;
1088 case EXP:
1089 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1090 break;
1091 case SQRT:
1092 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1093 break;
1094 case RECIP:
1095 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1096 break;
1097 case GZ:
1098 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1099 break;
1100 case LZ:
1101 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1102 break;
1103 case GEZ:
1104 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1105 break;
1106 case LEZ:
1107 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1108 break;
1109 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1110 case NEZ:
1111 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1112 break;
1113 case EZ:
1114 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1115 break;
1116
1117 default:
1118 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1119 }
1120 return &(m_samples);
1121 }
1122
1123
1124 const DataTypes::ValueType*
1125 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1126 {
1127 // we assume that any collapsing has been done before we get here
1128 // since we only have one argument we don't need to think about only
1129 // processing single points.
1130 // we will also know we won't get identity nodes
1131 if (m_readytype!='E')
1132 {
1133 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134 }
1135 if (m_op==IDENTITY)
1136 {
1137 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1138 }
1139 size_t loffset=0;
1140 const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1141
1142 roffset=m_samplesize*tid;
1143 unsigned int ndpps=getNumDPPSample();
1144 unsigned int psize=DataTypes::noValues(getShape());
1145 double* result=&(m_samples[roffset]);
1146 switch (m_op)
1147 {
1148 case MINVAL:
1149 {
1150 for (unsigned int z=0;z<ndpps;++z)
1151 {
1152 FMin op;
1153 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1154 loffset+=psize;
1155 result++;
1156 }
1157 }
1158 break;
1159 case MAXVAL:
1160 {
1161 for (unsigned int z=0;z<ndpps;++z)
1162 {
1163 FMax op;
1164 *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1165 loffset+=psize;
1166 result++;
1167 }
1168 }
1169 break;
1170 default:
1171 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1172 }
1173 return &(m_samples);
1174 }
1175
1176 const DataTypes::ValueType*
1177 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1178 {
1179 // we assume that any collapsing has been done before we get here
1180 // since we only have one argument we don't need to think about only
1181 // processing single points.
1182 if (m_readytype!='E')
1183 {
1184 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1185 }
1186 if (m_op==IDENTITY)
1187 {
1188 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1189 }
1190 size_t subroffset;
1191 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1192 roffset=m_samplesize*tid;
1193 size_t loop=0;
1194 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1195 size_t step=getNoValues();
1196 size_t offset=roffset;
1197 switch (m_op)
1198 {
1199 case SYM:
1200 for (loop=0;loop<numsteps;++loop)
1201 {
1202 DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1203 subroffset+=step;
1204 offset+=step;
1205 }
1206 break;
1207 case NSYM:
1208 for (loop=0;loop<numsteps;++loop)
1209 {
1210 DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1211 subroffset+=step;
1212 offset+=step;
1213 }
1214 break;
1215 default:
1216 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1217 }
1218 return &m_samples;
1219 }
1220
1221 const DataTypes::ValueType*
1222 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1223 {
1224 // we assume that any collapsing has been done before we get here
1225 // since we only have one argument we don't need to think about only
1226 // processing single points.
1227 if (m_readytype!='E')
1228 {
1229 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1230 }
1231 if (m_op==IDENTITY)
1232 {
1233 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1234 }
1235 size_t subroffset;
1236 size_t offset;
1237 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1238 roffset=m_samplesize*tid;
1239 offset=roffset;
1240 size_t loop=0;
1241 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1242 size_t outstep=getNoValues();
1243 size_t instep=m_left->getNoValues();
1244 switch (m_op)
1245 {
1246 case TRACE:
1247 for (loop=0;loop<numsteps;++loop)
1248 {
1249 DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1250 subroffset+=instep;
1251 offset+=outstep;
1252 }
1253 break;
1254 case TRANS:
1255 for (loop=0;loop<numsteps;++loop)
1256 {
1257 DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1258 subroffset+=instep;
1259 offset+=outstep;
1260 }
1261 break;
1262 default:
1263 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1264 }
1265 return &m_samples;
1266 }
1267
1268
1269 const DataTypes::ValueType*
1270 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1271 {
1272 if (m_readytype!='E')
1273 {
1274 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1275 }
1276 if (m_op==IDENTITY)
1277 {
1278 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1279 }
1280 size_t subroffset;
1281 size_t offset;
1282 const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1283 roffset=m_samplesize*tid;
1284 offset=roffset;
1285 size_t loop=0;
1286 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1287 size_t outstep=getNoValues();
1288 size_t instep=m_left->getNoValues();
1289 switch (m_op)
1290 {
1291 case SWAP:
1292 for (loop=0;loop<numsteps;++loop)
1293 {
1294 DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1295 subroffset+=instep;
1296 offset+=outstep;
1297 }
1298 break;
1299 default:
1300 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1301 }
1302 return &m_samples;
1303 }
1304
1305
1306
1307 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1308 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1309 // If both children are expanded, then we can process them in a single operation (we treat
1310 // the whole sample as one big datapoint.
1311 // If one of the children is not expanded, then we need to treat each point in the sample
1312 // individually.
1313 // There is an additional complication when scalar operations are considered.
1314 // For example, 2+Vector.
1315 // In this case each double within the point is treated individually
1316 const DataTypes::ValueType*
1317 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1318 {
1319 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1320
1321 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1322 // first work out which of the children are expanded
1323 bool leftExp=(m_left->m_readytype=='E');
1324 bool rightExp=(m_right->m_readytype=='E');
1325 if (!leftExp && !rightExp)
1326 {
1327 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1328 }
1329 bool leftScalar=(m_left->getRank()==0);
1330 bool rightScalar=(m_right->getRank()==0);
1331 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1332 {
1333 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1334 }
1335 size_t leftsize=m_left->getNoValues();
1336 size_t rightsize=m_right->getNoValues();
1337 size_t chunksize=1; // how many doubles will be processed in one go
1338 int leftstep=0; // how far should the left offset advance after each step
1339 int rightstep=0;
1340 int numsteps=0; // total number of steps for the inner loop
1341 int oleftstep=0; // the o variables refer to the outer loop
1342 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1343 int onumsteps=1;
1344
1345 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1346 bool RES=(rightExp && rightScalar);
1347 bool LS=(!leftExp && leftScalar); // left is a single scalar
1348 bool RS=(!rightExp && rightScalar);
1349 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1350 bool RN=(!rightExp && !rightScalar);
1351 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1352 bool REN=(rightExp && !rightScalar);
1353
1354 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1355 {
1356 chunksize=m_left->getNumDPPSample()*leftsize;
1357 leftstep=0;
1358 rightstep=0;
1359 numsteps=1;
1360 }
1361 else if (LES || RES)
1362 {
1363 chunksize=1;
1364 if (LES) // left is an expanded scalar
1365 {
1366 if (RS)
1367 {
1368 leftstep=1;
1369 rightstep=0;
1370 numsteps=m_left->getNumDPPSample();
1371 }
1372 else // RN or REN
1373 {
1374 leftstep=0;
1375 oleftstep=1;
1376 rightstep=1;
1377 orightstep=(RN ? -(int)rightsize : 0);
1378 numsteps=rightsize;
1379 onumsteps=m_left->getNumDPPSample();
1380 }
1381 }
1382 else // right is an expanded scalar
1383 {
1384 if (LS)
1385 {
1386 rightstep=1;
1387 leftstep=0;
1388 numsteps=m_right->getNumDPPSample();
1389 }
1390 else
1391 {
1392 rightstep=0;
1393 orightstep=1;
1394 leftstep=1;
1395 oleftstep=(LN ? -(int)leftsize : 0);
1396 numsteps=leftsize;
1397 onumsteps=m_right->getNumDPPSample();
1398 }
1399 }
1400 }
1401 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1402 {
1403 if (LEN) // and Right will be a single value
1404 {
1405 chunksize=rightsize;
1406 leftstep=rightsize;
1407 rightstep=0;
1408 numsteps=m_left->getNumDPPSample();
1409 if (RS)
1410 {
1411 numsteps*=leftsize;
1412 }
1413 }
1414 else // REN
1415 {
1416 chunksize=leftsize;
1417 rightstep=leftsize;
1418 leftstep=0;
1419 numsteps=m_right->getNumDPPSample();
1420 if (LS)
1421 {
1422 numsteps*=rightsize;
1423 }
1424 }
1425 }
1426
1427 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1428 // Get the values of sub-expressions
1429 const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1430 const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1431 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1432 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1433 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1434 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1435 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1436 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1437 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1438
1439 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1440 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1441
1442
1443 roffset=m_samplesize*tid;
1444 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we recieved
1445 switch(m_op)
1446 {
1447 case ADD:
1448 PROC_OP(NO_ARG,plus<double>());
1449 break;
1450 case SUB:
1451 PROC_OP(NO_ARG,minus<double>());
1452 break;
1453 case MUL:
1454 PROC_OP(NO_ARG,multiplies<double>());
1455 break;
1456 case DIV:
1457 PROC_OP(NO_ARG,divides<double>());
1458 break;
1459 case POW:
1460 PROC_OP(double (double,double),::pow);
1461 break;
1462 default:
1463 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1464 }
1465 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1466 return &m_samples;
1467 }
1468
1469
1470 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1471 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1472 // unlike the other resolve helpers, we must treat these datapoints separately.
1473 const DataTypes::ValueType*
1474 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1475 {
1476 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1477
1478 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1479 // first work out which of the children are expanded
1480 bool leftExp=(m_left->m_readytype=='E');
1481 bool rightExp=(m_right->m_readytype=='E');
1482 int steps=getNumDPPSample();
1483 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1484 int rightStep=(rightExp?m_right->getNoValues() : 0);
1485
1486 int resultStep=getNoValues();
1487 roffset=m_samplesize*tid;
1488 size_t offset=roffset;
1489
1490 const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1491
1492 const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1493
1494 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1495 cout << getNoValues() << endl;)
1496
1497
1498 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1499 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1500 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1501 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1502 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1503 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1504 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1505
1506 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we recieved
1507 switch(m_op)
1508 {
1509 case PROD:
1510 for (int i=0;i<steps;++i,resultp+=resultStep)
1511 {
1512 const double *ptr_0 = &((*left)[lroffset]);
1513 const double *ptr_1 = &((*right)[rroffset]);
1514
1515 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1516 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1517
1518 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1519
1520 lroffset+=leftStep;
1521 rroffset+=rightStep;
1522 }
1523 break;
1524 default:
1525 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1526 }
1527 roffset=offset;
1528 return &m_samples;
1529 }
1530
1531
1532 const DataTypes::ValueType*
1533 DataLazy::resolveSample(int sampleNo, size_t& roffset)
1534 {
1535 #ifdef _OPENMP
1536 int tid=omp_get_thread_num();
1537 #else
1538 int tid=0;
1539 #endif
1540
1541 #ifdef LAZY_STACK_PROF
1542 stackstart[tid]=&tid;
1543 stackend[tid]=&tid;
1544 const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1545 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1546 #pragma omp critical
1547 if (d>maxstackuse)
1548 {
1549 cout << "Max resolve Stack use " << d << endl;
1550 maxstackuse=d;
1551 }
1552 return r;
1553 #else
1554 return resolveNodeSample(tid, sampleNo, roffset);
1555 #endif
1556 }
1557
1558
1559 // This needs to do the work of the identity constructor
1560 void
1561 DataLazy::resolveToIdentity()
1562 {
1563 if (m_op==IDENTITY)
1564 return;
1565 DataReady_ptr p=resolveNodeWorker();
1566 makeIdentity(p);
1567 }
1568
1569 void DataLazy::makeIdentity(const DataReady_ptr& p)
1570 {
1571 m_op=IDENTITY;
1572 m_axis_offset=0;
1573 m_transpose=0;
1574 m_SL=m_SM=m_SR=0;
1575 m_children=m_height=0;
1576 m_id=p;
1577 if(p->isConstant()) {m_readytype='C';}
1578 else if(p->isExpanded()) {m_readytype='E';}
1579 else if (p->isTagged()) {m_readytype='T';}
1580 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1581 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1582 m_left.reset();
1583 m_right.reset();
1584 }
1585
1586
1587 DataReady_ptr
1588 DataLazy::resolve()
1589 {
1590 resolveToIdentity();
1591 return m_id;
1592 }
1593
1594 // This version of resolve uses storage in each node to hold results
1595 DataReady_ptr
1596 DataLazy::resolveNodeWorker()
1597 {
1598 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1599 {
1600 collapse();
1601 }
1602 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1603 {
1604 return m_id;
1605 }
1606 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1607 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), ValueType(getNoValues()));
1608 ValueType& resvec=result->getVectorRW();
1609 DataReady_ptr resptr=DataReady_ptr(result);
1610
1611 int sample;
1612 int totalsamples=getNumSamples();
1613 const ValueType* res=0; // Storage for answer
1614 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1615 #pragma omp parallel private(sample,res)
1616 {
1617 size_t roffset=0;
1618 #ifdef LAZY_STACK_PROF
1619 stackstart[omp_get_thread_num()]=&roffset;
1620 stackend[omp_get_thread_num()]=&roffset;
1621 #endif
1622 #pragma omp for schedule(static)
1623 for (sample=0;sample<totalsamples;++sample)
1624 {
1625 roffset=0;
1626 #ifdef _OPENMP
1627 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1628 #else
1629 res=resolveNodeSample(0,sample,roffset);
1630 #endif
1631 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1632 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1633 DataVector::size_type outoffset=result->getPointOffset(sample,0);
1634 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1635 }
1636 }
1637 #ifdef LAZY_STACK_PROF
1638 for (int i=0;i<getNumberOfThreads();++i)
1639 {
1640 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1641 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1642 if (r>maxstackuse)
1643 {
1644 maxstackuse=r;
1645 }
1646 }
1647 cout << "Max resolve Stack use=" << maxstackuse << endl;
1648 #endif
1649 return resptr;
1650 }
1651
1652 std::string
1653 DataLazy::toString() const
1654 {
1655 ostringstream oss;
1656 oss << "Lazy Data:";
1657 if (escriptParams.getPRINT_LAZY_TREE()==0)
1658 {
1659 intoString(oss);
1660 }
1661 else
1662 {
1663 oss << endl;
1664 intoTreeString(oss,"");
1665 }
1666 return oss.str();
1667 }
1668
1669
1670 void
1671 DataLazy::intoString(ostringstream& oss) const
1672 {
1673 // oss << "[" << m_children <<";"<<m_height <<"]";
1674 switch (getOpgroup(m_op))
1675 {
1676 case G_IDENTITY:
1677 if (m_id->isExpanded())
1678 {
1679 oss << "E";
1680 }
1681 else if (m_id->isTagged())
1682 {
1683 oss << "T";
1684 }
1685 else if (m_id->isConstant())
1686 {
1687 oss << "C";
1688 }
1689 else
1690 {
1691 oss << "?";
1692 }
1693 oss << '@' << m_id.get();
1694 break;
1695 case G_BINARY:
1696 oss << '(';
1697 m_left->intoString(oss);
1698 oss << ' ' << opToString(m_op) << ' ';
1699 m_right->intoString(oss);
1700 oss << ')';
1701 break;
1702 case G_UNARY:
1703 case G_UNARY_P:
1704 case G_NP1OUT:
1705 case G_NP1OUT_P:
1706 case G_REDUCTION:
1707 oss << opToString(m_op) << '(';
1708 m_left->intoString(oss);
1709 oss << ')';
1710 break;
1711 case G_TENSORPROD:
1712 oss << opToString(m_op) << '(';
1713 m_left->intoString(oss);
1714 oss << ", ";
1715 m_right->intoString(oss);
1716 oss << ')';
1717 break;
1718 case G_NP1OUT_2P:
1719 oss << opToString(m_op) << '(';
1720 m_left->intoString(oss);
1721 oss << ", " << m_axis_offset << ", " << m_transpose;
1722 oss << ')';
1723 break;
1724 default:
1725 oss << "UNKNOWN";
1726 }
1727 }
1728
1729
1730 void
1731 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1732 {
1733 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1734 switch (getOpgroup(m_op))
1735 {
1736 case G_IDENTITY:
1737 if (m_id->isExpanded())
1738 {
1739 oss << "E";
1740 }
1741 else if (m_id->isTagged())
1742 {
1743 oss << "T";
1744 }
1745 else if (m_id->isConstant())
1746 {
1747 oss << "C";
1748 }
1749 else
1750 {
1751 oss << "?";
1752 }
1753 oss << '@' << m_id.get() << endl;
1754 break;
1755 case G_BINARY:
1756 oss << opToString(m_op) << endl;
1757 indent+='.';
1758 m_left->intoTreeString(oss, indent);
1759 m_right->intoTreeString(oss, indent);
1760 break;
1761 case G_UNARY:
1762 case G_UNARY_P:
1763 case G_NP1OUT:
1764 case G_NP1OUT_P:
1765 case G_REDUCTION:
1766 oss << opToString(m_op) << endl;
1767 indent+='.';
1768 m_left->intoTreeString(oss, indent);
1769 break;
1770 case G_TENSORPROD:
1771 oss << opToString(m_op) << endl;
1772 indent+='.';
1773 m_left->intoTreeString(oss, indent);
1774 m_right->intoTreeString(oss, indent);
1775 break;
1776 case G_NP1OUT_2P:
1777 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1778 indent+='.';
1779 m_left->intoTreeString(oss, indent);
1780 break;
1781 default:
1782 oss << "UNKNOWN";
1783 }
1784 }
1785
1786
1787 DataAbstract*
1788 DataLazy::deepCopy()
1789 {
1790 switch (getOpgroup(m_op))
1791 {
1792 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1793 case G_UNARY:
1794 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1795 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1796 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1797 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1798 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1799 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1800 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1801 default:
1802 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1803 }
1804 }
1805
1806
1807
1808 // There is no single, natural interpretation of getLength on DataLazy.
1809 // Instances of DataReady can look at the size of their vectors.
1810 // For lazy though, it could be the size the data would be if it were resolved;
1811 // or it could be some function of the lengths of the DataReady instances which
1812 // form part of the expression.
1813 // Rather than have people making assumptions, I have disabled the method.
1814 DataTypes::ValueType::size_type
1815 DataLazy::getLength() const
1816 {
1817 throw DataException("getLength() does not make sense for lazy data.");
1818 }
1819
1820
1821 DataAbstract*
1822 DataLazy::getSlice(const DataTypes::RegionType& region) const
1823 {
1824 throw DataException("getSlice - not implemented for Lazy objects.");
1825 }
1826
1827
1828 // To do this we need to rely on our child nodes
1829 DataTypes::ValueType::size_type
1830 DataLazy::getPointOffset(int sampleNo,
1831 int dataPointNo)
1832 {
1833 if (m_op==IDENTITY)
1834 {
1835 return m_id->getPointOffset(sampleNo,dataPointNo);
1836 }
1837 if (m_readytype!='E')
1838 {
1839 collapse();
1840 return m_id->getPointOffset(sampleNo,dataPointNo);
1841 }
1842 // at this point we do not have an identity node and the expression will be Expanded
1843 // so we only need to know which child to ask
1844 if (m_left->m_readytype=='E')
1845 {
1846 return m_left->getPointOffset(sampleNo,dataPointNo);
1847 }
1848 else
1849 {
1850 return m_right->getPointOffset(sampleNo,dataPointNo);
1851 }
1852 }
1853
1854 // To do this we need to rely on our child nodes
1855 DataTypes::ValueType::size_type
1856 DataLazy::getPointOffset(int sampleNo,
1857 int dataPointNo) const
1858 {
1859 if (m_op==IDENTITY)
1860 {
1861 return m_id->getPointOffset(sampleNo,dataPointNo);
1862 }
1863 if (m_readytype=='E')
1864 {
1865 // at this point we do not have an identity node and the expression will be Expanded
1866 // so we only need to know which child to ask
1867 if (m_left->m_readytype=='E')
1868 {
1869 return m_left->getPointOffset(sampleNo,dataPointNo);
1870 }
1871 else
1872 {
1873 return m_right->getPointOffset(sampleNo,dataPointNo);
1874 }
1875 }
1876 if (m_readytype=='C')
1877 {
1878 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1879 }
1880 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1881 }
1882
1883
1884 // I have decided to let Data:: handle this issue.
1885 void
1886 DataLazy::setToZero()
1887 {
1888 // DataTypes::ValueType v(getNoValues(),0);
1889 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1890 // m_op=IDENTITY;
1891 // m_right.reset();
1892 // m_left.reset();
1893 // m_readytype='C';
1894 // m_buffsRequired=1;
1895
1896 privdebug=privdebug; // to stop the compiler complaining about unused privdebug
1897 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1898 }
1899
1900 bool
1901 DataLazy::actsExpanded() const
1902 {
1903 return (m_readytype=='E');
1904 }
1905
1906 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26