/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Contents of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6511 - (show annotations)
Fri Mar 3 01:41:39 2017 UTC (2 years ago) by jfenwick
File size: 62652 byte(s)
For some experiments

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include "DataLazy.h"
18 #include "Data.h"
19 #include "DataTypes.h"
20 #include "EscriptParams.h"
21 #include "FunctionSpace.h"
22 #include "Utils.h"
23 #include "DataVectorOps.h"
24
25 #include <iomanip> // for some fancy formatting in debug
26
27 using namespace escript::DataTypes;
28
29 #define NO_ARG
30
31 // #define LAZYDEBUG(X) if (privdebug){X;}
32 #define LAZYDEBUG(X)
33 namespace
34 {
35 bool privdebug=false;
36
37 #define ENABLEDEBUG privdebug=true;
38 #define DISABLEDEBUG privdebug=false;
39 }
40
41 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42
43 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44
45
46 #define SIZELIMIT \
47 if (m_height > escript::escriptParams.getTooManyLevels()) {\
48 if (escript::escriptParams.getLazyVerbose()) {\
49 cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50 }\
51 resolveToIdentity();\
52 }
53
54 /*
55 How does DataLazy work?
56 ~~~~~~~~~~~~~~~~~~~~~~~
57
58 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59 denoted left and right.
60
61 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62 This means that all "internal" nodes in the structure are instances of DataLazy.
63
64 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65 Note that IDENTITY is not considered a unary operation.
66
67 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68 It must however form a DAG (directed acyclic graph).
69 I will refer to individual DataLazy objects with the structure as nodes.
70
71 Each node also stores:
72 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73 evaluated.
74 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75 evaluate the expression.
76 - m_samplesize ~ the number of doubles stored in a sample.
77
78 When a new node is created, the above values are computed based on the values in the child nodes.
79 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80
81 The resolve method, which produces a DataReady from a DataLazy, does the following:
82 1) Create a DataReady to hold the new result.
83 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85
86 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87
88 resolveSample returns a Vector* and an offset within that vector where the result is stored.
89 Normally, this would be v, but for identity nodes their internal vector is returned instead.
90
91 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92
93 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95
96 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97 1) Add to the ES_optype.
98 2) determine what opgroup your operation belongs to (X)
99 3) add a string for the op to the end of ES_opstrings
100 4) increase ES_opcount
101 5) add an entry (X) to opgroups
102 6) add an entry to the switch in collapseToReady
103 7) add an entry to resolveX
104 */
105
106
107 using namespace std;
108 using namespace boost;
109
110 namespace escript
111 {
112
113 namespace
114 {
115
116
117 // enabling this will print out when ever the maximum stacksize used by resolve increases
118 // it assumes _OPENMP is also in use
119 //#define LAZY_STACK_PROF
120
121
122
123 #ifndef _OPENMP
124 #ifdef LAZY_STACK_PROF
125 #undef LAZY_STACK_PROF
126 #endif
127 #endif
128
129
130 #ifdef LAZY_STACK_PROF
131 std::vector<void*> stackstart(getNumberOfThreads());
132 std::vector<void*> stackend(getNumberOfThreads());
133 size_t maxstackuse=0;
134 #endif
135
136
137 // return the FunctionSpace of the result of "left op right"
138 FunctionSpace
139 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140 {
141 // perhaps this should call interpolate and throw or something?
142 // maybe we need an interpolate node -
143 // that way, if interpolate is required in any other op we can just throw a
144 // programming error exception.
145
146 FunctionSpace l=left->getFunctionSpace();
147 FunctionSpace r=right->getFunctionSpace();
148 if (l!=r)
149 {
150 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
151 if (res==1)
152 {
153 return l;
154 }
155 if (res==-1)
156 {
157 return r;
158 }
159 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
160 }
161 return l;
162 }
163
164 // return the shape of the result of "left op right"
165 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
166 DataTypes::ShapeType
167 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
168 {
169 if (left->getShape()!=right->getShape())
170 {
171 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
172 {
173 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
174 }
175
176 if (left->getRank()==0) // we need to allow scalar * anything
177 {
178 return right->getShape();
179 }
180 if (right->getRank()==0)
181 {
182 return left->getShape();
183 }
184 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
185 }
186 return left->getShape();
187 }
188
189 // return the shape for "op left"
190
191 DataTypes::ShapeType
192 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
193 {
194 switch(op)
195 {
196 case TRANS:
197 { // for the scoping of variables
198 const DataTypes::ShapeType& s=left->getShape();
199 DataTypes::ShapeType sh;
200 int rank=left->getRank();
201 if (axis_offset<0 || axis_offset>rank)
202 {
203 stringstream e;
204 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
205 throw DataException(e.str());
206 }
207 for (int i=0; i<rank; i++)
208 {
209 int index = (axis_offset+i)%rank;
210 sh.push_back(s[index]); // Append to new shape
211 }
212 return sh;
213 }
214 break;
215 case TRACE:
216 {
217 int rank=left->getRank();
218 if (rank<2)
219 {
220 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
221 }
222 if ((axis_offset>rank-2) || (axis_offset<0))
223 {
224 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
225 }
226 if (rank==2)
227 {
228 return DataTypes::scalarShape;
229 }
230 else if (rank==3)
231 {
232 DataTypes::ShapeType sh;
233 if (axis_offset==0)
234 {
235 sh.push_back(left->getShape()[2]);
236 }
237 else // offset==1
238 {
239 sh.push_back(left->getShape()[0]);
240 }
241 return sh;
242 }
243 else if (rank==4)
244 {
245 DataTypes::ShapeType sh;
246 const DataTypes::ShapeType& s=left->getShape();
247 if (axis_offset==0)
248 {
249 sh.push_back(s[2]);
250 sh.push_back(s[3]);
251 }
252 else if (axis_offset==1)
253 {
254 sh.push_back(s[0]);
255 sh.push_back(s[3]);
256 }
257 else // offset==2
258 {
259 sh.push_back(s[0]);
260 sh.push_back(s[1]);
261 }
262 return sh;
263 }
264 else // unknown rank
265 {
266 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
267 }
268 }
269 break;
270 default:
271 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
272 }
273 }
274
275 DataTypes::ShapeType
276 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
277 {
278 // This code taken from the Data.cpp swapaxes() method
279 // Some of the checks are probably redundant here
280 int axis0_tmp,axis1_tmp;
281 const DataTypes::ShapeType& s=left->getShape();
282 DataTypes::ShapeType out_shape;
283 // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
284 // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
285 int rank=left->getRank();
286 if (rank<2) {
287 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
288 }
289 if (axis0<0 || axis0>rank-1) {
290 stringstream e;
291 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
292 throw DataException(e.str());
293 }
294 if (axis1<0 || axis1>rank-1) {
295 stringstream e;
296 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
297 throw DataException(e.str());
298 }
299 if (axis0 == axis1) {
300 throw DataException("Error - Data::swapaxes: axis indices must be different.");
301 }
302 if (axis0 > axis1) {
303 axis0_tmp=axis1;
304 axis1_tmp=axis0;
305 } else {
306 axis0_tmp=axis0;
307 axis1_tmp=axis1;
308 }
309 for (int i=0; i<rank; i++) {
310 if (i == axis0_tmp) {
311 out_shape.push_back(s[axis1_tmp]);
312 } else if (i == axis1_tmp) {
313 out_shape.push_back(s[axis0_tmp]);
314 } else {
315 out_shape.push_back(s[i]);
316 }
317 }
318 return out_shape;
319 }
320
321
322 // determine the output shape for the general tensor product operation
323 // the additional parameters return information required later for the product
324 // the majority of this code is copy pasted from C_General_Tensor_Product
325 DataTypes::ShapeType
326 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
327 {
328
329 // Get rank and shape of inputs
330 int rank0 = left->getRank();
331 int rank1 = right->getRank();
332 const DataTypes::ShapeType& shape0 = left->getShape();
333 const DataTypes::ShapeType& shape1 = right->getShape();
334
335 // Prepare for the loops of the product and verify compatibility of shapes
336 int start0=0, start1=0;
337 if (transpose == 0) {}
338 else if (transpose == 1) { start0 = axis_offset; }
339 else if (transpose == 2) { start1 = rank1-axis_offset; }
340 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
341
342 if (rank0<axis_offset)
343 {
344 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
345 }
346
347 // Adjust the shapes for transpose
348 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
349 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
350 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
351 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
352
353 // Prepare for the loops of the product
354 SL=1, SM=1, SR=1;
355 for (int i=0; i<rank0-axis_offset; i++) {
356 SL *= tmpShape0[i];
357 }
358 for (int i=rank0-axis_offset; i<rank0; i++) {
359 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
360 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
361 }
362 SM *= tmpShape0[i];
363 }
364 for (int i=axis_offset; i<rank1; i++) {
365 SR *= tmpShape1[i];
366 }
367
368 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
369 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
370 { // block to limit the scope of out_index
371 int out_index=0;
372 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
373 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
374 }
375
376 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
377 {
378 ostringstream os;
379 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
380 throw DataException(os.str());
381 }
382
383 return shape2;
384 }
385
386 } // end anonymous namespace
387
388 void DataLazy::LazyNodeSetup()
389 {
390 #ifdef _OPENMP
391 int numthreads=omp_get_max_threads();
392 m_samples.resize(numthreads*m_samplesize);
393 m_sampleids=new int[numthreads];
394 for (int i=0;i<numthreads;++i)
395 {
396 m_sampleids[i]=-1;
397 }
398 #else
399 m_samples.resize(m_samplesize);
400 m_sampleids=new int[1];
401 m_sampleids[0]=-1;
402 #endif // _OPENMP
403 }
404
405
406 // Creates an identity node
407 DataLazy::DataLazy(DataAbstract_ptr p)
408 : parent(p->getFunctionSpace(),p->getShape())
409 ,m_sampleids(0),
410 m_samples(1)
411 {
412 if (p->isLazy())
413 {
414 // I don't want identity of Lazy.
415 // Question: Why would that be so bad?
416 // Answer: We assume that the child of ID is something we can call getVector on
417 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
418 }
419 else
420 {
421 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
422 makeIdentity(dr);
423 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
424 }
425 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
426 }
427
428 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
429 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
430 m_op(op),
431 m_axis_offset(0),
432 m_transpose(0),
433 m_SL(0), m_SM(0), m_SR(0)
434 {
435 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
436 {
437 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
438 }
439
440 DataLazy_ptr lleft;
441 if (!left->isLazy())
442 {
443 lleft=DataLazy_ptr(new DataLazy(left));
444 }
445 else
446 {
447 lleft=dynamic_pointer_cast<DataLazy>(left);
448 }
449 m_readytype=lleft->m_readytype;
450 m_left=lleft;
451 m_samplesize=getNumDPPSample()*getNoValues();
452 m_children=m_left->m_children+1;
453 m_height=m_left->m_height+1;
454 LazyNodeSetup();
455 SIZELIMIT
456 }
457
458
459 // In this constructor we need to consider interpolation
460 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
461 : parent(resultFS(left,right,op), resultShape(left,right,op)),
462 m_op(op),
463 m_SL(0), m_SM(0), m_SR(0)
464 {
465 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
466 if ((getOpgroup(op)!=G_BINARY))
467 {
468 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
469 }
470
471 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
472 {
473 FunctionSpace fs=getFunctionSpace();
474 Data ltemp(left);
475 Data tmp(ltemp,fs);
476 left=tmp.borrowDataPtr();
477 }
478 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
479 {
480 Data tmp(Data(right),getFunctionSpace());
481 right=tmp.borrowDataPtr();
482 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
483 }
484 left->operandCheck(*right);
485
486 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
487 {
488 m_left=dynamic_pointer_cast<DataLazy>(left);
489 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
490 }
491 else
492 {
493 m_left=DataLazy_ptr(new DataLazy(left));
494 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
495 }
496 if (right->isLazy())
497 {
498 m_right=dynamic_pointer_cast<DataLazy>(right);
499 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
500 }
501 else
502 {
503 m_right=DataLazy_ptr(new DataLazy(right));
504 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
505 }
506 char lt=m_left->m_readytype;
507 char rt=m_right->m_readytype;
508 if (lt=='E' || rt=='E')
509 {
510 m_readytype='E';
511 }
512 else if (lt=='T' || rt=='T')
513 {
514 m_readytype='T';
515 }
516 else
517 {
518 m_readytype='C';
519 }
520 m_samplesize=getNumDPPSample()*getNoValues();
521 m_children=m_left->m_children+m_right->m_children+2;
522 m_height=max(m_left->m_height,m_right->m_height)+1;
523 LazyNodeSetup();
524 SIZELIMIT
525 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
526 }
527
528 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
529 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
530 m_op(op),
531 m_axis_offset(axis_offset),
532 m_transpose(transpose)
533 {
534 if ((getOpgroup(op)!=G_TENSORPROD))
535 {
536 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
537 }
538 if ((transpose>2) || (transpose<0))
539 {
540 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
541 }
542 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
543 {
544 FunctionSpace fs=getFunctionSpace();
545 Data ltemp(left);
546 Data tmp(ltemp,fs);
547 left=tmp.borrowDataPtr();
548 }
549 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
550 {
551 Data tmp(Data(right),getFunctionSpace());
552 right=tmp.borrowDataPtr();
553 }
554 // left->operandCheck(*right);
555
556 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
557 {
558 m_left=dynamic_pointer_cast<DataLazy>(left);
559 }
560 else
561 {
562 m_left=DataLazy_ptr(new DataLazy(left));
563 }
564 if (right->isLazy())
565 {
566 m_right=dynamic_pointer_cast<DataLazy>(right);
567 }
568 else
569 {
570 m_right=DataLazy_ptr(new DataLazy(right));
571 }
572 char lt=m_left->m_readytype;
573 char rt=m_right->m_readytype;
574 if (lt=='E' || rt=='E')
575 {
576 m_readytype='E';
577 }
578 else if (lt=='T' || rt=='T')
579 {
580 m_readytype='T';
581 }
582 else
583 {
584 m_readytype='C';
585 }
586 m_samplesize=getNumDPPSample()*getNoValues();
587 m_children=m_left->m_children+m_right->m_children+2;
588 m_height=max(m_left->m_height,m_right->m_height)+1;
589 LazyNodeSetup();
590 SIZELIMIT
591 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
592 }
593
594
595 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
596 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
597 m_op(op),
598 m_axis_offset(axis_offset),
599 m_transpose(0),
600 m_tol(0)
601 {
602 if ((getOpgroup(op)!=G_NP1OUT_P))
603 {
604 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
605 }
606 DataLazy_ptr lleft;
607 if (!left->isLazy())
608 {
609 lleft=DataLazy_ptr(new DataLazy(left));
610 }
611 else
612 {
613 lleft=dynamic_pointer_cast<DataLazy>(left);
614 }
615 m_readytype=lleft->m_readytype;
616 m_left=lleft;
617 m_samplesize=getNumDPPSample()*getNoValues();
618 m_children=m_left->m_children+1;
619 m_height=m_left->m_height+1;
620 LazyNodeSetup();
621 SIZELIMIT
622 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
623 }
624
625 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
626 : parent(left->getFunctionSpace(), left->getShape()),
627 m_op(op),
628 m_axis_offset(0),
629 m_transpose(0),
630 m_tol(tol)
631 {
632 if ((getOpgroup(op)!=G_UNARY_P))
633 {
634 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
635 }
636 DataLazy_ptr lleft;
637 if (!left->isLazy())
638 {
639 lleft=DataLazy_ptr(new DataLazy(left));
640 }
641 else
642 {
643 lleft=dynamic_pointer_cast<DataLazy>(left);
644 }
645 m_readytype=lleft->m_readytype;
646 m_left=lleft;
647 m_samplesize=getNumDPPSample()*getNoValues();
648 m_children=m_left->m_children+1;
649 m_height=m_left->m_height+1;
650 LazyNodeSetup();
651 SIZELIMIT
652 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
653 }
654
655
656 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
657 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
658 m_op(op),
659 m_axis_offset(axis0),
660 m_transpose(axis1),
661 m_tol(0)
662 {
663 if ((getOpgroup(op)!=G_NP1OUT_2P))
664 {
665 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
666 }
667 DataLazy_ptr lleft;
668 if (!left->isLazy())
669 {
670 lleft=DataLazy_ptr(new DataLazy(left));
671 }
672 else
673 {
674 lleft=dynamic_pointer_cast<DataLazy>(left);
675 }
676 m_readytype=lleft->m_readytype;
677 m_left=lleft;
678 m_samplesize=getNumDPPSample()*getNoValues();
679 m_children=m_left->m_children+1;
680 m_height=m_left->m_height+1;
681 LazyNodeSetup();
682 SIZELIMIT
683 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
684 }
685
686
687 namespace
688 {
689
690 inline int max3(int a, int b, int c)
691 {
692 int t=(a>b?a:b);
693 return (t>c?t:c);
694
695 }
696 }
697
698 DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
699 : parent(left->getFunctionSpace(), left->getShape()),
700 m_op(CONDEVAL),
701 m_axis_offset(0),
702 m_transpose(0),
703 m_tol(0)
704 {
705
706 DataLazy_ptr lmask;
707 DataLazy_ptr lleft;
708 DataLazy_ptr lright;
709 if (!mask->isLazy())
710 {
711 lmask=DataLazy_ptr(new DataLazy(mask));
712 }
713 else
714 {
715 lmask=dynamic_pointer_cast<DataLazy>(mask);
716 }
717 if (!left->isLazy())
718 {
719 lleft=DataLazy_ptr(new DataLazy(left));
720 }
721 else
722 {
723 lleft=dynamic_pointer_cast<DataLazy>(left);
724 }
725 if (!right->isLazy())
726 {
727 lright=DataLazy_ptr(new DataLazy(right));
728 }
729 else
730 {
731 lright=dynamic_pointer_cast<DataLazy>(right);
732 }
733 m_readytype=lmask->m_readytype;
734 if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
735 {
736 throw DataException("Programmer Error - condEval arguments must have the same readytype");
737 }
738 m_left=lleft;
739 m_right=lright;
740 m_mask=lmask;
741 m_samplesize=getNumDPPSample()*getNoValues();
742 m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
743 m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
744 LazyNodeSetup();
745 SIZELIMIT
746 LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
747 }
748
749
750
751 DataLazy::~DataLazy()
752 {
753 delete[] m_sampleids;
754 }
755
756
757 /*
758 \brief Evaluates the expression using methods on Data.
759 This does the work for the collapse method.
760 For reasons of efficiency do not call this method on DataExpanded nodes.
761 */
762 DataReady_ptr
763 DataLazy::collapseToReady() const
764 {
765 if (m_readytype=='E')
766 { // this is more an efficiency concern than anything else
767 throw DataException("Programmer Error - do not use collapse on Expanded data.");
768 }
769 if (m_op==IDENTITY)
770 {
771 return m_id;
772 }
773 DataReady_ptr pleft=m_left->collapseToReady();
774 Data left(pleft);
775 Data right;
776 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
777 {
778 right=Data(m_right->collapseToReady());
779 }
780 Data result;
781 switch(m_op)
782 {
783 case ADD:
784 result=left+right;
785 break;
786 case SUB:
787 result=left-right;
788 break;
789 case MUL:
790 result=left*right;
791 break;
792 case DIV:
793 result=left/right;
794 break;
795 case SIN:
796 result=left.sin();
797 break;
798 case COS:
799 result=left.cos();
800 break;
801 case TAN:
802 result=left.tan();
803 break;
804 case ASIN:
805 result=left.asin();
806 break;
807 case ACOS:
808 result=left.acos();
809 break;
810 case ATAN:
811 result=left.atan();
812 break;
813 case SINH:
814 result=left.sinh();
815 break;
816 case COSH:
817 result=left.cosh();
818 break;
819 case TANH:
820 result=left.tanh();
821 break;
822 case ERF:
823 result=left.erf();
824 break;
825 case ASINH:
826 result=left.asinh();
827 break;
828 case ACOSH:
829 result=left.acosh();
830 break;
831 case ATANH:
832 result=left.atanh();
833 break;
834 case LOG10:
835 result=left.log10();
836 break;
837 case LOG:
838 result=left.log();
839 break;
840 case SIGN:
841 result=left.sign();
842 break;
843 case ABS:
844 result=left.abs();
845 break;
846 case NEG:
847 result=left.neg();
848 break;
849 case POS:
850 // it doesn't mean anything for delayed.
851 // it will just trigger a deep copy of the lazy object
852 throw DataException("Programmer error - POS not supported for lazy data.");
853 break;
854 case EXP:
855 result=left.exp();
856 break;
857 case SQRT:
858 result=left.sqrt();
859 break;
860 case RECIP:
861 result=left.oneOver();
862 break;
863 case GZ:
864 result=left.wherePositive();
865 break;
866 case LZ:
867 result=left.whereNegative();
868 break;
869 case GEZ:
870 result=left.whereNonNegative();
871 break;
872 case LEZ:
873 result=left.whereNonPositive();
874 break;
875 case NEZ:
876 result=left.whereNonZero(m_tol);
877 break;
878 case EZ:
879 result=left.whereZero(m_tol);
880 break;
881 case SYM:
882 result=left.symmetric();
883 break;
884 case NSYM:
885 result=left.antisymmetric();
886 break;
887 case PROD:
888 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
889 break;
890 case TRANS:
891 result=left.transpose(m_axis_offset);
892 break;
893 case TRACE:
894 result=left.trace(m_axis_offset);
895 break;
896 case SWAP:
897 result=left.swapaxes(m_axis_offset, m_transpose);
898 break;
899 case MINVAL:
900 result=left.minval();
901 break;
902 case MAXVAL:
903 result=left.minval();
904 break;
905 case HER:
906 result=left.hermitian();
907 break;
908 default:
909 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
910 }
911 return result.borrowReadyPtr();
912 }
913
914 /*
915 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
916 This method uses the original methods on the Data class to evaluate the expressions.
917 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
918 the purpose of using DataLazy in the first place).
919 */
920 void
921 DataLazy::collapse() const
922 {
923 if (m_op==IDENTITY)
924 {
925 return;
926 }
927 if (m_readytype=='E')
928 { // this is more an efficiency concern than anything else
929 throw DataException("Programmer Error - do not use collapse on Expanded data.");
930 }
931 m_id=collapseToReady();
932 m_op=IDENTITY;
933 }
934
935 // The result will be stored in m_samples
936 // The return value is a pointer to the DataVector, offset is the offset within the return value
937 const DataTypes::RealVectorType*
938 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
939 {
940 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
941 // collapse so we have a 'E' node or an IDENTITY for some other type
942 if (m_readytype!='E' && m_op!=IDENTITY)
943 {
944 collapse();
945 }
946 if (m_op==IDENTITY)
947 {
948 const RealVectorType& vec=m_id->getVectorRO();
949 roffset=m_id->getPointOffset(sampleNo, 0);
950 #ifdef LAZY_STACK_PROF
951 int x;
952 if (&x<stackend[omp_get_thread_num()])
953 {
954 stackend[omp_get_thread_num()]=&x;
955 }
956 #endif
957 return &(vec);
958 }
959 if (m_readytype!='E')
960 {
961 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
962 }
963 if (m_sampleids[tid]==sampleNo)
964 {
965 roffset=tid*m_samplesize;
966 return &(m_samples); // sample is already resolved
967 }
968 m_sampleids[tid]=sampleNo;
969
970 switch (getOpgroup(m_op))
971 {
972 case G_UNARY:
973 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
974 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
975 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
976 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
977 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
978 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
979 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
980 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
981 default:
982 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
983 }
984 }
985
986 const DataTypes::RealVectorType*
987 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
988 {
989 // we assume that any collapsing has been done before we get here
990 // since we only have one argument we don't need to think about only
991 // processing single points.
992 // we will also know we won't get identity nodes
993 if (m_readytype!='E')
994 {
995 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
996 }
997 if (m_op==IDENTITY)
998 {
999 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1000 }
1001 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1002 const double* left=&((*leftres)[roffset]);
1003 roffset=m_samplesize*tid;
1004 double* result=&(m_samples[roffset]);
1005 if (m_op==POS)
1006 {
1007 // this should be prevented earlier
1008 // operation is meaningless for lazy
1009 throw DataException("Programmer error - POS not supported for lazy data.");
1010 }
1011 tensor_unary_array_operation(m_samplesize,
1012 left,
1013 result,
1014 m_op,
1015 m_tol);
1016 return &(m_samples);
1017 }
1018
1019
1020 const DataTypes::RealVectorType*
1021 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1022 {
1023 // we assume that any collapsing has been done before we get here
1024 // since we only have one argument we don't need to think about only
1025 // processing single points.
1026 // we will also know we won't get identity nodes
1027 if (m_readytype!='E')
1028 {
1029 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1030 }
1031 if (m_op==IDENTITY)
1032 {
1033 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1034 }
1035 size_t loffset=0;
1036 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1037
1038 roffset=m_samplesize*tid;
1039 unsigned int ndpps=getNumDPPSample();
1040 unsigned int psize=DataTypes::noValues(m_left->getShape());
1041 double* result=&(m_samples[roffset]);
1042 switch (m_op)
1043 {
1044 case MINVAL:
1045 {
1046 for (unsigned int z=0;z<ndpps;++z)
1047 {
1048 FMin op;
1049 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1050 loffset+=psize;
1051 result++;
1052 }
1053 }
1054 break;
1055 case MAXVAL:
1056 {
1057 for (unsigned int z=0;z<ndpps;++z)
1058 {
1059 FMax op;
1060 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1061 loffset+=psize;
1062 result++;
1063 }
1064 }
1065 break;
1066 default:
1067 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1068 }
1069 return &(m_samples);
1070 }
1071
1072 const DataTypes::RealVectorType*
1073 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1074 {
1075 // we assume that any collapsing has been done before we get here
1076 // since we only have one argument we don't need to think about only
1077 // processing single points.
1078 if (m_readytype!='E')
1079 {
1080 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1081 }
1082 if (m_op==IDENTITY)
1083 {
1084 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1085 }
1086 size_t subroffset;
1087 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1088 roffset=m_samplesize*tid;
1089 size_t loop=0;
1090 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1091 size_t step=getNoValues();
1092 size_t offset=roffset;
1093 switch (m_op)
1094 {
1095 case SYM:
1096 for (loop=0;loop<numsteps;++loop)
1097 {
1098 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1099 subroffset+=step;
1100 offset+=step;
1101 }
1102 break;
1103 case NSYM:
1104 for (loop=0;loop<numsteps;++loop)
1105 {
1106 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1107 subroffset+=step;
1108 offset+=step;
1109 }
1110 break;
1111 default:
1112 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1113 }
1114 return &m_samples;
1115 }
1116
1117 const DataTypes::RealVectorType*
1118 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1119 {
1120 // we assume that any collapsing has been done before we get here
1121 // since we only have one argument we don't need to think about only
1122 // processing single points.
1123 if (m_readytype!='E')
1124 {
1125 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1126 }
1127 if (m_op==IDENTITY)
1128 {
1129 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1130 }
1131 size_t subroffset;
1132 size_t offset;
1133 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1134 roffset=m_samplesize*tid;
1135 offset=roffset;
1136 size_t loop=0;
1137 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1138 size_t outstep=getNoValues();
1139 size_t instep=m_left->getNoValues();
1140 switch (m_op)
1141 {
1142 case TRACE:
1143 for (loop=0;loop<numsteps;++loop)
1144 {
1145 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1146 subroffset+=instep;
1147 offset+=outstep;
1148 }
1149 break;
1150 case TRANS:
1151 for (loop=0;loop<numsteps;++loop)
1152 {
1153 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1154 subroffset+=instep;
1155 offset+=outstep;
1156 }
1157 break;
1158 default:
1159 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1160 }
1161 return &m_samples;
1162 }
1163
1164
1165 const DataTypes::RealVectorType*
1166 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1167 {
1168 if (m_readytype!='E')
1169 {
1170 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1171 }
1172 if (m_op==IDENTITY)
1173 {
1174 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1175 }
1176 size_t subroffset;
1177 size_t offset;
1178 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1179 roffset=m_samplesize*tid;
1180 offset=roffset;
1181 size_t loop=0;
1182 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1183 size_t outstep=getNoValues();
1184 size_t instep=m_left->getNoValues();
1185 switch (m_op)
1186 {
1187 case SWAP:
1188 for (loop=0;loop<numsteps;++loop)
1189 {
1190 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1191 subroffset+=instep;
1192 offset+=outstep;
1193 }
1194 break;
1195 default:
1196 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1197 }
1198 return &m_samples;
1199 }
1200
1201 const DataTypes::RealVectorType*
1202 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1203 {
1204 if (m_readytype!='E')
1205 {
1206 throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1207 }
1208 if (m_op!=CONDEVAL)
1209 {
1210 throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1211 }
1212 size_t subroffset;
1213
1214 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1215 const RealVectorType* srcres=0;
1216 if ((*maskres)[subroffset]>0)
1217 {
1218 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1219 }
1220 else
1221 {
1222 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1223 }
1224
1225 // Now we need to copy the result
1226
1227 roffset=m_samplesize*tid;
1228 for (int i=0;i<m_samplesize;++i)
1229 {
1230 m_samples[roffset+i]=(*srcres)[subroffset+i];
1231 }
1232
1233 return &m_samples;
1234 }
1235
1236 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1237 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1238 // If both children are expanded, then we can process them in a single operation (we treat
1239 // the whole sample as one big datapoint.
1240 // If one of the children is not expanded, then we need to treat each point in the sample
1241 // individually.
1242 // There is an additional complication when scalar operations are considered.
1243 // For example, 2+Vector.
1244 // In this case each double within the point is treated individually
1245 const DataTypes::RealVectorType*
1246 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1247 {
1248 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1249
1250 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1251 // first work out which of the children are expanded
1252 bool leftExp=(m_left->m_readytype=='E');
1253 bool rightExp=(m_right->m_readytype=='E');
1254 if (!leftExp && !rightExp)
1255 {
1256 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1257 }
1258 bool leftScalar=(m_left->getRank()==0);
1259 bool rightScalar=(m_right->getRank()==0);
1260 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1261 {
1262 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1263 }
1264 size_t leftsize=m_left->getNoValues();
1265 size_t rightsize=m_right->getNoValues();
1266 size_t chunksize=1; // how many doubles will be processed in one go
1267 int leftstep=0; // how far should the left offset advance after each step
1268 int rightstep=0;
1269 int numsteps=0; // total number of steps for the inner loop
1270 int oleftstep=0; // the o variables refer to the outer loop
1271 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1272 int onumsteps=1;
1273
1274 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1275 bool RES=(rightExp && rightScalar);
1276 bool LS=(!leftExp && leftScalar); // left is a single scalar
1277 bool RS=(!rightExp && rightScalar);
1278 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1279 bool RN=(!rightExp && !rightScalar);
1280 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1281 bool REN=(rightExp && !rightScalar);
1282
1283 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1284 {
1285 chunksize=m_left->getNumDPPSample()*leftsize;
1286 leftstep=0;
1287 rightstep=0;
1288 numsteps=1;
1289 }
1290 else if (LES || RES)
1291 {
1292 chunksize=1;
1293 if (LES) // left is an expanded scalar
1294 {
1295 if (RS)
1296 {
1297 leftstep=1;
1298 rightstep=0;
1299 numsteps=m_left->getNumDPPSample();
1300 }
1301 else // RN or REN
1302 {
1303 leftstep=0;
1304 oleftstep=1;
1305 rightstep=1;
1306 orightstep=(RN ? -(int)rightsize : 0);
1307 numsteps=rightsize;
1308 onumsteps=m_left->getNumDPPSample();
1309 }
1310 }
1311 else // right is an expanded scalar
1312 {
1313 if (LS)
1314 {
1315 rightstep=1;
1316 leftstep=0;
1317 numsteps=m_right->getNumDPPSample();
1318 }
1319 else
1320 {
1321 rightstep=0;
1322 orightstep=1;
1323 leftstep=1;
1324 oleftstep=(LN ? -(int)leftsize : 0);
1325 numsteps=leftsize;
1326 onumsteps=m_right->getNumDPPSample();
1327 }
1328 }
1329 }
1330 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1331 {
1332 if (LEN) // and Right will be a single value
1333 {
1334 chunksize=rightsize;
1335 leftstep=rightsize;
1336 rightstep=0;
1337 numsteps=m_left->getNumDPPSample();
1338 if (RS)
1339 {
1340 numsteps*=leftsize;
1341 }
1342 }
1343 else // REN
1344 {
1345 chunksize=leftsize;
1346 rightstep=leftsize;
1347 leftstep=0;
1348 numsteps=m_right->getNumDPPSample();
1349 if (LS)
1350 {
1351 numsteps*=rightsize;
1352 }
1353 }
1354 }
1355
1356 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1357 // Get the values of sub-expressions
1358 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1359 const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1360 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1361 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1362 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1363 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1364 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1365 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1366 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1367
1368 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1369 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1370
1371
1372 roffset=m_samplesize*tid;
1373 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1374 switch(m_op)
1375 {
1376 case ADD:
1377 //PROC_OP(NO_ARG,plus<double>());
1378 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1379 &(*left)[0],
1380 &(*right)[0],
1381 chunksize,
1382 onumsteps,
1383 numsteps,
1384 resultStep,
1385 leftstep,
1386 rightstep,
1387 oleftstep,
1388 orightstep,
1389 lroffset,
1390 rroffset,
1391 escript::ES_optype::ADD);
1392 break;
1393 case SUB:
1394 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1395 &(*left)[0],
1396 &(*right)[0],
1397 chunksize,
1398 onumsteps,
1399 numsteps,
1400 resultStep,
1401 leftstep,
1402 rightstep,
1403 oleftstep,
1404 orightstep,
1405 lroffset,
1406 rroffset,
1407 escript::ES_optype::SUB);
1408 //PROC_OP(NO_ARG,minus<double>());
1409 break;
1410 case MUL:
1411 //PROC_OP(NO_ARG,multiplies<double>());
1412 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1413 &(*left)[0],
1414 &(*right)[0],
1415 chunksize,
1416 onumsteps,
1417 numsteps,
1418 resultStep,
1419 leftstep,
1420 rightstep,
1421 oleftstep,
1422 orightstep,
1423 lroffset,
1424 rroffset,
1425 escript::ES_optype::MUL);
1426 break;
1427 case DIV:
1428 //PROC_OP(NO_ARG,divides<double>());
1429 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1430 &(*left)[0],
1431 &(*right)[0],
1432 chunksize,
1433 onumsteps,
1434 numsteps,
1435 resultStep,
1436 leftstep,
1437 rightstep,
1438 oleftstep,
1439 orightstep,
1440 lroffset,
1441 rroffset,
1442 escript::ES_optype::DIV);
1443 break;
1444 case POW:
1445 //PROC_OP(double (double,double),::pow);
1446 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1447 &(*left)[0],
1448 &(*right)[0],
1449 chunksize,
1450 onumsteps,
1451 numsteps,
1452 resultStep,
1453 leftstep,
1454 rightstep,
1455 oleftstep,
1456 orightstep,
1457 lroffset,
1458 rroffset,
1459 escript::ES_optype::POW);
1460 break;
1461 default:
1462 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1463 }
1464 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1465 return &m_samples;
1466 }
1467
1468
1469 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1470 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1471 // unlike the other resolve helpers, we must treat these datapoints separately.
1472 const DataTypes::RealVectorType*
1473 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1474 {
1475 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1476
1477 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1478 // first work out which of the children are expanded
1479 bool leftExp=(m_left->m_readytype=='E');
1480 bool rightExp=(m_right->m_readytype=='E');
1481 int steps=getNumDPPSample();
1482 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1483 int rightStep=(rightExp?m_right->getNoValues() : 0);
1484
1485 int resultStep=getNoValues();
1486 roffset=m_samplesize*tid;
1487 size_t offset=roffset;
1488
1489 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1490
1491 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1492
1493 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1494 cout << getNoValues() << endl;)
1495
1496
1497 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1498 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1499 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1500 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1501 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1502 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1503 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1504
1505 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1506 switch(m_op)
1507 {
1508 case PROD:
1509 for (int i=0;i<steps;++i,resultp+=resultStep)
1510 {
1511 const double *ptr_0 = &((*left)[lroffset]);
1512 const double *ptr_1 = &((*right)[rroffset]);
1513
1514 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1515 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1516
1517 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1518
1519 lroffset+=leftStep;
1520 rroffset+=rightStep;
1521 }
1522 break;
1523 default:
1524 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1525 }
1526 roffset=offset;
1527 return &m_samples;
1528 }
1529
1530
1531 const DataTypes::RealVectorType*
1532 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1533 {
1534 #ifdef _OPENMP
1535 int tid=omp_get_thread_num();
1536 #else
1537 int tid=0;
1538 #endif
1539
1540 #ifdef LAZY_STACK_PROF
1541 stackstart[tid]=&tid;
1542 stackend[tid]=&tid;
1543 const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1544 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1545 #pragma omp critical
1546 if (d>maxstackuse)
1547 {
1548 cout << "Max resolve Stack use " << d << endl;
1549 maxstackuse=d;
1550 }
1551 return r;
1552 #else
1553 return resolveNodeSample(tid, sampleNo, roffset);
1554 #endif
1555 }
1556
1557
1558 // This needs to do the work of the identity constructor
1559 void
1560 DataLazy::resolveToIdentity()
1561 {
1562 if (m_op==IDENTITY)
1563 return;
1564 DataReady_ptr p=resolveNodeWorker();
1565 makeIdentity(p);
1566 }
1567
1568 void DataLazy::makeIdentity(const DataReady_ptr& p)
1569 {
1570 m_op=IDENTITY;
1571 m_axis_offset=0;
1572 m_transpose=0;
1573 m_SL=m_SM=m_SR=0;
1574 m_children=m_height=0;
1575 m_id=p;
1576 if(p->isConstant()) {m_readytype='C';}
1577 else if(p->isExpanded()) {m_readytype='E';}
1578 else if (p->isTagged()) {m_readytype='T';}
1579 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1580 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1581 m_left.reset();
1582 m_right.reset();
1583 }
1584
1585
1586 DataReady_ptr
1587 DataLazy::resolve()
1588 {
1589 resolveToIdentity();
1590 return m_id;
1591 }
1592
1593
1594 /* This is really a static method but I think that caused problems in windows */
1595 void
1596 DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1597 {
1598 if (dats.empty())
1599 {
1600 return;
1601 }
1602 vector<DataLazy*> work;
1603 FunctionSpace fs=dats[0]->getFunctionSpace();
1604 bool match=true;
1605 for (int i=dats.size()-1;i>=0;--i)
1606 {
1607 if (dats[i]->m_readytype!='E')
1608 {
1609 dats[i]->collapse();
1610 }
1611 if (dats[i]->m_op!=IDENTITY)
1612 {
1613 work.push_back(dats[i]);
1614 if (fs!=dats[i]->getFunctionSpace())
1615 {
1616 match=false;
1617 }
1618 }
1619 }
1620 if (work.empty())
1621 {
1622 return; // no work to do
1623 }
1624 if (match) // all functionspaces match. Yes I realise this is overly strict
1625 { // it is possible that dats[0] is one of the objects which we discarded and
1626 // all the other functionspaces match.
1627 vector<DataExpanded*> dep;
1628 vector<RealVectorType*> vecs;
1629 for (int i=0;i<work.size();++i)
1630 {
1631 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1632 vecs.push_back(&(dep[i]->getVectorRW()));
1633 }
1634 int totalsamples=work[0]->getNumSamples();
1635 const RealVectorType* res=0; // Storage for answer
1636 int sample;
1637 #pragma omp parallel private(sample, res)
1638 {
1639 size_t roffset=0;
1640 #pragma omp for schedule(static)
1641 for (sample=0;sample<totalsamples;++sample)
1642 {
1643 roffset=0;
1644 int j;
1645 for (j=work.size()-1;j>=0;--j)
1646 {
1647 #ifdef _OPENMP
1648 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1649 #else
1650 res=work[j]->resolveNodeSample(0,sample,roffset);
1651 #endif
1652 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1653 memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1654 }
1655 }
1656 }
1657 // Now we need to load the new results as identity ops into the lazy nodes
1658 for (int i=work.size()-1;i>=0;--i)
1659 {
1660 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1661 }
1662 }
1663 else // functionspaces do not match
1664 {
1665 for (int i=0;i<work.size();++i)
1666 {
1667 work[i]->resolveToIdentity();
1668 }
1669 }
1670 }
1671
1672
1673
1674 // This version of resolve uses storage in each node to hold results
1675 DataReady_ptr
1676 DataLazy::resolveNodeWorker()
1677 {
1678 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1679 {
1680 collapse();
1681 }
1682 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1683 {
1684 return m_id;
1685 }
1686 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1687 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1688 RealVectorType& resvec=result->getVectorRW();
1689 DataReady_ptr resptr=DataReady_ptr(result);
1690
1691 int sample;
1692 int totalsamples=getNumSamples();
1693 const RealVectorType* res=0; // Storage for answer
1694 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1695 #pragma omp parallel private(sample,res)
1696 {
1697 size_t roffset=0;
1698 #ifdef LAZY_STACK_PROF
1699 stackstart[omp_get_thread_num()]=&roffset;
1700 stackend[omp_get_thread_num()]=&roffset;
1701 #endif
1702 #pragma omp for schedule(static)
1703 for (sample=0;sample<totalsamples;++sample)
1704 {
1705 roffset=0;
1706 #ifdef _OPENMP
1707 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1708 #else
1709 res=resolveNodeSample(0,sample,roffset);
1710 #endif
1711 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1712 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1713 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1714 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1715 }
1716 }
1717 #ifdef LAZY_STACK_PROF
1718 for (int i=0;i<getNumberOfThreads();++i)
1719 {
1720 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1721 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1722 if (r>maxstackuse)
1723 {
1724 maxstackuse=r;
1725 }
1726 }
1727 cout << "Max resolve Stack use=" << maxstackuse << endl;
1728 #endif
1729 return resptr;
1730 }
1731
1732 std::string
1733 DataLazy::toString() const
1734 {
1735 ostringstream oss;
1736 oss << "Lazy Data: [depth=" << m_height<< "] ";
1737 switch (escriptParams.getLazyStrFmt())
1738 {
1739 case 1: // tree format
1740 oss << endl;
1741 intoTreeString(oss,"");
1742 break;
1743 case 2: // just the depth
1744 break;
1745 default:
1746 intoString(oss);
1747 break;
1748 }
1749 return oss.str();
1750 }
1751
1752
1753 void
1754 DataLazy::intoString(ostringstream& oss) const
1755 {
1756 // oss << "[" << m_children <<";"<<m_height <<"]";
1757 switch (getOpgroup(m_op))
1758 {
1759 case G_IDENTITY:
1760 if (m_id->isExpanded())
1761 {
1762 oss << "E";
1763 }
1764 else if (m_id->isTagged())
1765 {
1766 oss << "T";
1767 }
1768 else if (m_id->isConstant())
1769 {
1770 oss << "C";
1771 }
1772 else
1773 {
1774 oss << "?";
1775 }
1776 oss << '@' << m_id.get();
1777 break;
1778 case G_BINARY:
1779 oss << '(';
1780 m_left->intoString(oss);
1781 oss << ' ' << opToString(m_op) << ' ';
1782 m_right->intoString(oss);
1783 oss << ')';
1784 break;
1785 case G_UNARY:
1786 case G_UNARY_P:
1787 case G_NP1OUT:
1788 case G_NP1OUT_P:
1789 case G_REDUCTION:
1790 oss << opToString(m_op) << '(';
1791 m_left->intoString(oss);
1792 oss << ')';
1793 break;
1794 case G_TENSORPROD:
1795 oss << opToString(m_op) << '(';
1796 m_left->intoString(oss);
1797 oss << ", ";
1798 m_right->intoString(oss);
1799 oss << ')';
1800 break;
1801 case G_NP1OUT_2P:
1802 oss << opToString(m_op) << '(';
1803 m_left->intoString(oss);
1804 oss << ", " << m_axis_offset << ", " << m_transpose;
1805 oss << ')';
1806 break;
1807 case G_CONDEVAL:
1808 oss << opToString(m_op)<< '(' ;
1809 m_mask->intoString(oss);
1810 oss << " ? ";
1811 m_left->intoString(oss);
1812 oss << " : ";
1813 m_right->intoString(oss);
1814 oss << ')';
1815 break;
1816 default:
1817 oss << "UNKNOWN";
1818 }
1819 }
1820
1821
1822 void
1823 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1824 {
1825 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1826 switch (getOpgroup(m_op))
1827 {
1828 case G_IDENTITY:
1829 if (m_id->isExpanded())
1830 {
1831 oss << "E";
1832 }
1833 else if (m_id->isTagged())
1834 {
1835 oss << "T";
1836 }
1837 else if (m_id->isConstant())
1838 {
1839 oss << "C";
1840 }
1841 else
1842 {
1843 oss << "?";
1844 }
1845 oss << '@' << m_id.get() << endl;
1846 break;
1847 case G_BINARY:
1848 oss << opToString(m_op) << endl;
1849 indent+='.';
1850 m_left->intoTreeString(oss, indent);
1851 m_right->intoTreeString(oss, indent);
1852 break;
1853 case G_UNARY:
1854 case G_UNARY_P:
1855 case G_NP1OUT:
1856 case G_NP1OUT_P:
1857 case G_REDUCTION:
1858 oss << opToString(m_op) << endl;
1859 indent+='.';
1860 m_left->intoTreeString(oss, indent);
1861 break;
1862 case G_TENSORPROD:
1863 oss << opToString(m_op) << endl;
1864 indent+='.';
1865 m_left->intoTreeString(oss, indent);
1866 m_right->intoTreeString(oss, indent);
1867 break;
1868 case G_NP1OUT_2P:
1869 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1870 indent+='.';
1871 m_left->intoTreeString(oss, indent);
1872 break;
1873 default:
1874 oss << "UNKNOWN";
1875 }
1876 }
1877
1878
1879 DataAbstract*
1880 DataLazy::deepCopy() const
1881 {
1882 switch (getOpgroup(m_op))
1883 {
1884 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1885 case G_UNARY:
1886 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1887 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1888 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1889 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1890 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1891 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1892 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1893 default:
1894 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1895 }
1896 }
1897
1898 // For this, we don't care what op you were doing because the answer is now zero
1899 DataAbstract*
1900 DataLazy::zeroedCopy() const
1901 {
1902 return new DataLazy(m_id->zeroedCopy()->getPtr());
1903 }
1904
1905 // There is no single, natural interpretation of getLength on DataLazy.
1906 // Instances of DataReady can look at the size of their vectors.
1907 // For lazy though, it could be the size the data would be if it were resolved;
1908 // or it could be some function of the lengths of the DataReady instances which
1909 // form part of the expression.
1910 // Rather than have people making assumptions, I have disabled the method.
1911 DataTypes::RealVectorType::size_type
1912 DataLazy::getLength() const
1913 {
1914 throw DataException("getLength() does not make sense for lazy data.");
1915 }
1916
1917
1918 DataAbstract*
1919 DataLazy::getSlice(const DataTypes::RegionType& region) const
1920 {
1921 throw DataException("getSlice - not implemented for Lazy objects.");
1922 }
1923
1924
1925 // To do this we need to rely on our child nodes
1926 DataTypes::RealVectorType::size_type
1927 DataLazy::getPointOffset(int sampleNo,
1928 int dataPointNo)
1929 {
1930 if (m_op==IDENTITY)
1931 {
1932 return m_id->getPointOffset(sampleNo,dataPointNo);
1933 }
1934 if (m_readytype!='E')
1935 {
1936 collapse();
1937 return m_id->getPointOffset(sampleNo,dataPointNo);
1938 }
1939 // at this point we do not have an identity node and the expression will be Expanded
1940 // so we only need to know which child to ask
1941 if (m_left->m_readytype=='E')
1942 {
1943 return m_left->getPointOffset(sampleNo,dataPointNo);
1944 }
1945 else
1946 {
1947 return m_right->getPointOffset(sampleNo,dataPointNo);
1948 }
1949 }
1950
1951 // To do this we need to rely on our child nodes
1952 DataTypes::RealVectorType::size_type
1953 DataLazy::getPointOffset(int sampleNo,
1954 int dataPointNo) const
1955 {
1956 if (m_op==IDENTITY)
1957 {
1958 return m_id->getPointOffset(sampleNo,dataPointNo);
1959 }
1960 if (m_readytype=='E')
1961 {
1962 // at this point we do not have an identity node and the expression will be Expanded
1963 // so we only need to know which child to ask
1964 if (m_left->m_readytype=='E')
1965 {
1966 return m_left->getPointOffset(sampleNo,dataPointNo);
1967 }
1968 else
1969 {
1970 return m_right->getPointOffset(sampleNo,dataPointNo);
1971 }
1972 }
1973 if (m_readytype=='C')
1974 {
1975 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1976 }
1977 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1978 }
1979
1980
1981 // I have decided to let Data:: handle this issue.
1982 void
1983 DataLazy::setToZero()
1984 {
1985 // DataTypes::RealVectorType v(getNoValues(),0);
1986 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1987 // m_op=IDENTITY;
1988 // m_right.reset();
1989 // m_left.reset();
1990 // m_readytype='C';
1991 // m_buffsRequired=1;
1992
1993 (void)privdebug; // to stop the compiler complaining about unused privdebug
1994 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1995 }
1996
1997 bool
1998 DataLazy::actsExpanded() const
1999 {
2000 return (m_readytype=='E');
2001 }
2002
2003 } // end namespace
2004

  ViewVC Help
Powered by ViewVC 1.1.26