/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Contents of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6512 - (show annotations)
Fri Mar 3 02:01:57 2017 UTC (2 years, 1 month ago) by jfenwick
File size: 62825 byte(s)
Experiments on starting complex lazy

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include "DataLazy.h"
18 #include "Data.h"
19 #include "DataTypes.h"
20 #include "EscriptParams.h"
21 #include "FunctionSpace.h"
22 #include "Utils.h"
23 #include "DataVectorOps.h"
24
25 #include <iomanip> // for some fancy formatting in debug
26
27 using namespace escript::DataTypes;
28
29 #define NO_ARG
30
31 // #define LAZYDEBUG(X) if (privdebug){X;}
32 #define LAZYDEBUG(X)
33 namespace
34 {
35 bool privdebug=false;
36
37 #define ENABLEDEBUG privdebug=true;
38 #define DISABLEDEBUG privdebug=false;
39 }
40
41 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42
43 //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44
45
46 #define SIZELIMIT \
47 if (m_height > escript::escriptParams.getTooManyLevels()) {\
48 if (escript::escriptParams.getLazyVerbose()) {\
49 cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50 }\
51 resolveToIdentity();\
52 }
53
54 /*
55 How does DataLazy work?
56 ~~~~~~~~~~~~~~~~~~~~~~~
57
58 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59 denoted left and right.
60
61 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62 This means that all "internal" nodes in the structure are instances of DataLazy.
63
64 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65 Note that IDENTITY is not considered a unary operation.
66
67 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68 It must however form a DAG (directed acyclic graph).
69 I will refer to individual DataLazy objects with the structure as nodes.
70
71 Each node also stores:
72 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73 evaluated.
74 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75 evaluate the expression.
76 - m_samplesize ~ the number of doubles stored in a sample.
77
78 When a new node is created, the above values are computed based on the values in the child nodes.
79 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80
81 The resolve method, which produces a DataReady from a DataLazy, does the following:
82 1) Create a DataReady to hold the new result.
83 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85
86 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87
88 resolveSample returns a Vector* and an offset within that vector where the result is stored.
89 Normally, this would be v, but for identity nodes their internal vector is returned instead.
90
91 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92
93 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95
96 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97 1) Add to the ES_optype.
98 2) determine what opgroup your operation belongs to (X)
99 3) add a string for the op to the end of ES_opstrings
100 4) increase ES_opcount
101 5) add an entry (X) to opgroups
102 6) add an entry to the switch in collapseToReady
103 7) add an entry to resolveX
104 */
105
106
107 using namespace std;
108 using namespace boost;
109
110 namespace escript
111 {
112
113 namespace
114 {
115
116
117 // enabling this will print out when ever the maximum stacksize used by resolve increases
118 // it assumes _OPENMP is also in use
119 //#define LAZY_STACK_PROF
120
121
122
123 #ifndef _OPENMP
124 #ifdef LAZY_STACK_PROF
125 #undef LAZY_STACK_PROF
126 #endif
127 #endif
128
129
130 #ifdef LAZY_STACK_PROF
131 std::vector<void*> stackstart(getNumberOfThreads());
132 std::vector<void*> stackend(getNumberOfThreads());
133 size_t maxstackuse=0;
134 #endif
135
136
137 // return the FunctionSpace of the result of "left op right"
138 FunctionSpace
139 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140 {
141 // perhaps this should call interpolate and throw or something?
142 // maybe we need an interpolate node -
143 // that way, if interpolate is required in any other op we can just throw a
144 // programming error exception.
145
146 FunctionSpace l=left->getFunctionSpace();
147 FunctionSpace r=right->getFunctionSpace();
148 if (l!=r)
149 {
150 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
151 if (res==1)
152 {
153 return l;
154 }
155 if (res==-1)
156 {
157 return r;
158 }
159 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
160 }
161 return l;
162 }
163
164 // return the shape of the result of "left op right"
165 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
166 DataTypes::ShapeType
167 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
168 {
169 if (left->getShape()!=right->getShape())
170 {
171 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
172 {
173 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
174 }
175
176 if (left->getRank()==0) // we need to allow scalar * anything
177 {
178 return right->getShape();
179 }
180 if (right->getRank()==0)
181 {
182 return left->getShape();
183 }
184 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
185 }
186 return left->getShape();
187 }
188
189 // return the shape for "op left"
190
191 DataTypes::ShapeType
192 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
193 {
194 switch(op)
195 {
196 case TRANS:
197 { // for the scoping of variables
198 const DataTypes::ShapeType& s=left->getShape();
199 DataTypes::ShapeType sh;
200 int rank=left->getRank();
201 if (axis_offset<0 || axis_offset>rank)
202 {
203 stringstream e;
204 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
205 throw DataException(e.str());
206 }
207 for (int i=0; i<rank; i++)
208 {
209 int index = (axis_offset+i)%rank;
210 sh.push_back(s[index]); // Append to new shape
211 }
212 return sh;
213 }
214 break;
215 case TRACE:
216 {
217 int rank=left->getRank();
218 if (rank<2)
219 {
220 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
221 }
222 if ((axis_offset>rank-2) || (axis_offset<0))
223 {
224 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
225 }
226 if (rank==2)
227 {
228 return DataTypes::scalarShape;
229 }
230 else if (rank==3)
231 {
232 DataTypes::ShapeType sh;
233 if (axis_offset==0)
234 {
235 sh.push_back(left->getShape()[2]);
236 }
237 else // offset==1
238 {
239 sh.push_back(left->getShape()[0]);
240 }
241 return sh;
242 }
243 else if (rank==4)
244 {
245 DataTypes::ShapeType sh;
246 const DataTypes::ShapeType& s=left->getShape();
247 if (axis_offset==0)
248 {
249 sh.push_back(s[2]);
250 sh.push_back(s[3]);
251 }
252 else if (axis_offset==1)
253 {
254 sh.push_back(s[0]);
255 sh.push_back(s[3]);
256 }
257 else // offset==2
258 {
259 sh.push_back(s[0]);
260 sh.push_back(s[1]);
261 }
262 return sh;
263 }
264 else // unknown rank
265 {
266 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
267 }
268 }
269 break;
270 default:
271 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
272 }
273 }
274
275 DataTypes::ShapeType
276 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
277 {
278 // This code taken from the Data.cpp swapaxes() method
279 // Some of the checks are probably redundant here
280 int axis0_tmp,axis1_tmp;
281 const DataTypes::ShapeType& s=left->getShape();
282 DataTypes::ShapeType out_shape;
283 // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
284 // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
285 int rank=left->getRank();
286 if (rank<2) {
287 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
288 }
289 if (axis0<0 || axis0>rank-1) {
290 stringstream e;
291 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
292 throw DataException(e.str());
293 }
294 if (axis1<0 || axis1>rank-1) {
295 stringstream e;
296 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
297 throw DataException(e.str());
298 }
299 if (axis0 == axis1) {
300 throw DataException("Error - Data::swapaxes: axis indices must be different.");
301 }
302 if (axis0 > axis1) {
303 axis0_tmp=axis1;
304 axis1_tmp=axis0;
305 } else {
306 axis0_tmp=axis0;
307 axis1_tmp=axis1;
308 }
309 for (int i=0; i<rank; i++) {
310 if (i == axis0_tmp) {
311 out_shape.push_back(s[axis1_tmp]);
312 } else if (i == axis1_tmp) {
313 out_shape.push_back(s[axis0_tmp]);
314 } else {
315 out_shape.push_back(s[i]);
316 }
317 }
318 return out_shape;
319 }
320
321
322 // determine the output shape for the general tensor product operation
323 // the additional parameters return information required later for the product
324 // the majority of this code is copy pasted from C_General_Tensor_Product
325 DataTypes::ShapeType
326 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
327 {
328
329 // Get rank and shape of inputs
330 int rank0 = left->getRank();
331 int rank1 = right->getRank();
332 const DataTypes::ShapeType& shape0 = left->getShape();
333 const DataTypes::ShapeType& shape1 = right->getShape();
334
335 // Prepare for the loops of the product and verify compatibility of shapes
336 int start0=0, start1=0;
337 if (transpose == 0) {}
338 else if (transpose == 1) { start0 = axis_offset; }
339 else if (transpose == 2) { start1 = rank1-axis_offset; }
340 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
341
342 if (rank0<axis_offset)
343 {
344 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
345 }
346
347 // Adjust the shapes for transpose
348 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
349 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
350 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
351 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
352
353 // Prepare for the loops of the product
354 SL=1, SM=1, SR=1;
355 for (int i=0; i<rank0-axis_offset; i++) {
356 SL *= tmpShape0[i];
357 }
358 for (int i=rank0-axis_offset; i<rank0; i++) {
359 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
360 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
361 }
362 SM *= tmpShape0[i];
363 }
364 for (int i=axis_offset; i<rank1; i++) {
365 SR *= tmpShape1[i];
366 }
367
368 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
369 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
370 { // block to limit the scope of out_index
371 int out_index=0;
372 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
373 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
374 }
375
376 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
377 {
378 ostringstream os;
379 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
380 throw DataException(os.str());
381 }
382
383 return shape2;
384 }
385
386 } // end anonymous namespace
387
388 void DataLazy::LazyNodeSetup()
389 {
390 #ifdef _OPENMP
391 int numthreads=omp_get_max_threads();
392 m_samples_r.resize(numthreads*m_samplesize);
393 m_sampleids=new int[numthreads];
394 for (int i=0;i<numthreads;++i)
395 {
396 m_sampleids[i]=-1;
397 }
398 #else
399 if (m_iscompl)
400 {
401 m_samples_c.resize(m_samplesize);
402 }
403 else
404 {
405 m_samples_r.resize(m_samplesize);
406 }
407 m_sampleids=new int[1];
408 m_sampleids[0]=-1;
409 #endif // _OPENMP
410 }
411
412
413 // Creates an identity node
414 DataLazy::DataLazy(DataAbstract_ptr p)
415 : parent(p->getFunctionSpace(),p->getShape())
416 ,m_sampleids(0),
417 m_samples_r(1)
418 {
419 if (p->isLazy())
420 {
421 // I don't want identity of Lazy.
422 // Question: Why would that be so bad?
423 // Answer: We assume that the child of ID is something we can call getVector on
424 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
425 }
426 else
427 {
428 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
429 makeIdentity(dr);
430 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
431 }
432 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
433 }
434
435 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
436 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
437 m_op(op),
438 m_axis_offset(0),
439 m_transpose(0),
440 m_SL(0), m_SM(0), m_SR(0)
441 {
442 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
443 {
444 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
445 }
446
447 DataLazy_ptr lleft;
448 if (!left->isLazy())
449 {
450 lleft=DataLazy_ptr(new DataLazy(left));
451 }
452 else
453 {
454 lleft=dynamic_pointer_cast<DataLazy>(left);
455 }
456 m_readytype=lleft->m_readytype;
457 m_left=lleft;
458 m_samplesize=getNumDPPSample()*getNoValues();
459 m_children=m_left->m_children+1;
460 m_height=m_left->m_height+1;
461 LazyNodeSetup();
462 SIZELIMIT
463 }
464
465
466 // In this constructor we need to consider interpolation
467 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
468 : parent(resultFS(left,right,op), resultShape(left,right,op)),
469 m_op(op),
470 m_SL(0), m_SM(0), m_SR(0)
471 {
472 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
473 if ((getOpgroup(op)!=G_BINARY))
474 {
475 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
476 }
477
478 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
479 {
480 FunctionSpace fs=getFunctionSpace();
481 Data ltemp(left);
482 Data tmp(ltemp,fs);
483 left=tmp.borrowDataPtr();
484 }
485 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
486 {
487 Data tmp(Data(right),getFunctionSpace());
488 right=tmp.borrowDataPtr();
489 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
490 }
491 left->operandCheck(*right);
492
493 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
494 {
495 m_left=dynamic_pointer_cast<DataLazy>(left);
496 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
497 }
498 else
499 {
500 m_left=DataLazy_ptr(new DataLazy(left));
501 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
502 }
503 if (right->isLazy())
504 {
505 m_right=dynamic_pointer_cast<DataLazy>(right);
506 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
507 }
508 else
509 {
510 m_right=DataLazy_ptr(new DataLazy(right));
511 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
512 }
513 char lt=m_left->m_readytype;
514 char rt=m_right->m_readytype;
515 if (lt=='E' || rt=='E')
516 {
517 m_readytype='E';
518 }
519 else if (lt=='T' || rt=='T')
520 {
521 m_readytype='T';
522 }
523 else
524 {
525 m_readytype='C';
526 }
527 m_samplesize=getNumDPPSample()*getNoValues();
528 m_children=m_left->m_children+m_right->m_children+2;
529 m_height=max(m_left->m_height,m_right->m_height)+1;
530 LazyNodeSetup();
531 SIZELIMIT
532 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
533 }
534
535 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
536 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
537 m_op(op),
538 m_axis_offset(axis_offset),
539 m_transpose(transpose)
540 {
541 if ((getOpgroup(op)!=G_TENSORPROD))
542 {
543 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
544 }
545 if ((transpose>2) || (transpose<0))
546 {
547 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
548 }
549 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
550 {
551 FunctionSpace fs=getFunctionSpace();
552 Data ltemp(left);
553 Data tmp(ltemp,fs);
554 left=tmp.borrowDataPtr();
555 }
556 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
557 {
558 Data tmp(Data(right),getFunctionSpace());
559 right=tmp.borrowDataPtr();
560 }
561 // left->operandCheck(*right);
562
563 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
564 {
565 m_left=dynamic_pointer_cast<DataLazy>(left);
566 }
567 else
568 {
569 m_left=DataLazy_ptr(new DataLazy(left));
570 }
571 if (right->isLazy())
572 {
573 m_right=dynamic_pointer_cast<DataLazy>(right);
574 }
575 else
576 {
577 m_right=DataLazy_ptr(new DataLazy(right));
578 }
579 char lt=m_left->m_readytype;
580 char rt=m_right->m_readytype;
581 if (lt=='E' || rt=='E')
582 {
583 m_readytype='E';
584 }
585 else if (lt=='T' || rt=='T')
586 {
587 m_readytype='T';
588 }
589 else
590 {
591 m_readytype='C';
592 }
593 m_samplesize=getNumDPPSample()*getNoValues();
594 m_children=m_left->m_children+m_right->m_children+2;
595 m_height=max(m_left->m_height,m_right->m_height)+1;
596 LazyNodeSetup();
597 SIZELIMIT
598 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
599 }
600
601
602 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
603 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
604 m_op(op),
605 m_axis_offset(axis_offset),
606 m_transpose(0),
607 m_tol(0)
608 {
609 if ((getOpgroup(op)!=G_NP1OUT_P))
610 {
611 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
612 }
613 DataLazy_ptr lleft;
614 if (!left->isLazy())
615 {
616 lleft=DataLazy_ptr(new DataLazy(left));
617 }
618 else
619 {
620 lleft=dynamic_pointer_cast<DataLazy>(left);
621 }
622 m_readytype=lleft->m_readytype;
623 m_left=lleft;
624 m_samplesize=getNumDPPSample()*getNoValues();
625 m_children=m_left->m_children+1;
626 m_height=m_left->m_height+1;
627 LazyNodeSetup();
628 SIZELIMIT
629 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
630 }
631
632 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
633 : parent(left->getFunctionSpace(), left->getShape()),
634 m_op(op),
635 m_axis_offset(0),
636 m_transpose(0),
637 m_tol(tol)
638 {
639 if ((getOpgroup(op)!=G_UNARY_P))
640 {
641 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
642 }
643 DataLazy_ptr lleft;
644 if (!left->isLazy())
645 {
646 lleft=DataLazy_ptr(new DataLazy(left));
647 }
648 else
649 {
650 lleft=dynamic_pointer_cast<DataLazy>(left);
651 }
652 m_readytype=lleft->m_readytype;
653 m_left=lleft;
654 m_samplesize=getNumDPPSample()*getNoValues();
655 m_children=m_left->m_children+1;
656 m_height=m_left->m_height+1;
657 LazyNodeSetup();
658 SIZELIMIT
659 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
660 }
661
662
663 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
664 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
665 m_op(op),
666 m_axis_offset(axis0),
667 m_transpose(axis1),
668 m_tol(0)
669 {
670 if ((getOpgroup(op)!=G_NP1OUT_2P))
671 {
672 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
673 }
674 DataLazy_ptr lleft;
675 if (!left->isLazy())
676 {
677 lleft=DataLazy_ptr(new DataLazy(left));
678 }
679 else
680 {
681 lleft=dynamic_pointer_cast<DataLazy>(left);
682 }
683 m_readytype=lleft->m_readytype;
684 m_left=lleft;
685 m_samplesize=getNumDPPSample()*getNoValues();
686 m_children=m_left->m_children+1;
687 m_height=m_left->m_height+1;
688 LazyNodeSetup();
689 SIZELIMIT
690 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
691 }
692
693
694 namespace
695 {
696
697 inline int max3(int a, int b, int c)
698 {
699 int t=(a>b?a:b);
700 return (t>c?t:c);
701
702 }
703 }
704
705 DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
706 : parent(left->getFunctionSpace(), left->getShape()),
707 m_op(CONDEVAL),
708 m_axis_offset(0),
709 m_transpose(0),
710 m_tol(0)
711 {
712
713 DataLazy_ptr lmask;
714 DataLazy_ptr lleft;
715 DataLazy_ptr lright;
716 if (!mask->isLazy())
717 {
718 lmask=DataLazy_ptr(new DataLazy(mask));
719 }
720 else
721 {
722 lmask=dynamic_pointer_cast<DataLazy>(mask);
723 }
724 if (!left->isLazy())
725 {
726 lleft=DataLazy_ptr(new DataLazy(left));
727 }
728 else
729 {
730 lleft=dynamic_pointer_cast<DataLazy>(left);
731 }
732 if (!right->isLazy())
733 {
734 lright=DataLazy_ptr(new DataLazy(right));
735 }
736 else
737 {
738 lright=dynamic_pointer_cast<DataLazy>(right);
739 }
740 m_readytype=lmask->m_readytype;
741 if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
742 {
743 throw DataException("Programmer Error - condEval arguments must have the same readytype");
744 }
745 m_left=lleft;
746 m_right=lright;
747 m_mask=lmask;
748 m_samplesize=getNumDPPSample()*getNoValues();
749 m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
750 m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
751 LazyNodeSetup();
752 SIZELIMIT
753 LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
754 }
755
756
757
758 DataLazy::~DataLazy()
759 {
760 delete[] m_sampleids;
761 }
762
763
764 /*
765 \brief Evaluates the expression using methods on Data.
766 This does the work for the collapse method.
767 For reasons of efficiency do not call this method on DataExpanded nodes.
768 */
769 DataReady_ptr
770 DataLazy::collapseToReady() const
771 {
772 if (m_readytype=='E')
773 { // this is more an efficiency concern than anything else
774 throw DataException("Programmer Error - do not use collapse on Expanded data.");
775 }
776 if (m_op==IDENTITY)
777 {
778 return m_id;
779 }
780 DataReady_ptr pleft=m_left->collapseToReady();
781 Data left(pleft);
782 Data right;
783 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
784 {
785 right=Data(m_right->collapseToReady());
786 }
787 Data result;
788 switch(m_op)
789 {
790 case ADD:
791 result=left+right;
792 break;
793 case SUB:
794 result=left-right;
795 break;
796 case MUL:
797 result=left*right;
798 break;
799 case DIV:
800 result=left/right;
801 break;
802 case SIN:
803 result=left.sin();
804 break;
805 case COS:
806 result=left.cos();
807 break;
808 case TAN:
809 result=left.tan();
810 break;
811 case ASIN:
812 result=left.asin();
813 break;
814 case ACOS:
815 result=left.acos();
816 break;
817 case ATAN:
818 result=left.atan();
819 break;
820 case SINH:
821 result=left.sinh();
822 break;
823 case COSH:
824 result=left.cosh();
825 break;
826 case TANH:
827 result=left.tanh();
828 break;
829 case ERF:
830 result=left.erf();
831 break;
832 case ASINH:
833 result=left.asinh();
834 break;
835 case ACOSH:
836 result=left.acosh();
837 break;
838 case ATANH:
839 result=left.atanh();
840 break;
841 case LOG10:
842 result=left.log10();
843 break;
844 case LOG:
845 result=left.log();
846 break;
847 case SIGN:
848 result=left.sign();
849 break;
850 case ABS:
851 result=left.abs();
852 break;
853 case NEG:
854 result=left.neg();
855 break;
856 case POS:
857 // it doesn't mean anything for delayed.
858 // it will just trigger a deep copy of the lazy object
859 throw DataException("Programmer error - POS not supported for lazy data.");
860 break;
861 case EXP:
862 result=left.exp();
863 break;
864 case SQRT:
865 result=left.sqrt();
866 break;
867 case RECIP:
868 result=left.oneOver();
869 break;
870 case GZ:
871 result=left.wherePositive();
872 break;
873 case LZ:
874 result=left.whereNegative();
875 break;
876 case GEZ:
877 result=left.whereNonNegative();
878 break;
879 case LEZ:
880 result=left.whereNonPositive();
881 break;
882 case NEZ:
883 result=left.whereNonZero(m_tol);
884 break;
885 case EZ:
886 result=left.whereZero(m_tol);
887 break;
888 case SYM:
889 result=left.symmetric();
890 break;
891 case NSYM:
892 result=left.antisymmetric();
893 break;
894 case PROD:
895 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
896 break;
897 case TRANS:
898 result=left.transpose(m_axis_offset);
899 break;
900 case TRACE:
901 result=left.trace(m_axis_offset);
902 break;
903 case SWAP:
904 result=left.swapaxes(m_axis_offset, m_transpose);
905 break;
906 case MINVAL:
907 result=left.minval();
908 break;
909 case MAXVAL:
910 result=left.minval();
911 break;
912 case HER:
913 result=left.hermitian();
914 break;
915 default:
916 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
917 }
918 return result.borrowReadyPtr();
919 }
920
921 /*
922 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
923 This method uses the original methods on the Data class to evaluate the expressions.
924 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
925 the purpose of using DataLazy in the first place).
926 */
927 void
928 DataLazy::collapse() const
929 {
930 if (m_op==IDENTITY)
931 {
932 return;
933 }
934 if (m_readytype=='E')
935 { // this is more an efficiency concern than anything else
936 throw DataException("Programmer Error - do not use collapse on Expanded data.");
937 }
938 m_id=collapseToReady();
939 m_op=IDENTITY;
940 }
941
942 // The result will be stored in m_samples
943 // The return value is a pointer to the DataVector, offset is the offset within the return value
944 const DataTypes::RealVectorType*
945 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
946 {
947 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
948 // collapse so we have a 'E' node or an IDENTITY for some other type
949 if (m_readytype!='E' && m_op!=IDENTITY)
950 {
951 collapse();
952 }
953 if (m_op==IDENTITY)
954 {
955 const RealVectorType& vec=m_id->getVectorRO();
956 roffset=m_id->getPointOffset(sampleNo, 0);
957 #ifdef LAZY_STACK_PROF
958 int x;
959 if (&x<stackend[omp_get_thread_num()])
960 {
961 stackend[omp_get_thread_num()]=&x;
962 }
963 #endif
964 return &(vec);
965 }
966 if (m_readytype!='E')
967 {
968 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
969 }
970 if (m_sampleids[tid]==sampleNo)
971 {
972 roffset=tid*m_samplesize;
973 return &(m_samples_r); // sample is already resolved
974 }
975 m_sampleids[tid]=sampleNo;
976
977 switch (getOpgroup(m_op))
978 {
979 case G_UNARY:
980 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
981 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
982 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
983 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
988 default:
989 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
990 }
991 }
992
993 const DataTypes::RealVectorType*
994 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
995 {
996 // we assume that any collapsing has been done before we get here
997 // since we only have one argument we don't need to think about only
998 // processing single points.
999 // we will also know we won't get identity nodes
1000 if (m_readytype!='E')
1001 {
1002 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1003 }
1004 if (m_op==IDENTITY)
1005 {
1006 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1007 }
1008 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1009 const double* left=&((*leftres)[roffset]);
1010 roffset=m_samplesize*tid;
1011 double* result=&(m_samples_r[roffset]);
1012 if (m_op==POS)
1013 {
1014 // this should be prevented earlier
1015 // operation is meaningless for lazy
1016 throw DataException("Programmer error - POS not supported for lazy data.");
1017 }
1018 tensor_unary_array_operation(m_samplesize,
1019 left,
1020 result,
1021 m_op,
1022 m_tol);
1023 return &(m_samples_r);
1024 }
1025
1026
1027 const DataTypes::RealVectorType*
1028 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1029 {
1030 // we assume that any collapsing has been done before we get here
1031 // since we only have one argument we don't need to think about only
1032 // processing single points.
1033 // we will also know we won't get identity nodes
1034 if (m_readytype!='E')
1035 {
1036 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1037 }
1038 if (m_op==IDENTITY)
1039 {
1040 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1041 }
1042 size_t loffset=0;
1043 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1044
1045 roffset=m_samplesize*tid;
1046 unsigned int ndpps=getNumDPPSample();
1047 unsigned int psize=DataTypes::noValues(m_left->getShape());
1048 double* result=&(m_samples_r[roffset]);
1049 switch (m_op)
1050 {
1051 case MINVAL:
1052 {
1053 for (unsigned int z=0;z<ndpps;++z)
1054 {
1055 FMin op;
1056 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1057 loffset+=psize;
1058 result++;
1059 }
1060 }
1061 break;
1062 case MAXVAL:
1063 {
1064 for (unsigned int z=0;z<ndpps;++z)
1065 {
1066 FMax op;
1067 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1068 loffset+=psize;
1069 result++;
1070 }
1071 }
1072 break;
1073 default:
1074 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1075 }
1076 return &(m_samples_r);
1077 }
1078
1079 const DataTypes::RealVectorType*
1080 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1081 {
1082 // we assume that any collapsing has been done before we get here
1083 // since we only have one argument we don't need to think about only
1084 // processing single points.
1085 if (m_readytype!='E')
1086 {
1087 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1088 }
1089 if (m_op==IDENTITY)
1090 {
1091 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1092 }
1093 size_t subroffset;
1094 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1095 roffset=m_samplesize*tid;
1096 size_t loop=0;
1097 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1098 size_t step=getNoValues();
1099 size_t offset=roffset;
1100 switch (m_op)
1101 {
1102 case SYM:
1103 for (loop=0;loop<numsteps;++loop)
1104 {
1105 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1106 subroffset+=step;
1107 offset+=step;
1108 }
1109 break;
1110 case NSYM:
1111 for (loop=0;loop<numsteps;++loop)
1112 {
1113 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1114 subroffset+=step;
1115 offset+=step;
1116 }
1117 break;
1118 default:
1119 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1120 }
1121 return &m_samples_r;
1122 }
1123
1124 const DataTypes::RealVectorType*
1125 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1126 {
1127 // we assume that any collapsing has been done before we get here
1128 // since we only have one argument we don't need to think about only
1129 // processing single points.
1130 if (m_readytype!='E')
1131 {
1132 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1133 }
1134 if (m_op==IDENTITY)
1135 {
1136 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1137 }
1138 size_t subroffset;
1139 size_t offset;
1140 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1141 roffset=m_samplesize*tid;
1142 offset=roffset;
1143 size_t loop=0;
1144 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1145 size_t outstep=getNoValues();
1146 size_t instep=m_left->getNoValues();
1147 switch (m_op)
1148 {
1149 case TRACE:
1150 for (loop=0;loop<numsteps;++loop)
1151 {
1152 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_r ,getShape(),offset,m_axis_offset);
1153 subroffset+=instep;
1154 offset+=outstep;
1155 }
1156 break;
1157 case TRANS:
1158 for (loop=0;loop<numsteps;++loop)
1159 {
1160 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset,m_axis_offset);
1161 subroffset+=instep;
1162 offset+=outstep;
1163 }
1164 break;
1165 default:
1166 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1167 }
1168 return &m_samples_r;
1169 }
1170
1171
1172 const DataTypes::RealVectorType*
1173 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1174 {
1175 if (m_readytype!='E')
1176 {
1177 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1178 }
1179 if (m_op==IDENTITY)
1180 {
1181 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1182 }
1183 size_t subroffset;
1184 size_t offset;
1185 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1186 roffset=m_samplesize*tid;
1187 offset=roffset;
1188 size_t loop=0;
1189 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190 size_t outstep=getNoValues();
1191 size_t instep=m_left->getNoValues();
1192 switch (m_op)
1193 {
1194 case SWAP:
1195 for (loop=0;loop<numsteps;++loop)
1196 {
1197 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset, m_axis_offset, m_transpose);
1198 subroffset+=instep;
1199 offset+=outstep;
1200 }
1201 break;
1202 default:
1203 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1204 }
1205 return &m_samples_r;
1206 }
1207
1208 const DataTypes::RealVectorType*
1209 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1210 {
1211 if (m_readytype!='E')
1212 {
1213 throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1214 }
1215 if (m_op!=CONDEVAL)
1216 {
1217 throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1218 }
1219 size_t subroffset;
1220
1221 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1222 const RealVectorType* srcres=0;
1223 if ((*maskres)[subroffset]>0)
1224 {
1225 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1226 }
1227 else
1228 {
1229 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1230 }
1231
1232 // Now we need to copy the result
1233
1234 roffset=m_samplesize*tid;
1235 for (int i=0;i<m_samplesize;++i)
1236 {
1237 m_samples_r[roffset+i]=(*srcres)[subroffset+i];
1238 }
1239
1240 return &m_samples_r;
1241 }
1242
1243 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1244 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1245 // If both children are expanded, then we can process them in a single operation (we treat
1246 // the whole sample as one big datapoint.
1247 // If one of the children is not expanded, then we need to treat each point in the sample
1248 // individually.
1249 // There is an additional complication when scalar operations are considered.
1250 // For example, 2+Vector.
1251 // In this case each double within the point is treated individually
1252 const DataTypes::RealVectorType*
1253 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1254 {
1255 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1256
1257 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1258 // first work out which of the children are expanded
1259 bool leftExp=(m_left->m_readytype=='E');
1260 bool rightExp=(m_right->m_readytype=='E');
1261 if (!leftExp && !rightExp)
1262 {
1263 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1264 }
1265 bool leftScalar=(m_left->getRank()==0);
1266 bool rightScalar=(m_right->getRank()==0);
1267 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1268 {
1269 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1270 }
1271 size_t leftsize=m_left->getNoValues();
1272 size_t rightsize=m_right->getNoValues();
1273 size_t chunksize=1; // how many doubles will be processed in one go
1274 int leftstep=0; // how far should the left offset advance after each step
1275 int rightstep=0;
1276 int numsteps=0; // total number of steps for the inner loop
1277 int oleftstep=0; // the o variables refer to the outer loop
1278 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1279 int onumsteps=1;
1280
1281 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1282 bool RES=(rightExp && rightScalar);
1283 bool LS=(!leftExp && leftScalar); // left is a single scalar
1284 bool RS=(!rightExp && rightScalar);
1285 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1286 bool RN=(!rightExp && !rightScalar);
1287 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1288 bool REN=(rightExp && !rightScalar);
1289
1290 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1291 {
1292 chunksize=m_left->getNumDPPSample()*leftsize;
1293 leftstep=0;
1294 rightstep=0;
1295 numsteps=1;
1296 }
1297 else if (LES || RES)
1298 {
1299 chunksize=1;
1300 if (LES) // left is an expanded scalar
1301 {
1302 if (RS)
1303 {
1304 leftstep=1;
1305 rightstep=0;
1306 numsteps=m_left->getNumDPPSample();
1307 }
1308 else // RN or REN
1309 {
1310 leftstep=0;
1311 oleftstep=1;
1312 rightstep=1;
1313 orightstep=(RN ? -(int)rightsize : 0);
1314 numsteps=rightsize;
1315 onumsteps=m_left->getNumDPPSample();
1316 }
1317 }
1318 else // right is an expanded scalar
1319 {
1320 if (LS)
1321 {
1322 rightstep=1;
1323 leftstep=0;
1324 numsteps=m_right->getNumDPPSample();
1325 }
1326 else
1327 {
1328 rightstep=0;
1329 orightstep=1;
1330 leftstep=1;
1331 oleftstep=(LN ? -(int)leftsize : 0);
1332 numsteps=leftsize;
1333 onumsteps=m_right->getNumDPPSample();
1334 }
1335 }
1336 }
1337 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1338 {
1339 if (LEN) // and Right will be a single value
1340 {
1341 chunksize=rightsize;
1342 leftstep=rightsize;
1343 rightstep=0;
1344 numsteps=m_left->getNumDPPSample();
1345 if (RS)
1346 {
1347 numsteps*=leftsize;
1348 }
1349 }
1350 else // REN
1351 {
1352 chunksize=leftsize;
1353 rightstep=leftsize;
1354 leftstep=0;
1355 numsteps=m_right->getNumDPPSample();
1356 if (LS)
1357 {
1358 numsteps*=rightsize;
1359 }
1360 }
1361 }
1362
1363 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1364 // Get the values of sub-expressions
1365 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1366 const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1367 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1368 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1369 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1370 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1371 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1372 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1373 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1374
1375 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1376 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1377
1378
1379 roffset=m_samplesize*tid;
1380 double* resultp=&(m_samples_r[roffset]); // results are stored at the vector offset we received
1381 switch(m_op)
1382 {
1383 case ADD:
1384 //PROC_OP(NO_ARG,plus<double>());
1385 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1386 &(*left)[0],
1387 &(*right)[0],
1388 chunksize,
1389 onumsteps,
1390 numsteps,
1391 resultStep,
1392 leftstep,
1393 rightstep,
1394 oleftstep,
1395 orightstep,
1396 lroffset,
1397 rroffset,
1398 escript::ES_optype::ADD);
1399 break;
1400 case SUB:
1401 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1402 &(*left)[0],
1403 &(*right)[0],
1404 chunksize,
1405 onumsteps,
1406 numsteps,
1407 resultStep,
1408 leftstep,
1409 rightstep,
1410 oleftstep,
1411 orightstep,
1412 lroffset,
1413 rroffset,
1414 escript::ES_optype::SUB);
1415 //PROC_OP(NO_ARG,minus<double>());
1416 break;
1417 case MUL:
1418 //PROC_OP(NO_ARG,multiplies<double>());
1419 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1420 &(*left)[0],
1421 &(*right)[0],
1422 chunksize,
1423 onumsteps,
1424 numsteps,
1425 resultStep,
1426 leftstep,
1427 rightstep,
1428 oleftstep,
1429 orightstep,
1430 lroffset,
1431 rroffset,
1432 escript::ES_optype::MUL);
1433 break;
1434 case DIV:
1435 //PROC_OP(NO_ARG,divides<double>());
1436 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1437 &(*left)[0],
1438 &(*right)[0],
1439 chunksize,
1440 onumsteps,
1441 numsteps,
1442 resultStep,
1443 leftstep,
1444 rightstep,
1445 oleftstep,
1446 orightstep,
1447 lroffset,
1448 rroffset,
1449 escript::ES_optype::DIV);
1450 break;
1451 case POW:
1452 //PROC_OP(double (double,double),::pow);
1453 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1454 &(*left)[0],
1455 &(*right)[0],
1456 chunksize,
1457 onumsteps,
1458 numsteps,
1459 resultStep,
1460 leftstep,
1461 rightstep,
1462 oleftstep,
1463 orightstep,
1464 lroffset,
1465 rroffset,
1466 escript::ES_optype::POW);
1467 break;
1468 default:
1469 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1470 }
1471 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_r[roffset] << endl;)
1472 return &m_samples_r;
1473 }
1474
1475
1476 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1477 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1478 // unlike the other resolve helpers, we must treat these datapoints separately.
1479 const DataTypes::RealVectorType*
1480 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1481 {
1482 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1483
1484 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1485 // first work out which of the children are expanded
1486 bool leftExp=(m_left->m_readytype=='E');
1487 bool rightExp=(m_right->m_readytype=='E');
1488 int steps=getNumDPPSample();
1489 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1490 int rightStep=(rightExp?m_right->getNoValues() : 0);
1491
1492 int resultStep=getNoValues();
1493 roffset=m_samplesize*tid;
1494 size_t offset=roffset;
1495
1496 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1497
1498 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1499
1500 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1501 cout << getNoValues() << endl;)
1502
1503
1504 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1505 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1506 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1507 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1508 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1509 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1510 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1511
1512 double* resultp=&(m_samples_r[offset]); // results are stored at the vector offset we received
1513 switch(m_op)
1514 {
1515 case PROD:
1516 for (int i=0;i<steps;++i,resultp+=resultStep)
1517 {
1518 const double *ptr_0 = &((*left)[lroffset]);
1519 const double *ptr_1 = &((*right)[rroffset]);
1520
1521 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1522 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1523
1524 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1525
1526 lroffset+=leftStep;
1527 rroffset+=rightStep;
1528 }
1529 break;
1530 default:
1531 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1532 }
1533 roffset=offset;
1534 return &m_samples_r;
1535 }
1536
1537
1538 const DataTypes::RealVectorType*
1539 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1540 {
1541 #ifdef _OPENMP
1542 int tid=omp_get_thread_num();
1543 #else
1544 int tid=0;
1545 #endif
1546
1547 #ifdef LAZY_STACK_PROF
1548 stackstart[tid]=&tid;
1549 stackend[tid]=&tid;
1550 const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1551 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1552 #pragma omp critical
1553 if (d>maxstackuse)
1554 {
1555 cout << "Max resolve Stack use " << d << endl;
1556 maxstackuse=d;
1557 }
1558 return r;
1559 #else
1560 return resolveNodeSample(tid, sampleNo, roffset);
1561 #endif
1562 }
1563
1564
1565 // This needs to do the work of the identity constructor
1566 void
1567 DataLazy::resolveToIdentity()
1568 {
1569 if (m_op==IDENTITY)
1570 return;
1571 DataReady_ptr p=resolveNodeWorker();
1572 makeIdentity(p);
1573 }
1574
1575 void DataLazy::makeIdentity(const DataReady_ptr& p)
1576 {
1577 m_op=IDENTITY;
1578 m_axis_offset=0;
1579 m_transpose=0;
1580 m_SL=m_SM=m_SR=0;
1581 m_children=m_height=0;
1582 m_id=p;
1583 if(p->isConstant()) {m_readytype='C';}
1584 else if(p->isExpanded()) {m_readytype='E';}
1585 else if (p->isTagged()) {m_readytype='T';}
1586 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1587 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1588 m_left.reset();
1589 m_right.reset();
1590 m_iscompl=p->isComplex();
1591 }
1592
1593
1594 DataReady_ptr
1595 DataLazy::resolve()
1596 {
1597 resolveToIdentity();
1598 return m_id;
1599 }
1600
1601
1602 /* This is really a static method but I think that caused problems in windows */
1603 void
1604 DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1605 {
1606 if (dats.empty())
1607 {
1608 return;
1609 }
1610 vector<DataLazy*> work;
1611 FunctionSpace fs=dats[0]->getFunctionSpace();
1612 bool match=true;
1613 for (int i=dats.size()-1;i>=0;--i)
1614 {
1615 if (dats[i]->m_readytype!='E')
1616 {
1617 dats[i]->collapse();
1618 }
1619 if (dats[i]->m_op!=IDENTITY)
1620 {
1621 work.push_back(dats[i]);
1622 if (fs!=dats[i]->getFunctionSpace())
1623 {
1624 match=false;
1625 }
1626 }
1627 }
1628 if (work.empty())
1629 {
1630 return; // no work to do
1631 }
1632 if (match) // all functionspaces match. Yes I realise this is overly strict
1633 { // it is possible that dats[0] is one of the objects which we discarded and
1634 // all the other functionspaces match.
1635 vector<DataExpanded*> dep;
1636 vector<RealVectorType*> vecs;
1637 for (int i=0;i<work.size();++i)
1638 {
1639 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1640 vecs.push_back(&(dep[i]->getVectorRW()));
1641 }
1642 int totalsamples=work[0]->getNumSamples();
1643 const RealVectorType* res=0; // Storage for answer
1644 int sample;
1645 #pragma omp parallel private(sample, res)
1646 {
1647 size_t roffset=0;
1648 #pragma omp for schedule(static)
1649 for (sample=0;sample<totalsamples;++sample)
1650 {
1651 roffset=0;
1652 int j;
1653 for (j=work.size()-1;j>=0;--j)
1654 {
1655 #ifdef _OPENMP
1656 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1657 #else
1658 res=work[j]->resolveNodeSample(0,sample,roffset);
1659 #endif
1660 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1661 memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1662 }
1663 }
1664 }
1665 // Now we need to load the new results as identity ops into the lazy nodes
1666 for (int i=work.size()-1;i>=0;--i)
1667 {
1668 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1669 }
1670 }
1671 else // functionspaces do not match
1672 {
1673 for (int i=0;i<work.size();++i)
1674 {
1675 work[i]->resolveToIdentity();
1676 }
1677 }
1678 }
1679
1680
1681
1682 // This version of resolve uses storage in each node to hold results
1683 DataReady_ptr
1684 DataLazy::resolveNodeWorker()
1685 {
1686 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1687 {
1688 collapse();
1689 }
1690 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1691 {
1692 return m_id;
1693 }
1694 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1695 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1696 RealVectorType& resvec=result->getVectorRW();
1697 DataReady_ptr resptr=DataReady_ptr(result);
1698
1699 int sample;
1700 int totalsamples=getNumSamples();
1701 const RealVectorType* res=0; // Storage for answer
1702 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1703 #pragma omp parallel private(sample,res)
1704 {
1705 size_t roffset=0;
1706 #ifdef LAZY_STACK_PROF
1707 stackstart[omp_get_thread_num()]=&roffset;
1708 stackend[omp_get_thread_num()]=&roffset;
1709 #endif
1710 #pragma omp for schedule(static)
1711 for (sample=0;sample<totalsamples;++sample)
1712 {
1713 roffset=0;
1714 #ifdef _OPENMP
1715 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1716 #else
1717 res=resolveNodeSample(0,sample,roffset);
1718 #endif
1719 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1720 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1721 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1722 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1723 }
1724 }
1725 #ifdef LAZY_STACK_PROF
1726 for (int i=0;i<getNumberOfThreads();++i)
1727 {
1728 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1729 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1730 if (r>maxstackuse)
1731 {
1732 maxstackuse=r;
1733 }
1734 }
1735 cout << "Max resolve Stack use=" << maxstackuse << endl;
1736 #endif
1737 return resptr;
1738 }
1739
1740 std::string
1741 DataLazy::toString() const
1742 {
1743 ostringstream oss;
1744 oss << "Lazy Data: [depth=" << m_height<< "] ";
1745 switch (escriptParams.getLazyStrFmt())
1746 {
1747 case 1: // tree format
1748 oss << endl;
1749 intoTreeString(oss,"");
1750 break;
1751 case 2: // just the depth
1752 break;
1753 default:
1754 intoString(oss);
1755 break;
1756 }
1757 return oss.str();
1758 }
1759
1760
1761 void
1762 DataLazy::intoString(ostringstream& oss) const
1763 {
1764 // oss << "[" << m_children <<";"<<m_height <<"]";
1765 switch (getOpgroup(m_op))
1766 {
1767 case G_IDENTITY:
1768 if (m_id->isExpanded())
1769 {
1770 oss << "E";
1771 }
1772 else if (m_id->isTagged())
1773 {
1774 oss << "T";
1775 }
1776 else if (m_id->isConstant())
1777 {
1778 oss << "C";
1779 }
1780 else
1781 {
1782 oss << "?";
1783 }
1784 oss << '@' << m_id.get();
1785 break;
1786 case G_BINARY:
1787 oss << '(';
1788 m_left->intoString(oss);
1789 oss << ' ' << opToString(m_op) << ' ';
1790 m_right->intoString(oss);
1791 oss << ')';
1792 break;
1793 case G_UNARY:
1794 case G_UNARY_P:
1795 case G_NP1OUT:
1796 case G_NP1OUT_P:
1797 case G_REDUCTION:
1798 oss << opToString(m_op) << '(';
1799 m_left->intoString(oss);
1800 oss << ')';
1801 break;
1802 case G_TENSORPROD:
1803 oss << opToString(m_op) << '(';
1804 m_left->intoString(oss);
1805 oss << ", ";
1806 m_right->intoString(oss);
1807 oss << ')';
1808 break;
1809 case G_NP1OUT_2P:
1810 oss << opToString(m_op) << '(';
1811 m_left->intoString(oss);
1812 oss << ", " << m_axis_offset << ", " << m_transpose;
1813 oss << ')';
1814 break;
1815 case G_CONDEVAL:
1816 oss << opToString(m_op)<< '(' ;
1817 m_mask->intoString(oss);
1818 oss << " ? ";
1819 m_left->intoString(oss);
1820 oss << " : ";
1821 m_right->intoString(oss);
1822 oss << ')';
1823 break;
1824 default:
1825 oss << "UNKNOWN";
1826 }
1827 }
1828
1829
1830 void
1831 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1832 {
1833 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1834 switch (getOpgroup(m_op))
1835 {
1836 case G_IDENTITY:
1837 if (m_id->isExpanded())
1838 {
1839 oss << "E";
1840 }
1841 else if (m_id->isTagged())
1842 {
1843 oss << "T";
1844 }
1845 else if (m_id->isConstant())
1846 {
1847 oss << "C";
1848 }
1849 else
1850 {
1851 oss << "?";
1852 }
1853 oss << '@' << m_id.get() << endl;
1854 break;
1855 case G_BINARY:
1856 oss << opToString(m_op) << endl;
1857 indent+='.';
1858 m_left->intoTreeString(oss, indent);
1859 m_right->intoTreeString(oss, indent);
1860 break;
1861 case G_UNARY:
1862 case G_UNARY_P:
1863 case G_NP1OUT:
1864 case G_NP1OUT_P:
1865 case G_REDUCTION:
1866 oss << opToString(m_op) << endl;
1867 indent+='.';
1868 m_left->intoTreeString(oss, indent);
1869 break;
1870 case G_TENSORPROD:
1871 oss << opToString(m_op) << endl;
1872 indent+='.';
1873 m_left->intoTreeString(oss, indent);
1874 m_right->intoTreeString(oss, indent);
1875 break;
1876 case G_NP1OUT_2P:
1877 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1878 indent+='.';
1879 m_left->intoTreeString(oss, indent);
1880 break;
1881 default:
1882 oss << "UNKNOWN";
1883 }
1884 }
1885
1886
1887 DataAbstract*
1888 DataLazy::deepCopy() const
1889 {
1890 switch (getOpgroup(m_op))
1891 {
1892 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1893 case G_UNARY:
1894 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1895 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1896 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1897 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1898 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1899 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1900 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1901 default:
1902 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1903 }
1904 }
1905
1906 // For this, we don't care what op you were doing because the answer is now zero
1907 DataAbstract*
1908 DataLazy::zeroedCopy() const
1909 {
1910 return new DataLazy(m_id->zeroedCopy()->getPtr());
1911 }
1912
1913 // There is no single, natural interpretation of getLength on DataLazy.
1914 // Instances of DataReady can look at the size of their vectors.
1915 // For lazy though, it could be the size the data would be if it were resolved;
1916 // or it could be some function of the lengths of the DataReady instances which
1917 // form part of the expression.
1918 // Rather than have people making assumptions, I have disabled the method.
1919 DataTypes::RealVectorType::size_type
1920 DataLazy::getLength() const
1921 {
1922 throw DataException("getLength() does not make sense for lazy data.");
1923 }
1924
1925
1926 DataAbstract*
1927 DataLazy::getSlice(const DataTypes::RegionType& region) const
1928 {
1929 throw DataException("getSlice - not implemented for Lazy objects.");
1930 }
1931
1932
1933 // To do this we need to rely on our child nodes
1934 DataTypes::RealVectorType::size_type
1935 DataLazy::getPointOffset(int sampleNo,
1936 int dataPointNo)
1937 {
1938 if (m_op==IDENTITY)
1939 {
1940 return m_id->getPointOffset(sampleNo,dataPointNo);
1941 }
1942 if (m_readytype!='E')
1943 {
1944 collapse();
1945 return m_id->getPointOffset(sampleNo,dataPointNo);
1946 }
1947 // at this point we do not have an identity node and the expression will be Expanded
1948 // so we only need to know which child to ask
1949 if (m_left->m_readytype=='E')
1950 {
1951 return m_left->getPointOffset(sampleNo,dataPointNo);
1952 }
1953 else
1954 {
1955 return m_right->getPointOffset(sampleNo,dataPointNo);
1956 }
1957 }
1958
1959 // To do this we need to rely on our child nodes
1960 DataTypes::RealVectorType::size_type
1961 DataLazy::getPointOffset(int sampleNo,
1962 int dataPointNo) const
1963 {
1964 if (m_op==IDENTITY)
1965 {
1966 return m_id->getPointOffset(sampleNo,dataPointNo);
1967 }
1968 if (m_readytype=='E')
1969 {
1970 // at this point we do not have an identity node and the expression will be Expanded
1971 // so we only need to know which child to ask
1972 if (m_left->m_readytype=='E')
1973 {
1974 return m_left->getPointOffset(sampleNo,dataPointNo);
1975 }
1976 else
1977 {
1978 return m_right->getPointOffset(sampleNo,dataPointNo);
1979 }
1980 }
1981 if (m_readytype=='C')
1982 {
1983 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1984 }
1985 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1986 }
1987
1988
1989 // I have decided to let Data:: handle this issue.
1990 void
1991 DataLazy::setToZero()
1992 {
1993 // DataTypes::RealVectorType v(getNoValues(),0);
1994 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1995 // m_op=IDENTITY;
1996 // m_right.reset();
1997 // m_left.reset();
1998 // m_readytype='C';
1999 // m_buffsRequired=1;
2000
2001 (void)privdebug; // to stop the compiler complaining about unused privdebug
2002 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2003 }
2004
2005 bool
2006 DataLazy::actsExpanded() const
2007 {
2008 return (m_readytype=='E');
2009 }
2010
2011 } // end namespace
2012

  ViewVC Help
Powered by ViewVC 1.1.26