/[escript]/trunk/escriptcore/src/DataLazy.cpp
ViewVC logotype

Contents of /trunk/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6168 - (show annotations)
Wed Apr 13 03:08:12 2016 UTC (2 years, 4 months ago) by jfenwick
File size: 62400 byte(s)
Making Lazy and the rest of escript use the same operator enumeration


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include "DataLazy.h"
18 #include "Data.h"
19 #include "DataTypes.h"
20 #include "EscriptParams.h"
21 #include "FunctionSpace.h"
22 #include "Utils.h"
23 #include "DataVectorOps.h"
24
25 #include <iomanip> // for some fancy formatting in debug
26
27 using namespace escript::DataTypes;
28
29 #define NO_ARG
30
31 // #define LAZYDEBUG(X) if (privdebug){X;}
32 #define LAZYDEBUG(X)
33 namespace
34 {
35 bool privdebug=false;
36
37 #define ENABLEDEBUG privdebug=true;
38 #define DISABLEDEBUG privdebug=false;
39 }
40
41 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42
43 // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44
45
46 #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
47
48 /*
49 How does DataLazy work?
50 ~~~~~~~~~~~~~~~~~~~~~~~
51
52 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
53 denoted left and right.
54
55 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
56 This means that all "internal" nodes in the structure are instances of DataLazy.
57
58 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59 Note that IDENTITY is not considered a unary operation.
60
61 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62 It must however form a DAG (directed acyclic graph).
63 I will refer to individual DataLazy objects with the structure as nodes.
64
65 Each node also stores:
66 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67 evaluated.
68 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69 evaluate the expression.
70 - m_samplesize ~ the number of doubles stored in a sample.
71
72 When a new node is created, the above values are computed based on the values in the child nodes.
73 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
74
75 The resolve method, which produces a DataReady from a DataLazy, does the following:
76 1) Create a DataReady to hold the new result.
77 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
78 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
79
80 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
81
82 resolveSample returns a Vector* and an offset within that vector where the result is stored.
83 Normally, this would be v, but for identity nodes their internal vector is returned instead.
84
85 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
86
87 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
88 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
89
90 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
91 1) Add to the ES_optype.
92 2) determine what opgroup your operation belongs to (X)
93 3) add a string for the op to the end of ES_opstrings
94 4) increase ES_opcount
95 5) add an entry (X) to opgroups
96 6) add an entry to the switch in collapseToReady
97 7) add an entry to resolveX
98 */
99
100
101 using namespace std;
102 using namespace boost;
103
104 namespace escript
105 {
106
107 namespace
108 {
109
110
111 // enabling this will print out when ever the maximum stacksize used by resolve increases
112 // it assumes _OPENMP is also in use
113 //#define LAZY_STACK_PROF
114
115
116
117 #ifndef _OPENMP
118 #ifdef LAZY_STACK_PROF
119 #undef LAZY_STACK_PROF
120 #endif
121 #endif
122
123
124 #ifdef LAZY_STACK_PROF
125 std::vector<void*> stackstart(getNumberOfThreads());
126 std::vector<void*> stackend(getNumberOfThreads());
127 size_t maxstackuse=0;
128 #endif
129
130
131 // return the FunctionSpace of the result of "left op right"
132 FunctionSpace
133 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
134 {
135 // perhaps this should call interpolate and throw or something?
136 // maybe we need an interpolate node -
137 // that way, if interpolate is required in any other op we can just throw a
138 // programming error exception.
139
140 FunctionSpace l=left->getFunctionSpace();
141 FunctionSpace r=right->getFunctionSpace();
142 if (l!=r)
143 {
144 signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
145 if (res==1)
146 {
147 return l;
148 }
149 if (res==-1)
150 {
151 return r;
152 }
153 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
154 }
155 return l;
156 }
157
158 // return the shape of the result of "left op right"
159 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
160 DataTypes::ShapeType
161 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162 {
163 if (left->getShape()!=right->getShape())
164 {
165 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
166 {
167 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
168 }
169
170 if (left->getRank()==0) // we need to allow scalar * anything
171 {
172 return right->getShape();
173 }
174 if (right->getRank()==0)
175 {
176 return left->getShape();
177 }
178 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
179 }
180 return left->getShape();
181 }
182
183 // return the shape for "op left"
184
185 DataTypes::ShapeType
186 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
187 {
188 switch(op)
189 {
190 case TRANS:
191 { // for the scoping of variables
192 const DataTypes::ShapeType& s=left->getShape();
193 DataTypes::ShapeType sh;
194 int rank=left->getRank();
195 if (axis_offset<0 || axis_offset>rank)
196 {
197 stringstream e;
198 e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
199 throw DataException(e.str());
200 }
201 for (int i=0; i<rank; i++)
202 {
203 int index = (axis_offset+i)%rank;
204 sh.push_back(s[index]); // Append to new shape
205 }
206 return sh;
207 }
208 break;
209 case TRACE:
210 {
211 int rank=left->getRank();
212 if (rank<2)
213 {
214 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
215 }
216 if ((axis_offset>rank-2) || (axis_offset<0))
217 {
218 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
219 }
220 if (rank==2)
221 {
222 return DataTypes::scalarShape;
223 }
224 else if (rank==3)
225 {
226 DataTypes::ShapeType sh;
227 if (axis_offset==0)
228 {
229 sh.push_back(left->getShape()[2]);
230 }
231 else // offset==1
232 {
233 sh.push_back(left->getShape()[0]);
234 }
235 return sh;
236 }
237 else if (rank==4)
238 {
239 DataTypes::ShapeType sh;
240 const DataTypes::ShapeType& s=left->getShape();
241 if (axis_offset==0)
242 {
243 sh.push_back(s[2]);
244 sh.push_back(s[3]);
245 }
246 else if (axis_offset==1)
247 {
248 sh.push_back(s[0]);
249 sh.push_back(s[3]);
250 }
251 else // offset==2
252 {
253 sh.push_back(s[0]);
254 sh.push_back(s[1]);
255 }
256 return sh;
257 }
258 else // unknown rank
259 {
260 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
261 }
262 }
263 break;
264 default:
265 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
266 }
267 }
268
269 DataTypes::ShapeType
270 SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
271 {
272 // This code taken from the Data.cpp swapaxes() method
273 // Some of the checks are probably redundant here
274 int axis0_tmp,axis1_tmp;
275 const DataTypes::ShapeType& s=left->getShape();
276 DataTypes::ShapeType out_shape;
277 // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
278 // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
279 int rank=left->getRank();
280 if (rank<2) {
281 throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
282 }
283 if (axis0<0 || axis0>rank-1) {
284 stringstream e;
285 e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
286 throw DataException(e.str());
287 }
288 if (axis1<0 || axis1>rank-1) {
289 stringstream e;
290 e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
291 throw DataException(e.str());
292 }
293 if (axis0 == axis1) {
294 throw DataException("Error - Data::swapaxes: axis indices must be different.");
295 }
296 if (axis0 > axis1) {
297 axis0_tmp=axis1;
298 axis1_tmp=axis0;
299 } else {
300 axis0_tmp=axis0;
301 axis1_tmp=axis1;
302 }
303 for (int i=0; i<rank; i++) {
304 if (i == axis0_tmp) {
305 out_shape.push_back(s[axis1_tmp]);
306 } else if (i == axis1_tmp) {
307 out_shape.push_back(s[axis0_tmp]);
308 } else {
309 out_shape.push_back(s[i]);
310 }
311 }
312 return out_shape;
313 }
314
315
316 // determine the output shape for the general tensor product operation
317 // the additional parameters return information required later for the product
318 // the majority of this code is copy pasted from C_General_Tensor_Product
319 DataTypes::ShapeType
320 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
321 {
322
323 // Get rank and shape of inputs
324 int rank0 = left->getRank();
325 int rank1 = right->getRank();
326 const DataTypes::ShapeType& shape0 = left->getShape();
327 const DataTypes::ShapeType& shape1 = right->getShape();
328
329 // Prepare for the loops of the product and verify compatibility of shapes
330 int start0=0, start1=0;
331 if (transpose == 0) {}
332 else if (transpose == 1) { start0 = axis_offset; }
333 else if (transpose == 2) { start1 = rank1-axis_offset; }
334 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
335
336 if (rank0<axis_offset)
337 {
338 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
339 }
340
341 // Adjust the shapes for transpose
342 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
343 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
344 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
345 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
346
347 // Prepare for the loops of the product
348 SL=1, SM=1, SR=1;
349 for (int i=0; i<rank0-axis_offset; i++) {
350 SL *= tmpShape0[i];
351 }
352 for (int i=rank0-axis_offset; i<rank0; i++) {
353 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
354 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
355 }
356 SM *= tmpShape0[i];
357 }
358 for (int i=axis_offset; i<rank1; i++) {
359 SR *= tmpShape1[i];
360 }
361
362 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
363 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
364 { // block to limit the scope of out_index
365 int out_index=0;
366 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
367 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
368 }
369
370 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
371 {
372 ostringstream os;
373 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
374 throw DataException(os.str());
375 }
376
377 return shape2;
378 }
379
380 } // end anonymous namespace
381
382 void DataLazy::LazyNodeSetup()
383 {
384 #ifdef _OPENMP
385 int numthreads=omp_get_max_threads();
386 m_samples.resize(numthreads*m_samplesize);
387 m_sampleids=new int[numthreads];
388 for (int i=0;i<numthreads;++i)
389 {
390 m_sampleids[i]=-1;
391 }
392 #else
393 m_samples.resize(m_samplesize);
394 m_sampleids=new int[1];
395 m_sampleids[0]=-1;
396 #endif // _OPENMP
397 }
398
399
400 // Creates an identity node
401 DataLazy::DataLazy(DataAbstract_ptr p)
402 : parent(p->getFunctionSpace(),p->getShape())
403 ,m_sampleids(0),
404 m_samples(1)
405 {
406 if (p->isLazy())
407 {
408 // I don't want identity of Lazy.
409 // Question: Why would that be so bad?
410 // Answer: We assume that the child of ID is something we can call getVector on
411 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
412 }
413 else
414 {
415 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
416 makeIdentity(dr);
417 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
418 }
419 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
420 }
421
422 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
423 : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
424 m_op(op),
425 m_axis_offset(0),
426 m_transpose(0),
427 m_SL(0), m_SM(0), m_SR(0)
428 {
429 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
430 {
431 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
432 }
433
434 DataLazy_ptr lleft;
435 if (!left->isLazy())
436 {
437 lleft=DataLazy_ptr(new DataLazy(left));
438 }
439 else
440 {
441 lleft=dynamic_pointer_cast<DataLazy>(left);
442 }
443 m_readytype=lleft->m_readytype;
444 m_left=lleft;
445 m_samplesize=getNumDPPSample()*getNoValues();
446 m_children=m_left->m_children+1;
447 m_height=m_left->m_height+1;
448 LazyNodeSetup();
449 SIZELIMIT
450 }
451
452
453 // In this constructor we need to consider interpolation
454 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
455 : parent(resultFS(left,right,op), resultShape(left,right,op)),
456 m_op(op),
457 m_SL(0), m_SM(0), m_SR(0)
458 {
459 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
460 if ((getOpgroup(op)!=G_BINARY))
461 {
462 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
463 }
464
465 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
466 {
467 FunctionSpace fs=getFunctionSpace();
468 Data ltemp(left);
469 Data tmp(ltemp,fs);
470 left=tmp.borrowDataPtr();
471 }
472 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
473 {
474 Data tmp(Data(right),getFunctionSpace());
475 right=tmp.borrowDataPtr();
476 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
477 }
478 left->operandCheck(*right);
479
480 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
481 {
482 m_left=dynamic_pointer_cast<DataLazy>(left);
483 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
484 }
485 else
486 {
487 m_left=DataLazy_ptr(new DataLazy(left));
488 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
489 }
490 if (right->isLazy())
491 {
492 m_right=dynamic_pointer_cast<DataLazy>(right);
493 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
494 }
495 else
496 {
497 m_right=DataLazy_ptr(new DataLazy(right));
498 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
499 }
500 char lt=m_left->m_readytype;
501 char rt=m_right->m_readytype;
502 if (lt=='E' || rt=='E')
503 {
504 m_readytype='E';
505 }
506 else if (lt=='T' || rt=='T')
507 {
508 m_readytype='T';
509 }
510 else
511 {
512 m_readytype='C';
513 }
514 m_samplesize=getNumDPPSample()*getNoValues();
515 m_children=m_left->m_children+m_right->m_children+2;
516 m_height=max(m_left->m_height,m_right->m_height)+1;
517 LazyNodeSetup();
518 SIZELIMIT
519 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
520 }
521
522 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
523 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
524 m_op(op),
525 m_axis_offset(axis_offset),
526 m_transpose(transpose)
527 {
528 if ((getOpgroup(op)!=G_TENSORPROD))
529 {
530 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
531 }
532 if ((transpose>2) || (transpose<0))
533 {
534 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
535 }
536 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
537 {
538 FunctionSpace fs=getFunctionSpace();
539 Data ltemp(left);
540 Data tmp(ltemp,fs);
541 left=tmp.borrowDataPtr();
542 }
543 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
544 {
545 Data tmp(Data(right),getFunctionSpace());
546 right=tmp.borrowDataPtr();
547 }
548 // left->operandCheck(*right);
549
550 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
551 {
552 m_left=dynamic_pointer_cast<DataLazy>(left);
553 }
554 else
555 {
556 m_left=DataLazy_ptr(new DataLazy(left));
557 }
558 if (right->isLazy())
559 {
560 m_right=dynamic_pointer_cast<DataLazy>(right);
561 }
562 else
563 {
564 m_right=DataLazy_ptr(new DataLazy(right));
565 }
566 char lt=m_left->m_readytype;
567 char rt=m_right->m_readytype;
568 if (lt=='E' || rt=='E')
569 {
570 m_readytype='E';
571 }
572 else if (lt=='T' || rt=='T')
573 {
574 m_readytype='T';
575 }
576 else
577 {
578 m_readytype='C';
579 }
580 m_samplesize=getNumDPPSample()*getNoValues();
581 m_children=m_left->m_children+m_right->m_children+2;
582 m_height=max(m_left->m_height,m_right->m_height)+1;
583 LazyNodeSetup();
584 SIZELIMIT
585 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
586 }
587
588
589 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
590 : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
591 m_op(op),
592 m_axis_offset(axis_offset),
593 m_transpose(0),
594 m_tol(0)
595 {
596 if ((getOpgroup(op)!=G_NP1OUT_P))
597 {
598 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
599 }
600 DataLazy_ptr lleft;
601 if (!left->isLazy())
602 {
603 lleft=DataLazy_ptr(new DataLazy(left));
604 }
605 else
606 {
607 lleft=dynamic_pointer_cast<DataLazy>(left);
608 }
609 m_readytype=lleft->m_readytype;
610 m_left=lleft;
611 m_samplesize=getNumDPPSample()*getNoValues();
612 m_children=m_left->m_children+1;
613 m_height=m_left->m_height+1;
614 LazyNodeSetup();
615 SIZELIMIT
616 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
617 }
618
619 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
620 : parent(left->getFunctionSpace(), left->getShape()),
621 m_op(op),
622 m_axis_offset(0),
623 m_transpose(0),
624 m_tol(tol)
625 {
626 if ((getOpgroup(op)!=G_UNARY_P))
627 {
628 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
629 }
630 DataLazy_ptr lleft;
631 if (!left->isLazy())
632 {
633 lleft=DataLazy_ptr(new DataLazy(left));
634 }
635 else
636 {
637 lleft=dynamic_pointer_cast<DataLazy>(left);
638 }
639 m_readytype=lleft->m_readytype;
640 m_left=lleft;
641 m_samplesize=getNumDPPSample()*getNoValues();
642 m_children=m_left->m_children+1;
643 m_height=m_left->m_height+1;
644 LazyNodeSetup();
645 SIZELIMIT
646 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
647 }
648
649
650 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
651 : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
652 m_op(op),
653 m_axis_offset(axis0),
654 m_transpose(axis1),
655 m_tol(0)
656 {
657 if ((getOpgroup(op)!=G_NP1OUT_2P))
658 {
659 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
660 }
661 DataLazy_ptr lleft;
662 if (!left->isLazy())
663 {
664 lleft=DataLazy_ptr(new DataLazy(left));
665 }
666 else
667 {
668 lleft=dynamic_pointer_cast<DataLazy>(left);
669 }
670 m_readytype=lleft->m_readytype;
671 m_left=lleft;
672 m_samplesize=getNumDPPSample()*getNoValues();
673 m_children=m_left->m_children+1;
674 m_height=m_left->m_height+1;
675 LazyNodeSetup();
676 SIZELIMIT
677 LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
678 }
679
680
681 namespace
682 {
683
684 inline int max3(int a, int b, int c)
685 {
686 int t=(a>b?a:b);
687 return (t>c?t:c);
688
689 }
690 }
691
692 DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
693 : parent(left->getFunctionSpace(), left->getShape()),
694 m_op(CONDEVAL),
695 m_axis_offset(0),
696 m_transpose(0),
697 m_tol(0)
698 {
699
700 DataLazy_ptr lmask;
701 DataLazy_ptr lleft;
702 DataLazy_ptr lright;
703 if (!mask->isLazy())
704 {
705 lmask=DataLazy_ptr(new DataLazy(mask));
706 }
707 else
708 {
709 lmask=dynamic_pointer_cast<DataLazy>(mask);
710 }
711 if (!left->isLazy())
712 {
713 lleft=DataLazy_ptr(new DataLazy(left));
714 }
715 else
716 {
717 lleft=dynamic_pointer_cast<DataLazy>(left);
718 }
719 if (!right->isLazy())
720 {
721 lright=DataLazy_ptr(new DataLazy(right));
722 }
723 else
724 {
725 lright=dynamic_pointer_cast<DataLazy>(right);
726 }
727 m_readytype=lmask->m_readytype;
728 if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
729 {
730 throw DataException("Programmer Error - condEval arguments must have the same readytype");
731 }
732 m_left=lleft;
733 m_right=lright;
734 m_mask=lmask;
735 m_samplesize=getNumDPPSample()*getNoValues();
736 m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
737 m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
738 LazyNodeSetup();
739 SIZELIMIT
740 LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
741 }
742
743
744
745 DataLazy::~DataLazy()
746 {
747 delete[] m_sampleids;
748 }
749
750
751 /*
752 \brief Evaluates the expression using methods on Data.
753 This does the work for the collapse method.
754 For reasons of efficiency do not call this method on DataExpanded nodes.
755 */
756 DataReady_ptr
757 DataLazy::collapseToReady() const
758 {
759 if (m_readytype=='E')
760 { // this is more an efficiency concern than anything else
761 throw DataException("Programmer Error - do not use collapse on Expanded data.");
762 }
763 if (m_op==IDENTITY)
764 {
765 return m_id;
766 }
767 DataReady_ptr pleft=m_left->collapseToReady();
768 Data left(pleft);
769 Data right;
770 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
771 {
772 right=Data(m_right->collapseToReady());
773 }
774 Data result;
775 switch(m_op)
776 {
777 case ADD:
778 result=left+right;
779 break;
780 case SUB:
781 result=left-right;
782 break;
783 case MUL:
784 result=left*right;
785 break;
786 case DIV:
787 result=left/right;
788 break;
789 case SIN:
790 result=left.sin();
791 break;
792 case COS:
793 result=left.cos();
794 break;
795 case TAN:
796 result=left.tan();
797 break;
798 case ASIN:
799 result=left.asin();
800 break;
801 case ACOS:
802 result=left.acos();
803 break;
804 case ATAN:
805 result=left.atan();
806 break;
807 case SINH:
808 result=left.sinh();
809 break;
810 case COSH:
811 result=left.cosh();
812 break;
813 case TANH:
814 result=left.tanh();
815 break;
816 case ERF:
817 result=left.erf();
818 break;
819 case ASINH:
820 result=left.asinh();
821 break;
822 case ACOSH:
823 result=left.acosh();
824 break;
825 case ATANH:
826 result=left.atanh();
827 break;
828 case LOG10:
829 result=left.log10();
830 break;
831 case LOG:
832 result=left.log();
833 break;
834 case SIGN:
835 result=left.sign();
836 break;
837 case ABS:
838 result=left.abs();
839 break;
840 case NEG:
841 result=left.neg();
842 break;
843 case POS:
844 // it doesn't mean anything for delayed.
845 // it will just trigger a deep copy of the lazy object
846 throw DataException("Programmer error - POS not supported for lazy data.");
847 break;
848 case EXP:
849 result=left.exp();
850 break;
851 case SQRT:
852 result=left.sqrt();
853 break;
854 case RECIP:
855 result=left.oneOver();
856 break;
857 case GZ:
858 result=left.wherePositive();
859 break;
860 case LZ:
861 result=left.whereNegative();
862 break;
863 case GEZ:
864 result=left.whereNonNegative();
865 break;
866 case LEZ:
867 result=left.whereNonPositive();
868 break;
869 case NEZ:
870 result=left.whereNonZero(m_tol);
871 break;
872 case EZ:
873 result=left.whereZero(m_tol);
874 break;
875 case SYM:
876 result=left.symmetric();
877 break;
878 case NSYM:
879 result=left.antisymmetric();
880 break;
881 case PROD:
882 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
883 break;
884 case TRANS:
885 result=left.transpose(m_axis_offset);
886 break;
887 case TRACE:
888 result=left.trace(m_axis_offset);
889 break;
890 case SWAP:
891 result=left.swapaxes(m_axis_offset, m_transpose);
892 break;
893 case MINVAL:
894 result=left.minval();
895 break;
896 case MAXVAL:
897 result=left.minval();
898 break;
899 case HER:
900 result=left.hermitian();
901 break;
902 default:
903 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
904 }
905 return result.borrowReadyPtr();
906 }
907
908 /*
909 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
910 This method uses the original methods on the Data class to evaluate the expressions.
911 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
912 the purpose of using DataLazy in the first place).
913 */
914 void
915 DataLazy::collapse() const
916 {
917 if (m_op==IDENTITY)
918 {
919 return;
920 }
921 if (m_readytype=='E')
922 { // this is more an efficiency concern than anything else
923 throw DataException("Programmer Error - do not use collapse on Expanded data.");
924 }
925 m_id=collapseToReady();
926 m_op=IDENTITY;
927 }
928
929 // The result will be stored in m_samples
930 // The return value is a pointer to the DataVector, offset is the offset within the return value
931 const DataTypes::RealVectorType*
932 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
933 {
934 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
935 // collapse so we have a 'E' node or an IDENTITY for some other type
936 if (m_readytype!='E' && m_op!=IDENTITY)
937 {
938 collapse();
939 }
940 if (m_op==IDENTITY)
941 {
942 const RealVectorType& vec=m_id->getVectorRO();
943 roffset=m_id->getPointOffset(sampleNo, 0);
944 #ifdef LAZY_STACK_PROF
945 int x;
946 if (&x<stackend[omp_get_thread_num()])
947 {
948 stackend[omp_get_thread_num()]=&x;
949 }
950 #endif
951 return &(vec);
952 }
953 if (m_readytype!='E')
954 {
955 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
956 }
957 if (m_sampleids[tid]==sampleNo)
958 {
959 roffset=tid*m_samplesize;
960 return &(m_samples); // sample is already resolved
961 }
962 m_sampleids[tid]=sampleNo;
963
964 switch (getOpgroup(m_op))
965 {
966 case G_UNARY:
967 case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
968 case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
969 case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
970 case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
971 case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
972 case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
973 case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
974 case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
975 default:
976 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
977 }
978 }
979
980 const DataTypes::RealVectorType*
981 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
982 {
983 // we assume that any collapsing has been done before we get here
984 // since we only have one argument we don't need to think about only
985 // processing single points.
986 // we will also know we won't get identity nodes
987 if (m_readytype!='E')
988 {
989 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
990 }
991 if (m_op==IDENTITY)
992 {
993 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
994 }
995 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
996 const double* left=&((*leftres)[roffset]);
997 roffset=m_samplesize*tid;
998 double* result=&(m_samples[roffset]);
999 if (m_op==POS)
1000 {
1001 // this should be prevented earlier
1002 // operation is meaningless for lazy
1003 throw DataException("Programmer error - POS not supported for lazy data.");
1004 }
1005 tensor_unary_array_operation(m_samplesize,
1006 left,
1007 result,
1008 m_op,
1009 m_tol);
1010 return &(m_samples);
1011 }
1012
1013
1014 const DataTypes::RealVectorType*
1015 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1016 {
1017 // we assume that any collapsing has been done before we get here
1018 // since we only have one argument we don't need to think about only
1019 // processing single points.
1020 // we will also know we won't get identity nodes
1021 if (m_readytype!='E')
1022 {
1023 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1024 }
1025 if (m_op==IDENTITY)
1026 {
1027 throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1028 }
1029 size_t loffset=0;
1030 const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1031
1032 roffset=m_samplesize*tid;
1033 unsigned int ndpps=getNumDPPSample();
1034 unsigned int psize=DataTypes::noValues(m_left->getShape());
1035 double* result=&(m_samples[roffset]);
1036 switch (m_op)
1037 {
1038 case MINVAL:
1039 {
1040 for (unsigned int z=0;z<ndpps;++z)
1041 {
1042 FMin op;
1043 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1044 loffset+=psize;
1045 result++;
1046 }
1047 }
1048 break;
1049 case MAXVAL:
1050 {
1051 for (unsigned int z=0;z<ndpps;++z)
1052 {
1053 FMax op;
1054 *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1055 loffset+=psize;
1056 result++;
1057 }
1058 }
1059 break;
1060 default:
1061 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1062 }
1063 return &(m_samples);
1064 }
1065
1066 const DataTypes::RealVectorType*
1067 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1068 {
1069 // we assume that any collapsing has been done before we get here
1070 // since we only have one argument we don't need to think about only
1071 // processing single points.
1072 if (m_readytype!='E')
1073 {
1074 throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1075 }
1076 if (m_op==IDENTITY)
1077 {
1078 throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1079 }
1080 size_t subroffset;
1081 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1082 roffset=m_samplesize*tid;
1083 size_t loop=0;
1084 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1085 size_t step=getNoValues();
1086 size_t offset=roffset;
1087 switch (m_op)
1088 {
1089 case SYM:
1090 for (loop=0;loop<numsteps;++loop)
1091 {
1092 escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1093 subroffset+=step;
1094 offset+=step;
1095 }
1096 break;
1097 case NSYM:
1098 for (loop=0;loop<numsteps;++loop)
1099 {
1100 escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1101 subroffset+=step;
1102 offset+=step;
1103 }
1104 break;
1105 default:
1106 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1107 }
1108 return &m_samples;
1109 }
1110
1111 const DataTypes::RealVectorType*
1112 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1113 {
1114 // we assume that any collapsing has been done before we get here
1115 // since we only have one argument we don't need to think about only
1116 // processing single points.
1117 if (m_readytype!='E')
1118 {
1119 throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1120 }
1121 if (m_op==IDENTITY)
1122 {
1123 throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1124 }
1125 size_t subroffset;
1126 size_t offset;
1127 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1128 roffset=m_samplesize*tid;
1129 offset=roffset;
1130 size_t loop=0;
1131 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1132 size_t outstep=getNoValues();
1133 size_t instep=m_left->getNoValues();
1134 switch (m_op)
1135 {
1136 case TRACE:
1137 for (loop=0;loop<numsteps;++loop)
1138 {
1139 escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1140 subroffset+=instep;
1141 offset+=outstep;
1142 }
1143 break;
1144 case TRANS:
1145 for (loop=0;loop<numsteps;++loop)
1146 {
1147 escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1148 subroffset+=instep;
1149 offset+=outstep;
1150 }
1151 break;
1152 default:
1153 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1154 }
1155 return &m_samples;
1156 }
1157
1158
1159 const DataTypes::RealVectorType*
1160 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1161 {
1162 if (m_readytype!='E')
1163 {
1164 throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1165 }
1166 if (m_op==IDENTITY)
1167 {
1168 throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1169 }
1170 size_t subroffset;
1171 size_t offset;
1172 const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1173 roffset=m_samplesize*tid;
1174 offset=roffset;
1175 size_t loop=0;
1176 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1177 size_t outstep=getNoValues();
1178 size_t instep=m_left->getNoValues();
1179 switch (m_op)
1180 {
1181 case SWAP:
1182 for (loop=0;loop<numsteps;++loop)
1183 {
1184 escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1185 subroffset+=instep;
1186 offset+=outstep;
1187 }
1188 break;
1189 default:
1190 throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1191 }
1192 return &m_samples;
1193 }
1194
1195 const DataTypes::RealVectorType*
1196 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1197 {
1198 if (m_readytype!='E')
1199 {
1200 throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1201 }
1202 if (m_op!=CONDEVAL)
1203 {
1204 throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1205 }
1206 size_t subroffset;
1207
1208 const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1209 const RealVectorType* srcres=0;
1210 if ((*maskres)[subroffset]>0)
1211 {
1212 srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1213 }
1214 else
1215 {
1216 srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1217 }
1218
1219 // Now we need to copy the result
1220
1221 roffset=m_samplesize*tid;
1222 for (int i=0;i<m_samplesize;++i)
1223 {
1224 m_samples[roffset+i]=(*srcres)[subroffset+i];
1225 }
1226
1227 return &m_samples;
1228 }
1229
1230 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1231 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1232 // If both children are expanded, then we can process them in a single operation (we treat
1233 // the whole sample as one big datapoint.
1234 // If one of the children is not expanded, then we need to treat each point in the sample
1235 // individually.
1236 // There is an additional complication when scalar operations are considered.
1237 // For example, 2+Vector.
1238 // In this case each double within the point is treated individually
1239 const DataTypes::RealVectorType*
1240 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1241 {
1242 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1243
1244 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1245 // first work out which of the children are expanded
1246 bool leftExp=(m_left->m_readytype=='E');
1247 bool rightExp=(m_right->m_readytype=='E');
1248 if (!leftExp && !rightExp)
1249 {
1250 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1251 }
1252 bool leftScalar=(m_left->getRank()==0);
1253 bool rightScalar=(m_right->getRank()==0);
1254 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1255 {
1256 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1257 }
1258 size_t leftsize=m_left->getNoValues();
1259 size_t rightsize=m_right->getNoValues();
1260 size_t chunksize=1; // how many doubles will be processed in one go
1261 int leftstep=0; // how far should the left offset advance after each step
1262 int rightstep=0;
1263 int numsteps=0; // total number of steps for the inner loop
1264 int oleftstep=0; // the o variables refer to the outer loop
1265 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1266 int onumsteps=1;
1267
1268 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1269 bool RES=(rightExp && rightScalar);
1270 bool LS=(!leftExp && leftScalar); // left is a single scalar
1271 bool RS=(!rightExp && rightScalar);
1272 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1273 bool RN=(!rightExp && !rightScalar);
1274 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1275 bool REN=(rightExp && !rightScalar);
1276
1277 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1278 {
1279 chunksize=m_left->getNumDPPSample()*leftsize;
1280 leftstep=0;
1281 rightstep=0;
1282 numsteps=1;
1283 }
1284 else if (LES || RES)
1285 {
1286 chunksize=1;
1287 if (LES) // left is an expanded scalar
1288 {
1289 if (RS)
1290 {
1291 leftstep=1;
1292 rightstep=0;
1293 numsteps=m_left->getNumDPPSample();
1294 }
1295 else // RN or REN
1296 {
1297 leftstep=0;
1298 oleftstep=1;
1299 rightstep=1;
1300 orightstep=(RN ? -(int)rightsize : 0);
1301 numsteps=rightsize;
1302 onumsteps=m_left->getNumDPPSample();
1303 }
1304 }
1305 else // right is an expanded scalar
1306 {
1307 if (LS)
1308 {
1309 rightstep=1;
1310 leftstep=0;
1311 numsteps=m_right->getNumDPPSample();
1312 }
1313 else
1314 {
1315 rightstep=0;
1316 orightstep=1;
1317 leftstep=1;
1318 oleftstep=(LN ? -(int)leftsize : 0);
1319 numsteps=leftsize;
1320 onumsteps=m_right->getNumDPPSample();
1321 }
1322 }
1323 }
1324 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1325 {
1326 if (LEN) // and Right will be a single value
1327 {
1328 chunksize=rightsize;
1329 leftstep=rightsize;
1330 rightstep=0;
1331 numsteps=m_left->getNumDPPSample();
1332 if (RS)
1333 {
1334 numsteps*=leftsize;
1335 }
1336 }
1337 else // REN
1338 {
1339 chunksize=leftsize;
1340 rightstep=leftsize;
1341 leftstep=0;
1342 numsteps=m_right->getNumDPPSample();
1343 if (LS)
1344 {
1345 numsteps*=rightsize;
1346 }
1347 }
1348 }
1349
1350 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1351 // Get the values of sub-expressions
1352 const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
1353 const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1354 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1355 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1356 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1357 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1358 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1359 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1360 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1361
1362 LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1363 LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1364
1365
1366 roffset=m_samplesize*tid;
1367 double* resultp=&(m_samples[roffset]); // results are stored at the vector offset we received
1368 switch(m_op)
1369 {
1370 case ADD:
1371 //PROC_OP(NO_ARG,plus<double>());
1372 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1373 &(*left)[0],
1374 &(*right)[0],
1375 chunksize,
1376 onumsteps,
1377 numsteps,
1378 resultStep,
1379 leftstep,
1380 rightstep,
1381 oleftstep,
1382 orightstep,
1383 lroffset,
1384 rroffset,
1385 escript::ES_optype::ADD);
1386 break;
1387 case SUB:
1388 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1389 &(*left)[0],
1390 &(*right)[0],
1391 chunksize,
1392 onumsteps,
1393 numsteps,
1394 resultStep,
1395 leftstep,
1396 rightstep,
1397 oleftstep,
1398 orightstep,
1399 lroffset,
1400 rroffset,
1401 escript::ES_optype::SUB);
1402 //PROC_OP(NO_ARG,minus<double>());
1403 break;
1404 case MUL:
1405 //PROC_OP(NO_ARG,multiplies<double>());
1406 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1407 &(*left)[0],
1408 &(*right)[0],
1409 chunksize,
1410 onumsteps,
1411 numsteps,
1412 resultStep,
1413 leftstep,
1414 rightstep,
1415 oleftstep,
1416 orightstep,
1417 lroffset,
1418 rroffset,
1419 escript::ES_optype::MUL);
1420 break;
1421 case DIV:
1422 //PROC_OP(NO_ARG,divides<double>());
1423 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1424 &(*left)[0],
1425 &(*right)[0],
1426 chunksize,
1427 onumsteps,
1428 numsteps,
1429 resultStep,
1430 leftstep,
1431 rightstep,
1432 oleftstep,
1433 orightstep,
1434 lroffset,
1435 rroffset,
1436 escript::ES_optype::DIV);
1437 break;
1438 case POW:
1439 //PROC_OP(double (double,double),::pow);
1440 escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1441 &(*left)[0],
1442 &(*right)[0],
1443 chunksize,
1444 onumsteps,
1445 numsteps,
1446 resultStep,
1447 leftstep,
1448 rightstep,
1449 oleftstep,
1450 orightstep,
1451 lroffset,
1452 rroffset,
1453 escript::ES_optype::POW);
1454 break;
1455 default:
1456 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1457 }
1458 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1459 return &m_samples;
1460 }
1461
1462
1463 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1464 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1465 // unlike the other resolve helpers, we must treat these datapoints separately.
1466 const DataTypes::RealVectorType*
1467 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1468 {
1469 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1470
1471 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1472 // first work out which of the children are expanded
1473 bool leftExp=(m_left->m_readytype=='E');
1474 bool rightExp=(m_right->m_readytype=='E');
1475 int steps=getNumDPPSample();
1476 int leftStep=(leftExp? m_left->getNoValues() : 0); // do not have scalars as input to this method
1477 int rightStep=(rightExp?m_right->getNoValues() : 0);
1478
1479 int resultStep=getNoValues();
1480 roffset=m_samplesize*tid;
1481 size_t offset=roffset;
1482
1483 const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1484
1485 const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1486
1487 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
1488 cout << getNoValues() << endl;)
1489
1490
1491 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1492 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1493 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1494 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1495 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1496 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1497 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1498
1499 double* resultp=&(m_samples[offset]); // results are stored at the vector offset we received
1500 switch(m_op)
1501 {
1502 case PROD:
1503 for (int i=0;i<steps;++i,resultp+=resultStep)
1504 {
1505 const double *ptr_0 = &((*left)[lroffset]);
1506 const double *ptr_1 = &((*right)[rroffset]);
1507
1508 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1509 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1510
1511 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1512
1513 lroffset+=leftStep;
1514 rroffset+=rightStep;
1515 }
1516 break;
1517 default:
1518 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1519 }
1520 roffset=offset;
1521 return &m_samples;
1522 }
1523
1524
1525 const DataTypes::RealVectorType*
1526 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1527 {
1528 #ifdef _OPENMP
1529 int tid=omp_get_thread_num();
1530 #else
1531 int tid=0;
1532 #endif
1533
1534 #ifdef LAZY_STACK_PROF
1535 stackstart[tid]=&tid;
1536 stackend[tid]=&tid;
1537 const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1538 size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1539 #pragma omp critical
1540 if (d>maxstackuse)
1541 {
1542 cout << "Max resolve Stack use " << d << endl;
1543 maxstackuse=d;
1544 }
1545 return r;
1546 #else
1547 return resolveNodeSample(tid, sampleNo, roffset);
1548 #endif
1549 }
1550
1551
1552 // This needs to do the work of the identity constructor
1553 void
1554 DataLazy::resolveToIdentity()
1555 {
1556 if (m_op==IDENTITY)
1557 return;
1558 DataReady_ptr p=resolveNodeWorker();
1559 makeIdentity(p);
1560 }
1561
1562 void DataLazy::makeIdentity(const DataReady_ptr& p)
1563 {
1564 m_op=IDENTITY;
1565 m_axis_offset=0;
1566 m_transpose=0;
1567 m_SL=m_SM=m_SR=0;
1568 m_children=m_height=0;
1569 m_id=p;
1570 if(p->isConstant()) {m_readytype='C';}
1571 else if(p->isExpanded()) {m_readytype='E';}
1572 else if (p->isTagged()) {m_readytype='T';}
1573 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1574 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1575 m_left.reset();
1576 m_right.reset();
1577 }
1578
1579
1580 DataReady_ptr
1581 DataLazy::resolve()
1582 {
1583 resolveToIdentity();
1584 return m_id;
1585 }
1586
1587
1588 /* This is really a static method but I think that caused problems in windows */
1589 void
1590 DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1591 {
1592 if (dats.empty())
1593 {
1594 return;
1595 }
1596 vector<DataLazy*> work;
1597 FunctionSpace fs=dats[0]->getFunctionSpace();
1598 bool match=true;
1599 for (int i=dats.size()-1;i>=0;--i)
1600 {
1601 if (dats[i]->m_readytype!='E')
1602 {
1603 dats[i]->collapse();
1604 }
1605 if (dats[i]->m_op!=IDENTITY)
1606 {
1607 work.push_back(dats[i]);
1608 if (fs!=dats[i]->getFunctionSpace())
1609 {
1610 match=false;
1611 }
1612 }
1613 }
1614 if (work.empty())
1615 {
1616 return; // no work to do
1617 }
1618 if (match) // all functionspaces match. Yes I realise this is overly strict
1619 { // it is possible that dats[0] is one of the objects which we discarded and
1620 // all the other functionspaces match.
1621 vector<DataExpanded*> dep;
1622 vector<RealVectorType*> vecs;
1623 for (int i=0;i<work.size();++i)
1624 {
1625 dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1626 vecs.push_back(&(dep[i]->getVectorRW()));
1627 }
1628 int totalsamples=work[0]->getNumSamples();
1629 const RealVectorType* res=0; // Storage for answer
1630 int sample;
1631 #pragma omp parallel private(sample, res)
1632 {
1633 size_t roffset=0;
1634 #pragma omp for schedule(static)
1635 for (sample=0;sample<totalsamples;++sample)
1636 {
1637 roffset=0;
1638 int j;
1639 for (j=work.size()-1;j>=0;--j)
1640 {
1641 #ifdef _OPENMP
1642 res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1643 #else
1644 res=work[j]->resolveNodeSample(0,sample,roffset);
1645 #endif
1646 RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1647 memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1648 }
1649 }
1650 }
1651 // Now we need to load the new results as identity ops into the lazy nodes
1652 for (int i=work.size()-1;i>=0;--i)
1653 {
1654 work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1655 }
1656 }
1657 else // functionspaces do not match
1658 {
1659 for (int i=0;i<work.size();++i)
1660 {
1661 work[i]->resolveToIdentity();
1662 }
1663 }
1664 }
1665
1666
1667
1668 // This version of resolve uses storage in each node to hold results
1669 DataReady_ptr
1670 DataLazy::resolveNodeWorker()
1671 {
1672 if (m_readytype!='E') // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1673 {
1674 collapse();
1675 }
1676 if (m_op==IDENTITY) // So a lazy expression of Constant or Tagged data will be returned here.
1677 {
1678 return m_id;
1679 }
1680 // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1681 DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(), RealVectorType(getNoValues()));
1682 RealVectorType& resvec=result->getVectorRW();
1683 DataReady_ptr resptr=DataReady_ptr(result);
1684
1685 int sample;
1686 int totalsamples=getNumSamples();
1687 const RealVectorType* res=0; // Storage for answer
1688 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1689 #pragma omp parallel private(sample,res)
1690 {
1691 size_t roffset=0;
1692 #ifdef LAZY_STACK_PROF
1693 stackstart[omp_get_thread_num()]=&roffset;
1694 stackend[omp_get_thread_num()]=&roffset;
1695 #endif
1696 #pragma omp for schedule(static)
1697 for (sample=0;sample<totalsamples;++sample)
1698 {
1699 roffset=0;
1700 #ifdef _OPENMP
1701 res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1702 #else
1703 res=resolveNodeSample(0,sample,roffset);
1704 #endif
1705 LAZYDEBUG(cout << "Sample #" << sample << endl;)
1706 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1707 RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1708 memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1709 }
1710 }
1711 #ifdef LAZY_STACK_PROF
1712 for (int i=0;i<getNumberOfThreads();++i)
1713 {
1714 size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1715 // cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " << r << endl;
1716 if (r>maxstackuse)
1717 {
1718 maxstackuse=r;
1719 }
1720 }
1721 cout << "Max resolve Stack use=" << maxstackuse << endl;
1722 #endif
1723 return resptr;
1724 }
1725
1726 std::string
1727 DataLazy::toString() const
1728 {
1729 ostringstream oss;
1730 oss << "Lazy Data: [depth=" << m_height<< "] ";
1731 switch (escriptParams.getLAZY_STR_FMT())
1732 {
1733 case 1: // tree format
1734 oss << endl;
1735 intoTreeString(oss,"");
1736 break;
1737 case 2: // just the depth
1738 break;
1739 default:
1740 intoString(oss);
1741 break;
1742 }
1743 return oss.str();
1744 }
1745
1746
1747 void
1748 DataLazy::intoString(ostringstream& oss) const
1749 {
1750 // oss << "[" << m_children <<";"<<m_height <<"]";
1751 switch (getOpgroup(m_op))
1752 {
1753 case G_IDENTITY:
1754 if (m_id->isExpanded())
1755 {
1756 oss << "E";
1757 }
1758 else if (m_id->isTagged())
1759 {
1760 oss << "T";
1761 }
1762 else if (m_id->isConstant())
1763 {
1764 oss << "C";
1765 }
1766 else
1767 {
1768 oss << "?";
1769 }
1770 oss << '@' << m_id.get();
1771 break;
1772 case G_BINARY:
1773 oss << '(';
1774 m_left->intoString(oss);
1775 oss << ' ' << opToString(m_op) << ' ';
1776 m_right->intoString(oss);
1777 oss << ')';
1778 break;
1779 case G_UNARY:
1780 case G_UNARY_P:
1781 case G_NP1OUT:
1782 case G_NP1OUT_P:
1783 case G_REDUCTION:
1784 oss << opToString(m_op) << '(';
1785 m_left->intoString(oss);
1786 oss << ')';
1787 break;
1788 case G_TENSORPROD:
1789 oss << opToString(m_op) << '(';
1790 m_left->intoString(oss);
1791 oss << ", ";
1792 m_right->intoString(oss);
1793 oss << ')';
1794 break;
1795 case G_NP1OUT_2P:
1796 oss << opToString(m_op) << '(';
1797 m_left->intoString(oss);
1798 oss << ", " << m_axis_offset << ", " << m_transpose;
1799 oss << ')';
1800 break;
1801 case G_CONDEVAL:
1802 oss << opToString(m_op)<< '(' ;
1803 m_mask->intoString(oss);
1804 oss << " ? ";
1805 m_left->intoString(oss);
1806 oss << " : ";
1807 m_right->intoString(oss);
1808 oss << ')';
1809 break;
1810 default:
1811 oss << "UNKNOWN";
1812 }
1813 }
1814
1815
1816 void
1817 DataLazy::intoTreeString(ostringstream& oss, string indent) const
1818 {
1819 oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1820 switch (getOpgroup(m_op))
1821 {
1822 case G_IDENTITY:
1823 if (m_id->isExpanded())
1824 {
1825 oss << "E";
1826 }
1827 else if (m_id->isTagged())
1828 {
1829 oss << "T";
1830 }
1831 else if (m_id->isConstant())
1832 {
1833 oss << "C";
1834 }
1835 else
1836 {
1837 oss << "?";
1838 }
1839 oss << '@' << m_id.get() << endl;
1840 break;
1841 case G_BINARY:
1842 oss << opToString(m_op) << endl;
1843 indent+='.';
1844 m_left->intoTreeString(oss, indent);
1845 m_right->intoTreeString(oss, indent);
1846 break;
1847 case G_UNARY:
1848 case G_UNARY_P:
1849 case G_NP1OUT:
1850 case G_NP1OUT_P:
1851 case G_REDUCTION:
1852 oss << opToString(m_op) << endl;
1853 indent+='.';
1854 m_left->intoTreeString(oss, indent);
1855 break;
1856 case G_TENSORPROD:
1857 oss << opToString(m_op) << endl;
1858 indent+='.';
1859 m_left->intoTreeString(oss, indent);
1860 m_right->intoTreeString(oss, indent);
1861 break;
1862 case G_NP1OUT_2P:
1863 oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1864 indent+='.';
1865 m_left->intoTreeString(oss, indent);
1866 break;
1867 default:
1868 oss << "UNKNOWN";
1869 }
1870 }
1871
1872
1873 DataAbstract*
1874 DataLazy::deepCopy() const
1875 {
1876 switch (getOpgroup(m_op))
1877 {
1878 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1879 case G_UNARY:
1880 case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1881 case G_UNARY_P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1882 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1883 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1884 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1885 case G_NP1OUT_P: return new DataLazy(m_left->deepCopy()->getPtr(),m_op, m_axis_offset);
1886 case G_NP1OUT_2P: return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1887 default:
1888 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1889 }
1890 }
1891
1892
1893
1894 // There is no single, natural interpretation of getLength on DataLazy.
1895 // Instances of DataReady can look at the size of their vectors.
1896 // For lazy though, it could be the size the data would be if it were resolved;
1897 // or it could be some function of the lengths of the DataReady instances which
1898 // form part of the expression.
1899 // Rather than have people making assumptions, I have disabled the method.
1900 DataTypes::RealVectorType::size_type
1901 DataLazy::getLength() const
1902 {
1903 throw DataException("getLength() does not make sense for lazy data.");
1904 }
1905
1906
1907 DataAbstract*
1908 DataLazy::getSlice(const DataTypes::RegionType& region) const
1909 {
1910 throw DataException("getSlice - not implemented for Lazy objects.");
1911 }
1912
1913
1914 // To do this we need to rely on our child nodes
1915 DataTypes::RealVectorType::size_type
1916 DataLazy::getPointOffset(int sampleNo,
1917 int dataPointNo)
1918 {
1919 if (m_op==IDENTITY)
1920 {
1921 return m_id->getPointOffset(sampleNo,dataPointNo);
1922 }
1923 if (m_readytype!='E')
1924 {
1925 collapse();
1926 return m_id->getPointOffset(sampleNo,dataPointNo);
1927 }
1928 // at this point we do not have an identity node and the expression will be Expanded
1929 // so we only need to know which child to ask
1930 if (m_left->m_readytype=='E')
1931 {
1932 return m_left->getPointOffset(sampleNo,dataPointNo);
1933 }
1934 else
1935 {
1936 return m_right->getPointOffset(sampleNo,dataPointNo);
1937 }
1938 }
1939
1940 // To do this we need to rely on our child nodes
1941 DataTypes::RealVectorType::size_type
1942 DataLazy::getPointOffset(int sampleNo,
1943 int dataPointNo) const
1944 {
1945 if (m_op==IDENTITY)
1946 {
1947 return m_id->getPointOffset(sampleNo,dataPointNo);
1948 }
1949 if (m_readytype=='E')
1950 {
1951 // at this point we do not have an identity node and the expression will be Expanded
1952 // so we only need to know which child to ask
1953 if (m_left->m_readytype=='E')
1954 {
1955 return m_left->getPointOffset(sampleNo,dataPointNo);
1956 }
1957 else
1958 {
1959 return m_right->getPointOffset(sampleNo,dataPointNo);
1960 }
1961 }
1962 if (m_readytype=='C')
1963 {
1964 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1965 }
1966 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1967 }
1968
1969
1970 // I have decided to let Data:: handle this issue.
1971 void
1972 DataLazy::setToZero()
1973 {
1974 // DataTypes::RealVectorType v(getNoValues(),0);
1975 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1976 // m_op=IDENTITY;
1977 // m_right.reset();
1978 // m_left.reset();
1979 // m_readytype='C';
1980 // m_buffsRequired=1;
1981
1982 (void)privdebug; // to stop the compiler complaining about unused privdebug
1983 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1984 }
1985
1986 bool
1987 DataLazy::actsExpanded() const
1988 {
1989 return (m_readytype=='E');
1990 }
1991
1992 } // end namespace
1993

  ViewVC Help
Powered by ViewVC 1.1.26