/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Contents of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2472 - (show annotations)
Thu Jun 11 23:33:47 2009 UTC (10 years ago) by jfenwick
File size: 55932 byte(s)
Added new params to control auto resolving for lazy data
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 #include "EscriptParams.h"
32
33 #include <iomanip> // for some fancy formatting in debug
34
35 // #define LAZYDEBUG(X) if (privdebug){X;}
36 #define LAZYDEBUG(X)
37 namespace
38 {
39 bool privdebug=false;
40
41 #define ENABLEDEBUG privdebug=true;
42 #define DISABLEDEBUG privdebug=false;
43 }
44
45 // #define SIZELIMIT
46 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
47 //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}
48
49 #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
50
51 /*
52 How does DataLazy work?
53 ~~~~~~~~~~~~~~~~~~~~~~~
54
55 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
56 denoted left and right.
57
58 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
59 This means that all "internal" nodes in the structure are instances of DataLazy.
60
61 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
Note that IDENTITY is not considered a unary operation.
63
64 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
65 It must however form a DAG (directed acyclic graph).
66 I will refer to individual DataLazy objects with the structure as nodes.
67
68 Each node also stores:
69 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70 evaluated.
 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
72 evaluate the expression.
73 - m_samplesize ~ the number of doubles stored in a sample.
74
75 When a new node is created, the above values are computed based on the values in the child nodes.
76 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
77
78 The resolve method, which produces a DataReady from a DataLazy, does the following:
79 1) Create a DataReady to hold the new result.
80 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
81 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
82
83 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
84
85 resolveSample returns a Vector* and an offset within that vector where the result is stored.
86 Normally, this would be v, but for identity nodes their internal vector is returned instead.
87
88 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
89
90 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92
93 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94 1) Add to the ES_optype.
95 2) determine what opgroup your operation belongs to (X)
96 3) add a string for the op to the end of ES_opstrings
97 4) increase ES_opcount
98 5) add an entry (X) to opgroups
99 6) add an entry to the switch in collapseToReady
100 7) add an entry to resolveX
101 */
102
103
104 using namespace std;
105 using namespace boost;
106
107 namespace escript
108 {
109
110 namespace
111 {
112
// Classification of operations: the group determines how many operands a
// node has and which resolve*/collapse path is used to evaluate it.
enum ES_opgroup
{
   G_UNKNOWN,
   G_IDENTITY,		// node wraps a DataReady instance; no computation required
   G_BINARY,		// pointwise operations with two arguments
   G_UNARY,		// pointwise operations with one argument
   G_UNARY_P,		// pointwise operations with one argument, requiring a parameter
   G_NP1OUT,		// non-pointwise op with one output
   G_NP1OUT_P,		// non-pointwise op with one output requiring a parameter
   G_TENSORPROD		// general tensor product
};
124
125
126
127
128 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
129 "sin","cos","tan",
130 "asin","acos","atan","sinh","cosh","tanh","erf",
131 "asinh","acosh","atanh",
132 "log10","log","sign","abs","neg","pos","exp","sqrt",
133 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
134 "symmetric","nonsymmetric",
135 "prod",
136 "transpose", "trace"};
137 int ES_opcount=40;
138 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
139 G_UNARY,G_UNARY,G_UNARY, //10
140 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
141 G_UNARY,G_UNARY,G_UNARY, // 20
142 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
143 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
144 G_NP1OUT,G_NP1OUT,
145 G_TENSORPROD,
146 G_NP1OUT_P, G_NP1OUT_P};
147 inline
148 ES_opgroup
149 getOpgroup(ES_optype op)
150 {
151 return opgroups[op];
152 }
153
154 // return the FunctionSpace of the result of "left op right"
155 FunctionSpace
156 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
157 {
158 // perhaps this should call interpolate and throw or something?
159 // maybe we need an interpolate node -
160 // that way, if interpolate is required in any other op we can just throw a
161 // programming error exception.
162
163 FunctionSpace l=left->getFunctionSpace();
164 FunctionSpace r=right->getFunctionSpace();
165 if (l!=r)
166 {
167 if (r.probeInterpolation(l))
168 {
169 return l;
170 }
171 if (l.probeInterpolation(r))
172 {
173 return r;
174 }
175 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
176 }
177 return l;
178 }
179
180 // return the shape of the result of "left op right"
181 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
182 DataTypes::ShapeType
183 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
184 {
185 if (left->getShape()!=right->getShape())
186 {
187 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
188 {
189 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
190 }
191 if (left->getRank()==0) // we need to allow scalar * anything
192 {
193 return right->getShape();
194 }
195 if (right->getRank()==0)
196 {
197 return left->getShape();
198 }
199 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
200 }
201 return left->getShape();
202 }
203
204 // return the shape for "op left"
205
206 DataTypes::ShapeType
207 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
208 {
209 switch(op)
210 {
211 case TRANS:
212 { // for the scoping of variables
213 const DataTypes::ShapeType& s=left->getShape();
214 DataTypes::ShapeType sh;
215 int rank=left->getRank();
216 if (axis_offset<0 || axis_offset>rank)
217 {
218 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
219 }
220 for (int i=0; i<rank; i++)
221 {
222 int index = (axis_offset+i)%rank;
223 sh.push_back(s[index]); // Append to new shape
224 }
225 return sh;
226 }
227 break;
228 case TRACE:
229 {
230 int rank=left->getRank();
231 if (rank<2)
232 {
233 throw DataException("Trace can only be computed for objects with rank 2 or greater.");
234 }
235 if ((axis_offset>rank-2) || (axis_offset<0))
236 {
237 throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
238 }
239 if (rank==2)
240 {
241 return DataTypes::scalarShape;
242 }
243 else if (rank==3)
244 {
245 DataTypes::ShapeType sh;
246 if (axis_offset==0)
247 {
248 sh.push_back(left->getShape()[2]);
249 }
250 else // offset==1
251 {
252 sh.push_back(left->getShape()[0]);
253 }
254 return sh;
255 }
256 else if (rank==4)
257 {
258 DataTypes::ShapeType sh;
259 const DataTypes::ShapeType& s=left->getShape();
260 if (axis_offset==0)
261 {
262 sh.push_back(s[2]);
263 sh.push_back(s[3]);
264 }
265 else if (axis_offset==1)
266 {
267 sh.push_back(s[0]);
268 sh.push_back(s[3]);
269 }
270 else // offset==2
271 {
272 sh.push_back(s[0]);
273 sh.push_back(s[1]);
274 }
275 return sh;
276 }
277 else // unknown rank
278 {
279 throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
280 }
281 }
282 break;
283 default:
284 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
285 }
286 }
287
// determine the output shape for the general tensor product operation
// the additional parameters return information required later for the product
// the majority of this code is copy pasted from C_General_Tensor_Product
// \param axis_offset number of trailing axes of left contracted with leading axes of right
// \param transpose 0 = neither operand transposed, 1 = left, 2 = right
// \param SL,SM,SR (output) sizes of the left-only, contracted and right-only index ranges
DataTypes::ShapeType
GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
{

  // Get rank and shape of inputs
  int rank0 = left->getRank();
  int rank1 = right->getRank();
  const DataTypes::ShapeType& shape0 = left->getShape();
  const DataTypes::ShapeType& shape1 = right->getShape();

  // Prepare for the loops of the product and verify compatibility of shapes
  // start0/start1 are the rotation offsets applied to realise the transpose
  int start0=0, start1=0;
  if (transpose == 0)		{}
  else if (transpose == 1)	{ start0 = axis_offset; }
  else if (transpose == 2)	{ start1 = rank1-axis_offset; }
  else				{ throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }

  if (rank0<axis_offset)
  {
	throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
  }

  // Adjust the shapes for transpose (rotate the axes by start0/start1)
  DataTypes::ShapeType tmpShape0(rank0);	// pre-sizing the vectors rather
  DataTypes::ShapeType tmpShape1(rank1);	// than using push_back
  for (int i=0; i<rank0; i++)	{ tmpShape0[i]=shape0[(i+start0)%rank0]; }
  for (int i=0; i<rank1; i++)	{ tmpShape1[i]=shape1[(i+start1)%rank1]; }

  // Prepare for the loops of the product
  // SL = product of the uncontracted left axes, SM = product of the
  // contracted axes (must match pairwise), SR = uncontracted right axes
  SL=1, SM=1, SR=1;
  for (int i=0; i<rank0-axis_offset; i++)	{
    SL *= tmpShape0[i];
  }
  for (int i=rank0-axis_offset; i<rank0; i++)	{
    if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
      throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
    }
    SM *= tmpShape0[i];
  }
  for (int i=axis_offset; i<rank1; i++)		{
    SR *= tmpShape1[i];
  }

  // Define the shape of the output (rank of shape is the sum of the loop ranges below)
  DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
  {			// block to limit the scope of out_index
     int out_index=0;
     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
     for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
  }

  if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
  {
     ostringstream os;
     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
     throw DataException(os.str());
  }

  return shape2;
}
351
// determine the number of samples required to evaluate an expression combining left and right
353 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
354 // The same goes for G_TENSORPROD
355 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
356 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
357 // multiple times
// Compute how many sample-sized buffers this node needs during resolution.
// NOTE: right may be a null pointer for the one-argument opgroups - it is
// only dereferenced for G_BINARY and G_TENSORPROD (callers rely on this).
int
calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
{
   switch(getOpgroup(op))
   {
   case G_IDENTITY: return 1;	// identity nodes serve samples from their own vector
   case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
   case G_UNARY:
   case G_UNARY_P: return max(left->getBuffsRequired(),1);
   // the extra buffer for NP1OUT/TENSORPROD: the result cannot be written
   // over the top of the input (see comment above this function)
   case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
   case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
   case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
   default:
	throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
   }
}
374
375
376 } // end anonymous namespace
377
378
379
380 // Return a string representing the operation
381 const std::string&
382 opToString(ES_optype op)
383 {
384 if (op<0 || op>=ES_opcount)
385 {
386 op=UNKNOWNOP;
387 }
388 return ES_opstrings[op];
389 }
390
391
// \brief Wrap an existing non-lazy data object as an IDENTITY node.
// \param p the data to wrap; must not itself be lazy.
// \throws DataException if p is a DataLazy.
DataLazy::DataLazy(DataAbstract_ptr p)
	: parent(p->getFunctionSpace(),p->getShape())
{
   if (p->isLazy())
   {
	// I don't want identity of Lazy.
	// Question: Why would that be so bad?
	// Answer: We assume that the child of ID is something we can call getVector on
	throw DataException("Programmer error - attempt to create identity from a DataLazy.");
   }
   else
   {
	p->makeLazyShared();
	DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
	makeIdentity(dr);	// sets m_op=IDENTITY, m_id and related bookkeeping (defined elsewhere)
	LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
   }
   LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
}
411
412
413
414
// \brief Build a lazy node applying a unary (or NP1OUT) operation to left.
// \param left the operand; wrapped in an IDENTITY node if not already lazy.
// \param op must belong to opgroup G_UNARY or G_NP1OUT.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
	: parent(left->getFunctionSpace(),left->getShape()),
	m_op(op),
	m_axis_offset(0),
	m_transpose(0),
	m_SL(0), m_SM(0), m_SR(0)
{
   if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
   }

   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));		// wrap in IDENTITY
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;	// unary ops preserve the readiness category
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   SIZELIMIT	// may immediately resolve this node if the expression has grown too large
}
445
446
// In this constructor we need to consider interpolation
// \brief Build a lazy node for the pointwise binary operation "left op right".
// Operands are interpolated onto the common FunctionSpace chosen by resultFS
// (via the parent constructor) and wrapped in IDENTITY nodes if not lazy.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
	: parent(resultFS(left,right,op), resultShape(left,right,op)),
	m_op(op),
	m_SL(0), m_SM(0), m_SR(0)
{
   LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
   if ((getOpgroup(op)!=G_BINARY))
   {
	throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
   }

   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
   {
	FunctionSpace fs=getFunctionSpace();
	Data ltemp(left);
	Data tmp(ltemp,fs);
	left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
   {
	Data tmp(Data(right),getFunctionSpace());
	right=tmp.borrowDataPtr();
	LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
   }
   left->operandCheck(*right);

   if (left->isLazy())			// the children need to be DataLazy. Wrap them in IDENTITY if required
   {
	m_left=dynamic_pointer_cast<DataLazy>(left);
	LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
   }
   else
   {
	m_left=DataLazy_ptr(new DataLazy(left));
	LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
   }
   if (right->isLazy())
   {
	m_right=dynamic_pointer_cast<DataLazy>(right);
	LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
   }
   else
   {
	m_right=DataLazy_ptr(new DataLazy(right));
	LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
   }
   // result readiness: Expanded ('E') dominates Tagged ('T') dominates Constant ('C')
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
	m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
	m_readytype='T';
   }
   else
   {
	m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);
   m_children=m_left->m_children+m_right->m_children+2;
   m_height=max(m_left->m_height,m_right->m_height)+1;
   SIZELIMIT	// may immediately resolve this node if the expression has grown too large
   LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
}
516
// \brief Build a lazy node for a general tensor product (G_TENSORPROD).
// \param axis_offset number of axes contracted between the operands.
// \param transpose 0, 1 (transpose left) or 2 (transpose right).
// The result shape and the m_SL/m_SM/m_SR loop sizes are computed by GTPShape
// in the parent initialiser.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
	: parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
	m_op(op),
	m_axis_offset(axis_offset),
	m_transpose(transpose)
{
   if ((getOpgroup(op)!=G_TENSORPROD))
   {
	throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
   }
   if ((transpose>2) || (transpose<0))
   {
	throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
   }
   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
   {
	FunctionSpace fs=getFunctionSpace();
	Data ltemp(left);
	Data tmp(ltemp,fs);
	left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
   {
	Data tmp(Data(right),getFunctionSpace());
	right=tmp.borrowDataPtr();
   }
//    left->operandCheck(*right);

   if (left->isLazy())			// the children need to be DataLazy. Wrap them in IDENTITY if required
   {
	m_left=dynamic_pointer_cast<DataLazy>(left);
   }
   else
   {
	m_left=DataLazy_ptr(new DataLazy(left));
   }
   if (right->isLazy())
   {
	m_right=dynamic_pointer_cast<DataLazy>(right);
   }
   else
   {
	m_right=DataLazy_ptr(new DataLazy(right));
   }
   // result readiness: Expanded ('E') dominates Tagged ('T') dominates Constant ('C')
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
	m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
	m_readytype='T';
   }
   else
   {
	m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);
   m_children=m_left->m_children+m_right->m_children+2;
   m_height=max(m_left->m_height,m_right->m_height)+1;
   SIZELIMIT	// may immediately resolve this node if the expression has grown too large
   LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
}
583
584
// \brief Build a lazy node for a non-pointwise unary op taking an axis
// parameter (G_NP1OUT_P, i.e. TRANS/TRACE). The result shape is computed by
// resultShape(left,op,axis_offset) in the parent initialiser.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
	: parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
	m_op(op),
	m_axis_offset(axis_offset),
	m_transpose(0),
	m_tol(0)
{
   if ((getOpgroup(op)!=G_NP1OUT_P))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
   }
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));		// wrap in IDENTITY
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;	// unary ops preserve the readiness category
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   SIZELIMIT	// may immediately resolve this node if the expression has grown too large
   LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
}
615
// \brief Build a lazy node for a pointwise unary op requiring a tolerance
// parameter (G_UNARY_P, i.e. the whereZero/whereNonZero family).
// \param tol tolerance stored in m_tol and used when the node is resolved.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
	: parent(left->getFunctionSpace(), left->getShape()),
	m_op(op),
	m_axis_offset(0),
	m_transpose(0),
	m_tol(tol)
{
   if ((getOpgroup(op)!=G_UNARY_P))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
   }
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));		// wrap in IDENTITY
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;	// unary ops preserve the readiness category
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   SIZELIMIT	// may immediately resolve this node if the expression has grown too large
   LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
}
646
// Destructor. Empty body: members appear to be smart pointers / values that
// clean themselves up - no raw resources are released here.
DataLazy::~DataLazy()
{
}
650
651
// \return the number of sample buffers needed to resolve this expression
// (computed by calcBuffs when the node was constructed).
int
DataLazy::getBuffsRequired() const
{
	return m_buffsRequired;
}
657
658
// \return the largest sample size (in doubles) of this node or any node
// beneath it (maintained by the constructors).
size_t
DataLazy::getMaxSampleSize() const
{
	return m_maxsamplesize;
}
664
665
666
667 size_t
668 DataLazy::getSampleBufferSize() const
669 {
670 return m_maxsamplesize*(max(1,m_buffsRequired));
671 }
672
673 /*
674 \brief Evaluates the expression using methods on Data.
675 This does the work for the collapse method.
676 For reasons of efficiency do not call this method on DataExpanded nodes.
677 */
// \brief Recursively evaluate this expression using the (non-lazy) Data class
// operations and return the resulting DataReady.
// Only for 'C' or 'T' nodes - calling this on expanded ('E') data defeats the
// point of laziness and is rejected.
// \throws DataException for 'E' nodes, POS, or an unknown operator.
DataReady_ptr
DataLazy::collapseToReady()
{
  if (m_readytype=='E')
  {	// this is more an efficiency concern than anything else
	throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  if (m_op==IDENTITY)
  {
	return m_id;	// leaf node - already holds ready data
  }
  // evaluate children first (right only exists for two-operand groups)
  DataReady_ptr pleft=m_left->collapseToReady();
  Data left(pleft);
  Data right;
  if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
  {
	right=Data(m_right->collapseToReady());
  }
  Data result;
  // dispatch to the corresponding eager Data operation
  switch(m_op)
  {
	case ADD:
		result=left+right;
		break;
	case SUB:
		result=left-right;
		break;
	case MUL:
		result=left*right;
		break;
	case DIV:
		result=left/right;
		break;
	case SIN:
		result=left.sin();
		break;
	case COS:
		result=left.cos();
		break;
	case TAN:
		result=left.tan();
		break;
	case ASIN:
		result=left.asin();
		break;
	case ACOS:
		result=left.acos();
		break;
	case ATAN:
		result=left.atan();
		break;
	case SINH:
		result=left.sinh();
		break;
	case COSH:
		result=left.cosh();
		break;
	case TANH:
		result=left.tanh();
		break;
	case ERF:
		result=left.erf();
		break;
	case ASINH:
		result=left.asinh();
		break;
	case ACOSH:
		result=left.acosh();
		break;
	case ATANH:
		result=left.atanh();
		break;
	case LOG10:
		result=left.log10();
		break;
	case LOG:
		result=left.log();
		break;
	case SIGN:
		result=left.sign();
		break;
	case ABS:
		result=left.abs();
		break;
	case NEG:
		result=left.neg();
		break;
	case POS:
		// it doesn't mean anything for delayed.
		// it will just trigger a deep copy of the lazy object
		throw DataException("Programmer error - POS not supported for lazy data.");
		break;
	case EXP:
		result=left.exp();
		break;
	case SQRT:
		result=left.sqrt();
		break;
	case RECIP:
		result=left.oneOver();
		break;
	case GZ:
		result=left.wherePositive();
		break;
	case LZ:
		result=left.whereNegative();
		break;
	case GEZ:
		result=left.whereNonNegative();
		break;
	case LEZ:
		result=left.whereNonPositive();
		break;
	case NEZ:
		result=left.whereNonZero(m_tol);
		break;
	case EZ:
		result=left.whereZero(m_tol);
		break;
	case SYM:
		result=left.symmetric();
		break;
	case NSYM:
		result=left.nonsymmetric();
		break;
	case PROD:
		result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
		break;
	case TRANS:
		result=left.transpose(m_axis_offset);
		break;
	case TRACE:
		result=left.trace(m_axis_offset);
		break;
	default:
		throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
  }
  return result.borrowReadyPtr();
}
817
818 /*
819 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
820 This method uses the original methods on the Data class to evaluate the expressions.
821 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
822 the purpose of using DataLazy in the first place).
823 */
// \brief Replace this node's expression with its evaluated value, turning the
// node into an IDENTITY wrapper around the result.
// NOTE: the IDENTITY check must stay first - an identity node may legitimately
// hold expanded ('E') data and must return silently rather than throw.
void
DataLazy::collapse()
{
  if (m_op==IDENTITY)
  {
	return;		// already collapsed - nothing to do
  }
  if (m_readytype=='E')
  {	// this is more an efficiency concern than anything else
	throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  m_id=collapseToReady();
  m_op=IDENTITY;	// from now on the node just serves m_id
}
838
839 /*
840 \brief Compute the value of the expression (unary operation) for the given sample.
841 \return Vector which stores the value of the subexpression for the given sample.
842 \param v A vector to store intermediate results.
843 \param offset Index in v to begin storing results.
844 \param sampleNo Sample number to evaluate.
845 \param roffset (output parameter) the offset in the return vector where the result begins.
846
847 The return value will be an existing vector so do not deallocate it.
848 If the result is stored in v it should be stored at the offset given.
849 Everything from offset to the end of v should be considered available for this method to use.
850 */
// \brief Compute the value of a pointwise unary expression for one sample.
// \param v scratch vector; results are written starting at offset.
// \param offset index in v where this node may store its result.
// \param sampleNo sample number to evaluate.
// \param roffset (output) offset in the returned vector where the result begins.
// \return pointer to the vector holding the result (here always &v).
DataTypes::ValueType*
DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  // fetch the child's sample, then apply the op in bulk over the whole sample
  const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
  const double* left=&((*vleft)[roffset]);
  double* result=&(v[offset]);
  roffset=offset;	// our result lives at the offset we were given
  switch (m_op)
  {
    case SIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
	break;
    case COS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
	break;
    case TAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
	break;
    case ASIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
	break;
    case ACOS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
	break;
    case ATAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
	break;
    case SINH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
	break;
    case COSH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
	break;
    case TANH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
	break;
    // erf and the inverse hyperbolics are not in the MSVC maths library,
    // hence the _WIN32 substitutes below
    case ERF:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
	tensor_unary_operation(m_samplesize, left, result, ::erf);
	break;
#endif
   case ASINH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif
	break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif
	break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif
	break;
    case LOG10:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
	break;
    case LOG:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
	break;
    case SIGN:
	tensor_unary_operation(m_samplesize, left, result, escript::fsign);
	break;
    case ABS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
	break;
    case NEG:
	tensor_unary_operation(m_samplesize, left, result, negate<double>());
	break;
    case POS:
	// it doesn't mean anything for delayed.
	// it will just trigger a deep copy of the lazy object
	throw DataException("Programmer error - POS not supported for lazy data.");
	break;
    case EXP:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
	break;
    case SQRT:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
	break;
    case RECIP:
	tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
	break;
    case GZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
	break;
    case LZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
	break;
    case GEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
	break;
    case LEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
	break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
    case NEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
	break;
    case EZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
	break;

    default:
	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
976
977
978
979
980
981
982 /*
983 \brief Compute the value of the expression (unary operation) for the given sample.
984 \return Vector which stores the value of the subexpression for the given sample.
985 \param v A vector to store intermediate results.
986 \param offset Index in v to begin storing results.
987 \param sampleNo Sample number to evaluate.
988 \param roffset (output parameter) the offset in the return vector where the result begins.
989
990 The return value will be an existing vector so do not deallocate it.
991 If the result is stored in v it should be stored at the offset given.
992 Everything from offset to the end of v should be considered available for this method to use.
993 */
994 DataTypes::ValueType*
995 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
996 {
997 // we assume that any collapsing has been done before we get here
998 // since we only have one argument we don't need to think about only
999 // processing single points.
1000 if (m_readytype!='E')
1001 {
1002 throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1003 }
1004 // since we can't write the result over the input, we need a result offset further along
1005 size_t subroffset=roffset+m_samplesize;
1006 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1007 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1008 roffset=offset;
1009 size_t loop=0;
1010 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1011 size_t step=getNoValues();
1012 switch (m_op)
1013 {
1014 case SYM:
1015 for (loop=0;loop<numsteps;++loop)
1016 {
1017 DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1018 subroffset+=step;
1019 offset+=step;
1020 }
1021 break;
1022 case NSYM:
1023 for (loop=0;loop<numsteps;++loop)
1024 {
1025 DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1026 subroffset+=step;
1027 offset+=step;
1028 }
1029 break;
1030 default:
1031 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1032 }
1033 return &v;
1034 }
1035
1036 /*
1037 \brief Compute the value of the expression (unary operation) for the given sample.
1038 \return Vector which stores the value of the subexpression for the given sample.
1039 \param v A vector to store intermediate results.
1040 \param offset Index in v to begin storing results.
1041 \param sampleNo Sample number to evaluate.
1042 \param roffset (output parameter) the offset in the return vector where the result begins.
1043
1044 The return value will be an existing vector so do not deallocate it.
1045 If the result is stored in v it should be stored at the offset given.
1046 Everything from offset to the end of v should be considered available for this method to use.
1047 */
1048 DataTypes::ValueType*
1049 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1050 {
1051 // we assume that any collapsing has been done before we get here
1052 // since we only have one argument we don't need to think about only
1053 // processing single points.
1054 if (m_readytype!='E')
1055 {
1056 throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1057 }
1058 // since we can't write the result over the input, we need a result offset further along
1059 size_t subroffset;
1060 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1061 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1062 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1063 roffset=offset;
1064 size_t loop=0;
1065 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1066 size_t outstep=getNoValues();
1067 size_t instep=m_left->getNoValues();
1068 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1069 switch (m_op)
1070 {
1071 case TRACE:
1072 for (loop=0;loop<numsteps;++loop)
1073 {
1074 size_t zz=sampleNo*getNumDPPSample()+loop;
1075 if (zz==5800)
1076 {
1077 LAZYDEBUG(cerr << "point=" << zz<< endl;)
1078 LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1079 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1080 LAZYDEBUG(cerr << subroffset << endl;)
1081 LAZYDEBUG(cerr << "output=" << offset << endl;)
1082 }
1083 DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1084 if (zz==5800)
1085 {
1086 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1087 }
1088 subroffset+=instep;
1089 offset+=outstep;
1090 }
1091 break;
1092 case TRANS:
1093 for (loop=0;loop<numsteps;++loop)
1094 {
1095 DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1096 subroffset+=instep;
1097 offset+=outstep;
1098 }
1099 break;
1100 default:
1101 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1102 }
1103 return &v;
1104 }
1105
1106
// Driver loop shared by the binary-operation cases in resolveBinary.
// Applies the element-wise operation X (element type TYPE) to chunks of the
// left/right argument vectors.  The inner loop advances the read offsets by
// leftstep/rightstep per iteration; the outer loop (oleftstep/orightstep/
// onumsteps) is only used when an expanded scalar is combined with a
// non-scalar (see the step setup in resolveBinary, where onumsteps defaults
// to 1 otherwise).  Relies on these names being in scope at the expansion
// point: left, right, resultp, lroffset, rroffset, chunksize, numsteps,
// onumsteps, leftstep, rightstep, oleftstep, orightstep, resultStep.
#define PROC_OP(TYPE,X) \
	for (int j=0;j<onumsteps;++j)\
	{\
	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
	  { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
	     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
	     lroffset+=leftstep; \
	     rroffset+=rightstep; \
	  }\
	  lroffset+=oleftstep;\
	  rroffset+=orightstep;\
	}
1122
1123 /*
1124 \brief Compute the value of the expression (binary operation) for the given sample.
1125 \return Vector which stores the value of the subexpression for the given sample.
1126 \param v A vector to store intermediate results.
1127 \param offset Index in v to begin storing results.
1128 \param sampleNo Sample number to evaluate.
1129 \param roffset (output parameter) the offset in the return vector where the result begins.
1130
1131 The return value will be an existing vector so do not deallocate it.
1132 If the result is stored in v it should be stored at the offset given.
1133 Everything from offset to the end of v should be considered available for this method to use.
1134 */
1135 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1136 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1137 // If both children are expanded, then we can process them in a single operation (we treat
1138 // the whole sample as one big datapoint.
1139 // If one of the children is not expanded, then we need to treat each point in the sample
1140 // individually.
1141 // There is an additional complication when scalar operations are considered.
1142 // For example, 2+Vector.
1143 // In this case each double within the point is treated individually
// Resolve a binary element-wise operation (ADD/SUB/MUL/DIV/POW) for one sample.
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

	size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
	bool leftExp=(m_left->m_readytype=='E');
	bool rightExp=(m_right->m_readytype=='E');
	if (!leftExp && !rightExp)
	{
		throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
	}
	bool leftScalar=(m_left->getRank()==0);
	bool rightScalar=(m_right->getRank()==0);
	// ranks must agree unless one side is a scalar (which is broadcast)
	if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
	{
		throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
	}
	size_t leftsize=m_left->getNoValues();
	size_t rightsize=m_right->getNoValues();
	size_t chunksize=1;	// how many doubles will be processed in one go
	int leftstep=0;		// how far should the left offset advance after each step
	int rightstep=0;
	int numsteps=0;		// total number of steps for the inner loop
	int oleftstep=0;	// the o variables refer to the outer loop
	int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
	int onumsteps=1;

	// Classify the two children; exactly one flag per side is true.
	bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
	bool RES=(rightExp && rightScalar);
	bool LS=(!leftExp && leftScalar);	// left is a single scalar
	bool RS=(!rightExp && rightScalar);
	bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
	bool RN=(!rightExp && !rightScalar);
	bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
	bool REN=(rightExp && !rightScalar);

	// Choose the chunk size and step sizes consumed by PROC_OP below.
	if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
	{
		// identical layouts: the whole sample can be processed as one chunk
		chunksize=m_left->getNumDPPSample()*leftsize;
		leftstep=0;
		rightstep=0;
		numsteps=1;
	}
	else if (LES || RES)
	{
		// one side is an expanded scalar: process one double at a time
		chunksize=1;
		if (LES)	// left is an expanded scalar
		{
			if (RS)
			{
				leftstep=1;
				rightstep=0;
				numsteps=m_left->getNumDPPSample();
			}
			else	// RN or REN
			{
				// pair each left scalar with every value of the right datapoint;
				// the negative outer step rewinds a non-expanded right argument
				leftstep=0;
				oleftstep=1;
				rightstep=1;
				orightstep=(RN ? -(int)rightsize : 0);
				numsteps=rightsize;
				onumsteps=m_left->getNumDPPSample();
			}
		}
		else	// right is an expanded scalar
		{
			if (LS)
			{
				rightstep=1;
				leftstep=0;
				numsteps=m_right->getNumDPPSample();
			}
			else
			{
				// mirror of the LES/(RN or REN) case above
				rightstep=0;
				orightstep=1;
				leftstep=1;
				oleftstep=(LN ? -(int)leftsize : 0);
				numsteps=leftsize;
				onumsteps=m_right->getNumDPPSample();
			}
		}
	}
	else	// this leaves (LEN, RS), (LEN, RN) and their transposes
	{
		if (LEN)	// and Right will be a single value
		{
			chunksize=rightsize;
			leftstep=rightsize;
			rightstep=0;
			numsteps=m_left->getNumDPPSample();
			if (RS)
			{
				numsteps*=leftsize;
			}
		}
		else	// REN
		{
			chunksize=leftsize;
			rightstep=leftsize;
			leftstep=0;
			numsteps=m_right->getNumDPPSample();
			if (LS)
			{
				numsteps*=rightsize;
			}
		}
	}

	int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
	// Get the values of sub-expressions
	const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);	// see note on
	// calcBufss for why we can't put offset as the 2nd param above
	const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
	// the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)


	double* resultp=&(v[offset]);	// results are stored at the vector offset we recieved
	switch(m_op)
	{
	case ADD:
		PROC_OP(NO_ARG,plus<double>());
		break;
	case SUB:
		PROC_OP(NO_ARG,minus<double>());
		break;
	case MUL:
		PROC_OP(NO_ARG,multiplies<double>());
		break;
	case DIV:
		PROC_OP(NO_ARG,divides<double>());
		break;
	case POW:
		PROC_OP(double (double,double),::pow);
		break;
	default:
		throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
	}
	roffset=offset;
	return &v;
}
1294
1295
1296
1297 /*
1298 \brief Compute the value of the expression (tensor product) for the given sample.
1299 \return Vector which stores the value of the subexpression for the given sample.
1300 \param v A vector to store intermediate results.
1301 \param offset Index in v to begin storing results.
1302 \param sampleNo Sample number to evaluate.
1303 \param roffset (output parameter) the offset in the return vector where the result begins.
1304
1305 The return value will be an existing vector so do not deallocate it.
1306 If the result is stored in v it should be stored at the offset given.
1307 Everything from offset to the end of v should be considered available for this method to use.
1308 */
1309 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1310 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1311 // unlike the other resolve helpers, we must treat these datapoints separately.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)

	size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
	bool leftExp=(m_left->m_readytype=='E');
	bool rightExp=(m_right->m_readytype=='E');
	int steps=getNumDPPSample();	// one matrix product per datapoint
/*	int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
	int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
	// A non-expanded child supplies the same datapoint every step, so its step is 0.
	int leftStep=(leftExp? m_left->getNoValues() : 0);	// do not have scalars as input to this method
	int rightStep=(rightExp?m_right->getNoValues() : 0);

	int resultStep=getNoValues();
	// Get the values of sub-expressions (leave a gap of one sample for the result).
	int gap=offset+m_samplesize;

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

	const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
	gap+=m_left->getMaxSampleSize();


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


	const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;

for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
{
cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
if (i%4==0) cout << endl;
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(
for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
{
cerr << "[" << setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
if (i%4==0) cout << endl;
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

	double* resultp=&(v[offset]);	// results are stored at the vector offset we recieved
	switch(m_op)
	{
	case PROD:
		// One matrix-matrix product per datapoint.  m_SL/m_SM/m_SR are the
		// flattened operand dimensions and m_transpose the transpose mode
		// (presumably fixed when the node was constructed - not visible here).
		for (int i=0;i<steps;++i,resultp+=resultStep)
		{

LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)

			const double *ptr_0 = &((*left)[lroffset]);
			const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

			matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

LAZYDEBUG(cout << "Results--\n";
{
DataVector dv(getNoValues());
for (int z=0;z<getNoValues();++z)
{
cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
if (z%4==0) cout << endl;
dv[z]=resultp[z];
}
cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
}
)
			lroffset+=leftStep;
			rroffset+=rightStep;
		}
		break;
	default:
		throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
	}
	roffset=offset;
	return &v;
}
1411
1412
1413
1414 /*
1415 \brief Compute the value of the expression for the given sample.
1416 \return Vector which stores the value of the subexpression for the given sample.
1417 \param v A vector to store intermediate results.
1418 \param offset Index in v to begin storing results.
1419 \param sampleNo Sample number to evaluate.
1420 \param roffset (output parameter) the offset in the return vector where the result begins.
1421
1422 The return value will be an existing vector so do not deallocate it.
1423 */
1424 // the vector and the offset are a place where the method could write its data if it wishes
1425 // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1426 // Hence the return value to indicate where the data is actually stored.
1427 // Regardless, the storage should be assumed to be used, even if it isn't.
1428
1429 // the roffset is the offset within the returned vector where the data begins
1430 const DataTypes::ValueType*
1431 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1432 {
1433 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1434 // collapse so we have a 'E' node or an IDENTITY for some other type
1435 if (m_readytype!='E' && m_op!=IDENTITY)
1436 {
1437 collapse();
1438 }
1439 if (m_op==IDENTITY)
1440 {
1441 const ValueType& vec=m_id->getVectorRO();
1442 if (m_readytype=='C')
1443 {
1444 roffset=0;
1445 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1446 return &(vec);
1447 }
1448 roffset=m_id->getPointOffset(sampleNo, 0);
1449 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1450 return &(vec);
1451 }
1452 if (m_readytype!='E')
1453 {
1454 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1455 }
1456 switch (getOpgroup(m_op))
1457 {
1458 case G_UNARY:
1459 case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1460 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1461 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1462 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1463 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1464 default:
1465 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1466 }
1467
1468 }
1469
1470 const DataTypes::ValueType*
1471 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
1472 {
1473 #ifdef _OPENMP
1474 int tid=omp_get_thread_num();
1475 #else
1476 int tid=0;
1477 #endif
1478 return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
1479 }
1480
1481
1482 // This needs to do the work of the idenity constructor
1483 void
1484 DataLazy::resolveToIdentity()
1485 {
1486 if (m_op==IDENTITY)
1487 return;
1488 DataReady_ptr p=resolve();
1489 makeIdentity(p);
1490 }
1491
1492 void DataLazy::makeIdentity(const DataReady_ptr& p)
1493 {
1494 m_op=IDENTITY;
1495 m_axis_offset=0;
1496 m_transpose=0;
1497 m_SL=m_SM=m_SR=0;
1498 m_children=m_height=0;
1499 m_id=p;
1500 if(p->isConstant()) {m_readytype='C';}
1501 else if(p->isExpanded()) {m_readytype='E';}
1502 else if (p->isTagged()) {m_readytype='T';}
1503 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1504 m_buffsRequired=1;
1505 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1506 m_maxsamplesize=m_samplesize;
1507 m_left.reset();
1508 m_right.reset();
1509 }
1510
// To simplify the memory management, all threads operate on one large vector, rather than one each.
// Each sample is evaluated independently and copied into the result DataExpanded.
DataReady_ptr
DataLazy::resolve()
{

LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
	if (m_readytype!='E')	// if the whole sub-expression is Constant or Tagged, then evaluate it normally
	{
		collapse();
	}
	if (m_op==IDENTITY)	// So a lazy expression of Constant or Tagged data will be returned here.
	{
		return m_id;
	}
	// from this point on we must have m_op!=IDENTITY and m_readytype=='E'
	size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired));	// Each thread needs to have enough
	// storage to evaluate its expression
	int numthreads=1;
#ifdef _OPENMP
	numthreads=omp_get_max_threads();
#endif
	// One contiguous scratch vector; each thread uses its own slice of it.
	ValueType v(numthreads*threadbuffersize);
LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
	// The result container; ownership passes to the shared pointer below.
	DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
	ValueType& resvec=result->getVectorRW();
	DataReady_ptr resptr=DataReady_ptr(result);
	int sample;
	size_t outoffset;	// offset in the output data
	int totalsamples=getNumSamples();
	const ValueType* res=0;	// Vector storing the answer
	size_t resoffset=0;	// where in the vector to find the answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
	// Samples are independent, so they can be evaluated in parallel; each
	// thread writes only into its own region of v and its own output samples.
	#pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
	for (sample=0;sample<totalsamples;++sample)
	{
		if (sample==0)	{ENABLEDEBUG}

//		if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}
LAZYDEBUG(cout << "################################# " << sample << endl;)
#ifdef _OPENMP
		res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
#else
		res=resolveSample(v,0,sample,resoffset);	// res would normally be v, but not if its a single IDENTITY op.
#endif
LAZYDEBUG(cerr << "-------------------------------- " << endl;)
LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
		outoffset=result->getPointOffset(sample,0);
LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
		for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)	// copy values into the output vector
		{
LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
			resvec[outoffset]=(*res)[resoffset];
		}
LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
LAZYDEBUG(cerr << "*********************************" << endl;)
		DISABLEDEBUG
	}
	return resptr;
}
1572
1573 std::string
1574 DataLazy::toString() const
1575 {
1576 ostringstream oss;
1577 oss << "Lazy Data:";
1578 intoString(oss);
1579 return oss.str();
1580 }
1581
1582
1583 void
1584 DataLazy::intoString(ostringstream& oss) const
1585 {
1586 // oss << "[" << m_children <<";"<<m_height <<"]";
1587 switch (getOpgroup(m_op))
1588 {
1589 case G_IDENTITY:
1590 if (m_id->isExpanded())
1591 {
1592 oss << "E";
1593 }
1594 else if (m_id->isTagged())
1595 {
1596 oss << "T";
1597 }
1598 else if (m_id->isConstant())
1599 {
1600 oss << "C";
1601 }
1602 else
1603 {
1604 oss << "?";
1605 }
1606 oss << '@' << m_id.get();
1607 break;
1608 case G_BINARY:
1609 oss << '(';
1610 m_left->intoString(oss);
1611 oss << ' ' << opToString(m_op) << ' ';
1612 m_right->intoString(oss);
1613 oss << ')';
1614 break;
1615 case G_UNARY:
1616 case G_UNARY_P:
1617 case G_NP1OUT:
1618 case G_NP1OUT_P:
1619 oss << opToString(m_op) << '(';
1620 m_left->intoString(oss);
1621 oss << ')';
1622 break;
1623 case G_TENSORPROD:
1624 oss << opToString(m_op) << '(';
1625 m_left->intoString(oss);
1626 oss << ", ";
1627 m_right->intoString(oss);
1628 oss << ')';
1629 break;
1630 default:
1631 oss << "UNKNOWN";
1632 }
1633 }
1634
1635 DataAbstract*
1636 DataLazy::deepCopy()
1637 {
1638 switch (getOpgroup(m_op))
1639 {
1640 case G_IDENTITY: return new DataLazy(m_id->deepCopy()->getPtr());
1641 case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1642 case G_BINARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1643 case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1644 case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1645 default:
1646 throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1647 }
1648 }
1649
1650
// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
// Always throws DataException.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
	throw DataException("getLength() does not make sense for lazy data.");
}
1662
1663
// Slicing is not supported on lazy data; always throws DataException.
DataAbstract*
DataLazy::getSlice(const DataTypes::RegionType& region) const
{
	throw DataException("getSlice - not implemented for Lazy objects.");
}
1669
1670
1671 // To do this we need to rely on our child nodes
1672 DataTypes::ValueType::size_type
1673 DataLazy::getPointOffset(int sampleNo,
1674 int dataPointNo)
1675 {
1676 if (m_op==IDENTITY)
1677 {
1678 return m_id->getPointOffset(sampleNo,dataPointNo);
1679 }
1680 if (m_readytype!='E')
1681 {
1682 collapse();
1683 return m_id->getPointOffset(sampleNo,dataPointNo);
1684 }
1685 // at this point we do not have an identity node and the expression will be Expanded
1686 // so we only need to know which child to ask
1687 if (m_left->m_readytype=='E')
1688 {
1689 return m_left->getPointOffset(sampleNo,dataPointNo);
1690 }
1691 else
1692 {
1693 return m_right->getPointOffset(sampleNo,dataPointNo);
1694 }
1695 }
1696
1697 // To do this we need to rely on our child nodes
1698 DataTypes::ValueType::size_type
1699 DataLazy::getPointOffset(int sampleNo,
1700 int dataPointNo) const
1701 {
1702 if (m_op==IDENTITY)
1703 {
1704 return m_id->getPointOffset(sampleNo,dataPointNo);
1705 }
1706 if (m_readytype=='E')
1707 {
1708 // at this point we do not have an identity node and the expression will be Expanded
1709 // so we only need to know which child to ask
1710 if (m_left->m_readytype=='E')
1711 {
1712 return m_left->getPointOffset(sampleNo,dataPointNo);
1713 }
1714 else
1715 {
1716 return m_right->getPointOffset(sampleNo,dataPointNo);
1717 }
1718 }
1719 if (m_readytype=='C')
1720 {
1721 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1722 }
1723 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1724 }
1725
1726
1727 // I have decided to let Data:: handle this issue.
1728 void
1729 DataLazy::setToZero()
1730 {
1731 // DataTypes::ValueType v(getNoValues(),0);
1732 // m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1733 // m_op=IDENTITY;
1734 // m_right.reset();
1735 // m_left.reset();
1736 // m_readytype='C';
1737 // m_buffsRequired=1;
1738
1739 throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1740
1741 }
1742
1743 bool
1744 DataLazy::actsExpanded() const
1745 {
1746 return (m_readytype=='E');
1747 }
1748
1749 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26