/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Contents of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2171 - (show annotations)
Wed Dec 17 08:21:29 2008 UTC (10 years, 6 months ago) by phornby
File size: 51182 byte(s)
Unitary minus applied to unsigned returns unsigned error fixed
lines 1124 & 1142. Probably not what was intended......
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
// Debug tracing for lazy evaluation.
// To enable, restore the commented-out definition of LAZYDEBUG below;
// traced statements then execute only while privdebug is true.
// #define LAZYDEBUG(X) if (privdebug){X;}
#define LAZYDEBUG(X)
namespace
{
// File-local switch consulted by the (debug form of the) LAZYDEBUG macro.
bool privdebug=false;

// Convenience macros to flip tracing on/off from within this translation unit.
#define ENABLEDEBUG privdebug=true;
#define DISABLEDEBUG privdebug=false;
}
40
41 /*
42 How does DataLazy work?
43 ~~~~~~~~~~~~~~~~~~~~~~~
44
45 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
46 denoted left and right.
47
48 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
49 This means that all "internal" nodes in the structure are instances of DataLazy.
50
51 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
52 Note that IDENTITY is not considered a unary operation.
53
54 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
55 It must however form a DAG (directed acyclic graph).
56 I will refer to individual DataLazy objects within the structure as nodes.
57
58 Each node also stores:
59 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
60 evaluated.
61 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
62 evaluate the expression.
63 - m_samplesize ~ the number of doubles stored in a sample.
64
65 When a new node is created, the above values are computed based on the values in the child nodes.
66 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
67
68 The resolve method, which produces a DataReady from a DataLazy, does the following:
69 1) Create a DataReady to hold the new result.
70 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
71 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
72
73 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
74
75 resolveSample returns a Vector* and an offset within that vector where the result is stored.
76 Normally, this would be v, but for identity nodes their internal vector is returned instead.
77
78 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
79
80 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
81 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
82
83 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
84 1) Add to the ES_optype.
85 2) determine what opgroup your operation belongs to (X)
86 3) add a string for the op to the end of ES_opstrings
87 4) increase ES_opcount
88 5) add an entry (X) to opgroups
89 6) add an entry to the switch in collapseToReady
90 7) add an entry to resolveX
91 */
92
93
94 using namespace std;
95 using namespace boost;
96
97 namespace escript
98 {
99
100 namespace
101 {
102
// Classification of operators. The group determines which resolve method
// handles an operator and how many arguments/buffers it requires.
enum ES_opgroup
{
   G_UNKNOWN,
   G_IDENTITY,
   G_BINARY,		// pointwise operations with two arguments
   G_UNARY,		// pointwise operations with one argument
   G_UNARY_P,		// pointwise operations with one argument, requiring a parameter
   G_NP1OUT,		// non-pointwise op with one output
   G_NP1OUT_P,		// non-pointwise op with one output requiring a parameter
   G_TENSORPROD		// general tensor product
};




// Human-readable name for each ES_optype, indexed by the optype value.
// NOTE: ES_opstrings, ES_opcount and opgroups must all be kept in sync with
// each other and with the ES_optype enum - see the "to add a new operator"
// checklist in the comment at the top of this file.
string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
			"sin","cos","tan",
			"asin","acos","atan","sinh","cosh","tanh","erf",
			"asinh","acosh","atanh",
			"log10","log","sign","abs","neg","pos","exp","sqrt",
			"1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
			"symmetric","nonsymmetric",
			"prod",
			"transpose", "trace"};
// Number of entries in ES_opstrings/opgroups (40 at this revision).
int ES_opcount=40;
// Group for each operator, in the same order as ES_opstrings.
ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
			G_UNARY,G_UNARY,G_UNARY, //10
			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,	// 17
			G_UNARY,G_UNARY,G_UNARY,					// 20
			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,	// 28
			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,	// 35
			G_NP1OUT,G_NP1OUT,
			G_TENSORPROD,
			G_NP1OUT_P, G_NP1OUT_P};
// Map an operator onto its group (plain table lookup; op must be a valid ES_optype).
inline
ES_opgroup
getOpgroup(ES_optype op)
{
   return opgroups[op];
}
143
144 // return the FunctionSpace of the result of "left op right"
145 FunctionSpace
146 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
147 {
148 // perhaps this should call interpolate and throw or something?
149 // maybe we need an interpolate node -
150 // that way, if interpolate is required in any other op we can just throw a
151 // programming error exception.
152
153 FunctionSpace l=left->getFunctionSpace();
154 FunctionSpace r=right->getFunctionSpace();
155 if (l!=r)
156 {
157 if (r.probeInterpolation(l))
158 {
159 return l;
160 }
161 if (l.probeInterpolation(r))
162 {
163 return r;
164 }
165 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
166 }
167 return l;
168 }
169
170 // return the shape of the result of "left op right"
171 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
172 DataTypes::ShapeType
173 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
174 {
175 if (left->getShape()!=right->getShape())
176 {
177 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
178 {
179 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
180 }
181 if (left->getRank()==0) // we need to allow scalar * anything
182 {
183 return right->getShape();
184 }
185 if (right->getRank()==0)
186 {
187 return left->getShape();
188 }
189 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
190 }
191 return left->getShape();
192 }
193
194 // return the shape for "op left"
195
196 DataTypes::ShapeType
197 resultShape(DataAbstract_ptr left, ES_optype op)
198 {
199 switch(op)
200 {
201 case TRANS:
202 return left->getShape();
203 break;
204 case TRACE:
205 return DataTypes::scalarShape;
206 break;
207 default:
208 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
209 }
210 }
211
// Determine the output shape for the general tensor product operation.
// The additional parameters return information required later for the product:
//   SL - product of the leading (non-contracted) dimensions of the left arg
//   SM - product of the contracted (shared) dimensions
//   SR - product of the trailing (non-contracted) dimensions of the right arg
// The majority of this code is copy pasted from C_General_Tensor_Product.
DataTypes::ShapeType
GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
{
	
  // Get rank and shape of inputs
  int rank0 = left->getRank();
  int rank1 = right->getRank();
  const DataTypes::ShapeType& shape0 = left->getShape();
  const DataTypes::ShapeType& shape1 = right->getShape();

  // Prepare for the loops of the product and verify compatibility of shapes
  int start0=0, start1=0;
  if (transpose == 0)		{}
  else if (transpose == 1)	{ start0 = axis_offset; }
  else if (transpose == 2)	{ start1 = rank1-axis_offset; }
  else				{ throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }

  if (rank0<axis_offset)
  {
	throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
  }
  // NOTE(review): there is no matching guard for axis_offset>rank1; presumably
  // callers validate this - TODO confirm.

  // Adjust the shapes for transpose
  DataTypes::ShapeType tmpShape0(rank0);	// pre-sizing the vectors rather
  DataTypes::ShapeType tmpShape1(rank1);	// than using push_back
  for (int i=0; i<rank0; i++)	{ tmpShape0[i]=shape0[(i+start0)%rank0]; }
  for (int i=0; i<rank1; i++)	{ tmpShape1[i]=shape1[(i+start1)%rank1]; }

  // Prepare for the loops of the product
  SL=1, SM=1, SR=1;
  for (int i=0; i<rank0-axis_offset; i++)	{
    SL *= tmpShape0[i];
  }
  // the contracted dimensions must agree between the two arguments
  for (int i=rank0-axis_offset; i<rank0; i++)	{
    if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
      throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
    }
    SM *= tmpShape0[i];
  }
  for (int i=axis_offset; i<rank1; i++)	{
    SR *= tmpShape1[i];
  }

  // Define the shape of the output (rank of shape is the sum of the loop ranges below)
  DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);	
  {	// block to limit the scope of out_index
    int out_index=0;
    for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
    for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
  }

  if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
  {
     ostringstream os;
     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
     throw DataException(os.str());
  }

  return shape2;
}
275
276
277 // determine the number of points in the result of "left op right"
278 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
279 // size_t
280 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
281 // {
282 // switch (getOpgroup(op))
283 // {
284 // case G_BINARY: return left->getLength();
285 // case G_UNARY: return left->getLength();
286 // case G_NP1OUT: return left->getLength();
287 // default:
288 // throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
289 // }
290 // }
291
// Determine the number of sample buffers required to evaluate an expression
// combining left and right.
// NP1OUT (and G_TENSORPROD) need an extra buffer because we can't write the
// answers over the top of the input.
// It might seem that pointwise binary ops (G_BINARY) could be written over
// the top of the left operand; this would be true were it not for the
// possibility that the LHS could be a scalar which needs to be examined
// multiple times.
int
calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
{
   switch(getOpgroup(op))
   {
   case G_IDENTITY: return 1;
   // NOTE(review): the "+1" applied only to the right operand inside max()
   // looks asymmetric - presumably it reserves a slot to hold the RHS while
   // the LHS is resolved; TODO confirm the intended formula.
   case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
   case G_UNARY:
   case G_UNARY_P: return max(left->getBuffsRequired(),1);
   case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
   case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
   case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
   default:
	throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
   }
}
314
315
316 } // end anonymous namespace
317
318
319
320 // Return a string representing the operation
321 const std::string&
322 opToString(ES_optype op)
323 {
324 if (op<0 || op>=ES_opcount)
325 {
326 op=UNKNOWNOP;
327 }
328 return ES_opstrings[op];
329 }
330
331
/*
  \brief Wrap an existing DataReady object in an IDENTITY node.
  \param p the data to wrap; must NOT itself be lazy.
  \throws DataException if p is lazy or is an unrecognised DataReady kind.
*/
DataLazy::DataLazy(DataAbstract_ptr p)
	: parent(p->getFunctionSpace(),p->getShape()),
	m_op(IDENTITY),
	m_axis_offset(0),
	m_transpose(0),
	m_SL(0), m_SM(0), m_SR(0)
{
   if (p->isLazy())
   {
	// I don't want identity of Lazy.
	// Question: Why would that be so bad?
	// Answer: We assume that the child of ID is something we can call getVector on
	throw DataException("Programmer error - attempt to create identity from a DataLazy.");
   }
   else
   {
	m_id=dynamic_pointer_cast<DataReady>(p);
	// Record what kind of result evaluating this node yields:
	// 'C'=constant, 'E'=expanded, 'T'=tagged.
	if(p->isConstant()) {m_readytype='C';}
	else if(p->isExpanded()) {m_readytype='E';}
	else if (p->isTagged()) {m_readytype='T';}
	else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
   }
   m_buffsRequired=1;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=m_samplesize;
   LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
}
359
360
361
362
/*
  \brief Construct a lazy node for a unary (or NP1OUT) operation.
  \param left the operand; wrapped in an IDENTITY node if not already lazy.
  \param op the operation - must belong to group G_UNARY or G_NP1OUT.
  \throws DataException if op is not in an accepted group.
*/
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
	: parent(left->getFunctionSpace(),left->getShape()),
	m_op(op),
	m_axis_offset(0),
	m_transpose(0),
	m_SL(0), m_SM(0), m_SR(0)
{
   if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
   }

   // ensure the child is a DataLazy (wrap ready data in IDENTITY)
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   // a unary op produces the same readiness kind as its operand
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
}
390
391
// In this constructor we need to consider interpolation
/*
  \brief Construct a lazy node for a pointwise binary operation.
  \param left, right the operands; interpolated onto the result FunctionSpace
	 if necessary and wrapped in IDENTITY nodes if not already lazy.
  \param op the operation - must belong to group G_BINARY.
  \throws DataException if op is not binary or the operands are incompatible.
*/
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
	: parent(resultFS(left,right,op), resultShape(left,right,op)),
	m_op(op),
	m_SL(0), m_SM(0), m_SR(0)
{
   if ((getOpgroup(op)!=G_BINARY))
   {
	throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
   }

   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
   {
	FunctionSpace fs=getFunctionSpace();
	Data ltemp(left);
	Data tmp(ltemp,fs);
	left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
   {
	Data tmp(Data(right),getFunctionSpace());
	right=tmp.borrowDataPtr();
   }
   left->operandCheck(*right);

   if (left->isLazy())		// the children need to be DataLazy. Wrap them in IDENTITY if required
   {
	m_left=dynamic_pointer_cast<DataLazy>(left);
   }
   else
   {
	m_left=DataLazy_ptr(new DataLazy(left));
   }
   if (right->isLazy())
   {
	m_right=dynamic_pointer_cast<DataLazy>(right);
   }
   else
   {
	m_right=DataLazy_ptr(new DataLazy(right));
   }
   // Result readiness: expanded ('E') dominates tagged ('T'), which
   // dominates constant ('C').
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
	m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
	m_readytype='T';
   }
   else
   {
	m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);
   LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
}
452
/*
  \brief Construct a lazy node for a general tensor product.
  \param left, right the operands; interpolated/wrapped as for the binary ctor.
  \param op the operation - must belong to group G_TENSORPROD.
  \param axis_offset number of dimensions contracted in the product.
  \param transpose 0, 1 or 2 - see GTPShape for the meaning.
  \throws DataException for a non-TENSORPROD op or an invalid transpose value.
  Note: GTPShape (called in the initialiser list) also fills in m_SL/m_SM/m_SR.
*/
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
	: parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
	m_op(op),
	m_axis_offset(axis_offset),
	m_transpose(transpose)
{
   if ((getOpgroup(op)!=G_TENSORPROD))
   {
	throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
   }
   if ((transpose>2) || (transpose<0))
   {
	throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
   }
   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
   {
	FunctionSpace fs=getFunctionSpace();
	Data ltemp(left);
	Data tmp(ltemp,fs);
	left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
   {
	Data tmp(Data(right),getFunctionSpace());
	right=tmp.borrowDataPtr();
   }
   left->operandCheck(*right);

   if (left->isLazy())		// the children need to be DataLazy. Wrap them in IDENTITY if required
   {
	m_left=dynamic_pointer_cast<DataLazy>(left);
   }
   else
   {
	m_left=DataLazy_ptr(new DataLazy(left));
   }
   if (right->isLazy())
   {
	m_right=dynamic_pointer_cast<DataLazy>(right);
   }
   else
   {
	m_right=DataLazy_ptr(new DataLazy(right));
   }
   // Result readiness: expanded dominates tagged dominates constant.
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
	m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
	m_readytype='T';
   }
   else
   {
	m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);
   LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
}
516
517
/*
  \brief Construct a lazy node for a non-pointwise op taking an axis parameter
	 (group G_NP1OUT_P, e.g. transpose/trace).
  \param left the operand; wrapped in IDENTITY if not already lazy.
  \param op the operation.
  \param axis_offset axis parameter forwarded to the operation.
  \throws DataException if op is not in group G_NP1OUT_P.
*/
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
	: parent(left->getFunctionSpace(), resultShape(left,op)),
	m_op(op),
	m_axis_offset(axis_offset),
	m_transpose(0),
	m_tol(0)
{
   if ((getOpgroup(op)!=G_NP1OUT_P))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
   }
   // ensure the child is a DataLazy (wrap ready data in IDENTITY)
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
   LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
}
545
/*
  \brief Construct a lazy node for a pointwise unary op taking a tolerance
	 (group G_UNARY_P, e.g. whereZero/whereNonZero).
  \param left the operand; wrapped in IDENTITY if not already lazy.
  \param op the operation.
  \param tol tolerance used by the operation.
  \throws DataException if op is not in group G_UNARY_P.
*/
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
	: parent(left->getFunctionSpace(), left->getShape()),
	m_op(op),
	m_axis_offset(0),
	m_transpose(0),
	m_tol(tol)
{
   if ((getOpgroup(op)!=G_UNARY_P))
   {
	throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
   }
   // ensure the child is a DataLazy (wrap ready data in IDENTITY)
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
	lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
	lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_buffsRequired=calcBuffs(m_left, m_right,m_op);	// yeah m_right will be null at this point
   m_samplesize=getNumDPPSample()*getNoValues();
   m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
   LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
}
573
// Destructor - the smart pointers (m_id, m_left, m_right) release themselves.
DataLazy::~DataLazy()
{
}
577
578
// Return the number of sample buffers needed to evaluate this expression
// (computed by calcBuffs when the node was constructed).
int
DataLazy::getBuffsRequired() const
{
	return m_buffsRequired;
}
584
585
// Return the largest sample size (in doubles) of any node in this expression.
size_t
DataLazy::getMaxSampleSize() const
{
	return m_maxsamplesize;
}
591
/*
  \brief Evaluates the expression using methods on Data.
  This does the work for the collapse method.
  For reasons of efficiency do not call this method on DataExpanded nodes.
  \return a DataReady holding the value of this subexpression.
  \throws DataException if called on expanded data, for POS (unsupported on
	  lazy data), or for an operator this switch does not know.
*/
DataReady_ptr
DataLazy::collapseToReady()
{
  if (m_readytype=='E')
  {	// this is more an efficiency concern than anything else
    throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  if (m_op==IDENTITY)
  {
    return m_id;	// already holds a ready value
  }
  // recursively evaluate the operand(s) first
  DataReady_ptr pleft=m_left->collapseToReady();
  Data left(pleft);
  Data right;
  if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
  {
    right=Data(m_right->collapseToReady());
  }
  // dispatch to the corresponding (non-lazy) Data operation
  Data result;
  switch(m_op)
  {
    case ADD:
	result=left+right;
	break;
    case SUB:
	result=left-right;
	break;
    case MUL:
	result=left*right;
	break;
    case DIV:
	result=left/right;
	break;
    case SIN:
	result=left.sin();
	break;
    case COS:
	result=left.cos();
	break;
    case TAN:
	result=left.tan();
	break;
    case ASIN:
	result=left.asin();
	break;
    case ACOS:
	result=left.acos();
	break;
    case ATAN:
	result=left.atan();
	break;
    case SINH:
	result=left.sinh();
	break;
    case COSH:
	result=left.cosh();
	break;
    case TANH:
	result=left.tanh();
	break;
    case ERF:
	result=left.erf();
	break;
    case ASINH:
	result=left.asinh();
	break;
    case ACOSH:
	result=left.acosh();
	break;
    case ATANH:
	result=left.atanh();
	break;
    case LOG10:
	result=left.log10();
	break;
    case LOG:
	result=left.log();
	break;
    case SIGN:
	result=left.sign();
	break;
    case ABS:
	result=left.abs();
	break;
    case NEG:
	result=left.neg();
	break;
    case POS:
	// it doesn't mean anything for delayed.
	// it will just trigger a deep copy of the lazy object
	throw DataException("Programmer error - POS not supported for lazy data.");
	break;
    case EXP:
	result=left.exp();
	break;
    case SQRT:
	result=left.sqrt();
	break;
    case RECIP:
	result=left.oneOver();
	break;
    case GZ:
	result=left.wherePositive();
	break;
    case LZ:
	result=left.whereNegative();
	break;
    case GEZ:
	result=left.whereNonNegative();
	break;
    case LEZ:
	result=left.whereNonPositive();
	break;
    case NEZ:
	result=left.whereNonZero(m_tol);
	break;
    case EZ:
	result=left.whereZero(m_tol);
	break;
    case SYM:
	result=left.symmetric();
	break;
    case NSYM:
	result=left.nonsymmetric();
	break;
    case PROD:
	result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
	break;
    case TRANS:
	result=left.transpose(m_axis_offset);
	break;
    case TRACE:
	result=left.trace(m_axis_offset);
	break;
    default:
	throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
  }
  return result.borrowReadyPtr();
}
736
/*
  \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
  This method uses the original methods on the Data class to evaluate the expressions.
  For this reason, it should not be used on DataExpanded instances. (To do so would defeat
  the purpose of using DataLazy in the first place).
  \throws DataException if called on a node that would evaluate to expanded data.
*/
void
DataLazy::collapse()
{
  if (m_op==IDENTITY)
  {
	return;		// already collapsed - nothing to do
  }
  if (m_readytype=='E')
  {	// this is more an efficiency concern than anything else
	throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  m_id=collapseToReady();	// evaluate the DAG below this node ...
  m_op=IDENTITY;		// ... and become an identity node holding the result
}
757
/*
  \brief Compute the value of the expression (unary operation) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
DataTypes::ValueType*
DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  // resolve the operand, then apply the pointwise op in-place at v[offset]
  const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
  const double* left=&((*vleft)[roffset]);
  double* result=&(v[offset]);
  roffset=offset;
  switch (m_op)
  {
    case SIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
	break;
    case COS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
	break;
    case TAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
	break;
    case ASIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
	break;
    case ACOS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
	break;
    case ATAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
	break;
    case SINH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
	break;
    case COSH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
	break;
    case TANH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
	break;
    case ERF:
	// erf is not available in the MSVC maths library
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
	tensor_unary_operation(m_samplesize, left, result, ::erf);
	break;
#endif
   case ASINH:
	// the inverse hyperbolics need substitutes on MSVC
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif
	break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif
	break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif
	break;
    case LOG10:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
	break;
    case LOG:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
	break;
    case SIGN:
	tensor_unary_operation(m_samplesize, left, result, escript::fsign);
	break;
    case ABS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
	break;
    case NEG:
	tensor_unary_operation(m_samplesize, left, result, negate<double>());
	break;
    case POS:
	// it doesn't mean anything for delayed.
	// it will just trigger a deep copy of the lazy object
	throw DataException("Programmer error - POS not supported for lazy data.");
	break;
    case EXP:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
	break;
    case SQRT:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
	break;
    case RECIP:
	tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
	break;
    case GZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
	break;
    case LZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
	break;
    case GEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
	break;
    case LEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
	break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
    case NEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
	break;
    case EZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
	break;

    default:
	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
895
896
897
898
899
900
901 /*
902 \brief Compute the value of the expression (unary operation) for the given sample.
903 \return Vector which stores the value of the subexpression for the given sample.
904 \param v A vector to store intermediate results.
905 \param offset Index in v to begin storing results.
906 \param sampleNo Sample number to evaluate.
907 \param roffset (output parameter) the offset in the return vector where the result begins.
908
909 The return value will be an existing vector so do not deallocate it.
910 If the result is stored in v it should be stored at the offset given.
911 Everything from offset to the end of v should be considered available for this method to use.
912 */
913 DataTypes::ValueType*
914 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
915 {
916 // we assume that any collapsing has been done before we get here
917 // since we only have one argument we don't need to think about only
918 // processing single points.
919 if (m_readytype!='E')
920 {
921 throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
922 }
923 // since we can't write the result over the input, we need a result offset further along
924 size_t subroffset=roffset+m_samplesize;
925 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927 roffset=offset;
928 size_t loop=0;
929 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930 size_t step=getNoValues();
931 switch (m_op)
932 {
933 case SYM:
934 for (loop=0;loop<numsteps;++loop)
935 {
936 DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937 subroffset+=step;
938 offset+=step;
939 }
940 break;
941 case NSYM:
942 for (loop=0;loop<numsteps;++loop)
943 {
944 DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945 subroffset+=step;
946 offset+=step;
947 }
948 break;
949 default:
950 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
951 }
952 return &v;
953 }
954
955 /*
956 \brief Compute the value of the expression (unary operation) for the given sample.
957 \return Vector which stores the value of the subexpression for the given sample.
958 \param v A vector to store intermediate results.
959 \param offset Index in v to begin storing results.
960 \param sampleNo Sample number to evaluate.
961 \param roffset (output parameter) the offset in the return vector where the result begins.
962
963 The return value will be an existing vector so do not deallocate it.
964 If the result is stored in v it should be stored at the offset given.
965 Everything from offset to the end of v should be considered available for this method to use.
966 */
967 DataTypes::ValueType*
968 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969 {
970 // we assume that any collapsing has been done before we get here
971 // since we only have one argument we don't need to think about only
972 // processing single points.
973 if (m_readytype!='E')
974 {
975 throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976 }
977 // since we can't write the result over the input, we need a result offset further along
978 size_t subroffset;
979 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
981 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
982 roffset=offset;
983 size_t loop=0;
984 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
985 size_t outstep=getNoValues();
986 size_t instep=m_left->getNoValues();
987 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
988 switch (m_op)
989 {
990 case TRACE:
991 for (loop=0;loop<numsteps;++loop)
992 {
993 size_t zz=sampleNo*getNumDPPSample()+loop;
994 if (zz==5800)
995 {
996 LAZYDEBUG(cerr << "point=" << zz<< endl;)
997 LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
998 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
999 LAZYDEBUG(cerr << subroffset << endl;)
1000 LAZYDEBUG(cerr << "output=" << offset << endl;)
1001 }
1002 DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1003 if (zz==5800)
1004 {
1005 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1006 }
1007 subroffset+=instep;
1008 offset+=outstep;
1009 }
1010 break;
1011 case TRANS:
1012 for (loop=0;loop<numsteps;++loop)
1013 {
1014 DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1015 subroffset+=instep;
1016 offset+=outstep;
1017 }
1018 break;
1019 default:
1020 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1021 }
1022 return &v;
1023 }
1024
1025
// PROC_OP applies the binary functor X (of type TYPE) elementwise for one sample.
// It relies on the stepping variables set up by resolveBinary being in scope:
//   chunksize             - doubles processed per tensor_binary_operation call
//   numsteps/onumsteps    - inner/outer loop trip counts
//   leftstep/rightstep    - per-inner-step advance of each operand's offset
//   oleftstep/orightstep  - per-outer-step adjustment (used to rewind a
//                           non-expanded operand, hence they may be negative)
//   lroffset/rroffset     - current read offsets into *left / *right
//   resultp/resultStep    - output cursor and its per-inner-step advance
// Note: lines inside a multi-line macro cannot carry // comments (the
// continuation backslash would be swallowed), so all documentation lives here.
#define PROC_OP(TYPE,X)                               \
	for (int j=0;j<onumsteps;++j)\
	{\
	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
	  { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
	     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
	     lroffset+=leftstep; \
	     rroffset+=rightstep; \
	  }\
	  lroffset+=oleftstep;\
	  rroffset+=orightstep;\
	}
1041
1042 /*
1043 \brief Compute the value of the expression (binary operation) for the given sample.
1044 \return Vector which stores the value of the subexpression for the given sample.
1045 \param v A vector to store intermediate results.
1046 \param offset Index in v to begin storing results.
1047 \param sampleNo Sample number to evaluate.
1048 \param roffset (output parameter) the offset in the return vector where the result begins.
1049
1050 The return value will be an existing vector so do not deallocate it.
1051 If the result is stored in v it should be stored at the offset given.
1052 Everything from offset to the end of v should be considered available for this method to use.
1053 */
1054 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1055 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1056 // If both children are expanded, then we can process them in a single operation (we treat
1057 // the whole sample as one big datapoint.
1058 // If one of the children is not expanded, then we need to treat each point in the sample
1059 // individually.
1060 // There is an additional complication when scalar operations are considered.
1061 // For example, 2+Vector.
1062 // In this case each double within the point is treated individually
1063 DataTypes::ValueType*
1064 DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1065 {
1066 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1067
1068 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1069 // first work out which of the children are expanded
1070 bool leftExp=(m_left->m_readytype=='E');
1071 bool rightExp=(m_right->m_readytype=='E');
1072 if (!leftExp && !rightExp)
1073 {
1074 throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1075 }
1076 bool leftScalar=(m_left->getRank()==0);
1077 bool rightScalar=(m_right->getRank()==0);
1078 if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1079 {
1080 throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1081 }
1082 size_t leftsize=m_left->getNoValues();
1083 size_t rightsize=m_right->getNoValues();
1084 size_t chunksize=1; // how many doubles will be processed in one go
1085 int leftstep=0; // how far should the left offset advance after each step
1086 int rightstep=0;
1087 int numsteps=0; // total number of steps for the inner loop
1088 int oleftstep=0; // the o variables refer to the outer loop
1089 int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1090 int onumsteps=1;
1091
1092 bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1093 bool RES=(rightExp && rightScalar);
1094 bool LS=(!leftExp && leftScalar); // left is a single scalar
1095 bool RS=(!rightExp && rightScalar);
1096 bool LN=(!leftExp && !leftScalar); // left is a single non-scalar
1097 bool RN=(!rightExp && !rightScalar);
1098 bool LEN=(leftExp && !leftScalar); // left is an expanded non-scalar
1099 bool REN=(rightExp && !rightScalar);
1100
1101 if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1102 {
1103 chunksize=m_left->getNumDPPSample()*leftsize;
1104 leftstep=0;
1105 rightstep=0;
1106 numsteps=1;
1107 }
1108 else if (LES || RES)
1109 {
1110 chunksize=1;
1111 if (LES) // left is an expanded scalar
1112 {
1113 if (RS)
1114 {
1115 leftstep=1;
1116 rightstep=0;
1117 numsteps=m_left->getNumDPPSample();
1118 }
1119 else // RN or REN
1120 {
1121 leftstep=0;
1122 oleftstep=1;
1123 rightstep=1;
1124 orightstep=(RN ? -(int)rightsize : 0);
1125 numsteps=rightsize;
1126 onumsteps=m_left->getNumDPPSample();
1127 }
1128 }
1129 else // right is an expanded scalar
1130 {
1131 if (LS)
1132 {
1133 rightstep=1;
1134 leftstep=0;
1135 numsteps=m_right->getNumDPPSample();
1136 }
1137 else
1138 {
1139 rightstep=0;
1140 orightstep=1;
1141 leftstep=1;
1142 oleftstep=(RN ? -(int)leftsize : 0);
1143 numsteps=leftsize;
1144 onumsteps=m_right->getNumDPPSample();
1145 }
1146 }
1147 }
1148 else // this leaves (LEN, RS), (LEN, RN) and their transposes
1149 {
1150 if (LEN) // and Right will be a single value
1151 {
1152 chunksize=rightsize;
1153 leftstep=rightsize;
1154 rightstep=0;
1155 numsteps=m_left->getNumDPPSample();
1156 if (RS)
1157 {
1158 numsteps*=leftsize;
1159 }
1160 }
1161 else // REN
1162 {
1163 chunksize=leftsize;
1164 rightstep=leftsize;
1165 leftstep=0;
1166 numsteps=m_right->getNumDPPSample();
1167 if (LS)
1168 {
1169 numsteps*=rightsize;
1170 }
1171 }
1172 }
1173
1174 int resultStep=max(leftstep,rightstep); // only one (at most) should be !=0
1175 // Get the values of sub-expressions
1176 const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1177 // calcBufss for why we can't put offset as the 2nd param above
1178 const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1179 // the right child starts further along.
1180 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1181 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1182 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1183 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1184 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1185 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1186 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
1187 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1188 switch(m_op)
1189 {
1190 case ADD:
1191 PROC_OP(NO_ARG,plus<double>());
1192 break;
1193 case SUB:
1194 PROC_OP(NO_ARG,minus<double>());
1195 break;
1196 case MUL:
1197 PROC_OP(NO_ARG,multiplies<double>());
1198 break;
1199 case DIV:
1200 PROC_OP(NO_ARG,divides<double>());
1201 break;
1202 case POW:
1203 PROC_OP(double (double,double),::pow);
1204 break;
1205 default:
1206 throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1207 }
1208 roffset=offset;
1209 return &v;
1210 }
1211
1212
1213
1214 /*
1215 \brief Compute the value of the expression (tensor product) for the given sample.
1216 \return Vector which stores the value of the subexpression for the given sample.
1217 \param v A vector to store intermediate results.
1218 \param offset Index in v to begin storing results.
1219 \param sampleNo Sample number to evaluate.
1220 \param roffset (output parameter) the offset in the return vector where the result begins.
1221
1222 The return value will be an existing vector so do not deallocate it.
1223 If the result is stored in v it should be stored at the offset given.
1224 Everything from offset to the end of v should be considered available for this method to use.
1225 */
1226 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1227 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1228 // unlike the other resolve helpers, we must treat these datapoints separately.
1229 DataTypes::ValueType*
1230 DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1231 {
1232 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1233
1234 size_t lroffset=0, rroffset=0; // offsets in the left and right result vectors
1235 // first work out which of the children are expanded
1236 bool leftExp=(m_left->m_readytype=='E');
1237 bool rightExp=(m_right->m_readytype=='E');
1238 int steps=getNumDPPSample();
1239 int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1240 int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1241 int resultStep=max(leftStep,rightStep); // only one (at most) should be !=0
1242 // Get the values of sub-expressions (leave a gap of one sample for the result).
1243 int gap=offset+m_left->getMaxSampleSize(); // actually only needs to be m_left->m_samplesize
1244 const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1245 gap+=m_right->getMaxSampleSize();
1246 const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1247 LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1248 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1249 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1250 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1251 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1252 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1253 double* resultp=&(v[offset]); // results are stored at the vector offset we recieved
1254 switch(m_op)
1255 {
1256 case PROD:
1257 for (int i=0;i<steps;++i,resultp+=resultStep)
1258 {
1259 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1260 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1261 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1262 const double *ptr_0 = &((*left)[lroffset]);
1263 const double *ptr_1 = &((*right)[rroffset]);
1264 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1265 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1266 matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1267 lroffset+=leftStep;
1268 rroffset+=rightStep;
1269 }
1270 break;
1271 default:
1272 throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1273 }
1274 roffset=offset;
1275 return &v;
1276 }
1277
1278
1279
1280 /*
1281 \brief Compute the value of the expression for the given sample.
1282 \return Vector which stores the value of the subexpression for the given sample.
1283 \param v A vector to store intermediate results.
1284 \param offset Index in v to begin storing results.
1285 \param sampleNo Sample number to evaluate.
1286 \param roffset (output parameter) the offset in the return vector where the result begins.
1287
1288 The return value will be an existing vector so do not deallocate it.
1289 */
1290 // the vector and the offset are a place where the method could write its data if it wishes
1291 // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1292 // Hence the return value to indicate where the data is actually stored.
1293 // Regardless, the storage should be assumed to be used, even if it isn't.
1294
1295 // the roffset is the offset within the returned vector where the data begins
1296 const DataTypes::ValueType*
1297 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1298 {
1299 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1300 // collapse so we have a 'E' node or an IDENTITY for some other type
1301 if (m_readytype!='E' && m_op!=IDENTITY)
1302 {
1303 collapse();
1304 }
1305 if (m_op==IDENTITY)
1306 {
1307 const ValueType& vec=m_id->getVector();
1308 if (m_readytype=='C')
1309 {
1310 roffset=0;
1311 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1312 return &(vec);
1313 }
1314 roffset=m_id->getPointOffset(sampleNo, 0);
1315 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1316 return &(vec);
1317 }
1318 if (m_readytype!='E')
1319 {
1320 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1321 }
1322 switch (getOpgroup(m_op))
1323 {
1324 case G_UNARY:
1325 case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1326 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1327 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1328 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1329 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1330 default:
1331 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1332 }
1333
1334 }
1335
1336
// To simplify the memory management, all threads operate on one large vector, rather than one each.
// Each sample is evaluated independently and copied into the result DataExpanded.
// Layout of v: one contiguous region of threadbuffersize doubles per thread; each
// thread evaluates into its own region, so no synchronisation is needed in the loop.
DataReady_ptr
DataLazy::resolve()
{

LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)

  if (m_readytype!='E')		// if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)		// So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
  	// from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  // Each thread needs enough scratch to evaluate the deepest chain of buffers
  // in its expression (m_buffsRequired), each of up to m_maxsamplesize doubles.
  size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired));	// Each thread needs to have enough
	// storage to evaluate its expression
  int numthreads=1;
#ifdef _OPENMP
  numthreads=getNumberOfThreads();
#endif
  ValueType v(numthreads*threadbuffersize);
LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVector();
  DataReady_ptr resptr=DataReady_ptr(result);	// shared ownership established before the parallel loop
  int sample;
  size_t outoffset;		// offset in the output data
  int totalsamples=getNumSamples();
  const ValueType* res=0;	// Vector storing the answer
  size_t resoffset=0;		// where in the vector to find the answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
  for (sample=0;sample<totalsamples;++sample)
  {
		// NOTE(review): leftover debug scaffolding - enables privdebug for sample 0
		// only; a no-op while LAZYDEBUG compiles to nothing, but not thread-safe if
		// re-enabled (privdebug is a shared flag). Consider removing.
		if (sample==0)  {ENABLEDEBUG}

//		if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}
LAZYDEBUG(cout << "################################# " << sample << endl;)
#ifdef _OPENMP
    res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
#else
    res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
#endif
LAZYDEBUG(cerr << "-------------------------------- " << endl;)
LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
    outoffset=result->getPointOffset(sample,0);
LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
    // Copy this sample's values from wherever the subexpression left them (*res at
    // resoffset) into the output vector. resoffset is advanced here, which is fine:
    // it is private and recomputed by resolveSample on the next iteration.
    for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)	// copy values into the output vector
    {
LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
	resvec[outoffset]=(*res)[resoffset];
    }
LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
LAZYDEBUG(cerr << "*********************************" << endl;)
		DISABLEDEBUG
  }
  return resptr;
}
1399
1400 std::string
1401 DataLazy::toString() const
1402 {
1403 ostringstream oss;
1404 oss << "Lazy Data:";
1405 intoString(oss);
1406 return oss.str();
1407 }
1408
1409
1410 void
1411 DataLazy::intoString(ostringstream& oss) const
1412 {
1413 switch (getOpgroup(m_op))
1414 {
1415 case G_IDENTITY:
1416 if (m_id->isExpanded())
1417 {
1418 oss << "E";
1419 }
1420 else if (m_id->isTagged())
1421 {
1422 oss << "T";
1423 }
1424 else if (m_id->isConstant())
1425 {
1426 oss << "C";
1427 }
1428 else
1429 {
1430 oss << "?";
1431 }
1432 oss << '@' << m_id.get();
1433 break;
1434 case G_BINARY:
1435 oss << '(';
1436 m_left->intoString(oss);
1437 oss << ' ' << opToString(m_op) << ' ';
1438 m_right->intoString(oss);
1439 oss << ')';
1440 break;
1441 case G_UNARY:
1442 case G_UNARY_P:
1443 case G_NP1OUT:
1444 case G_NP1OUT_P:
1445 oss << opToString(m_op) << '(';
1446 m_left->intoString(oss);
1447 oss << ')';
1448 break;
1449 case G_TENSORPROD:
1450 oss << opToString(m_op) << '(';
1451 m_left->intoString(oss);
1452 oss << ", ";
1453 m_right->intoString(oss);
1454 oss << ')';
1455 break;
1456 default:
1457 oss << "UNKNOWN";
1458 }
1459 }
1460
// Produce a deep copy of the expression tree: each DataLazy node is recreated and
// each DataReady leaf is deep-copied, so the copy shares no storage with the original.
// NOTE(review): G_UNARY_P and G_NP1OUT_P fall through to the default case and throw,
// although resolveSample/intoString handle those groups - confirm whether deepCopy
// should support them too (they carry extra parameters such as m_axis_offset).
// NOTE(review): the G_NP1OUT case dereferences m_right, but NP1OUT ops (SYM/NSYM)
// are unary elsewhere in this file - verify m_right is non-null for that group.
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_BINARY:	return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
	throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
1475
1476
// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
// \throws DataException unconditionally.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
1488
1489
// Slicing is not supported on lazy data; resolve the expression first if a slice is needed.
// \throws DataException unconditionally.
DataAbstract*
DataLazy::getSlice(const DataTypes::RegionType& region) const
{
  throw DataException("getSlice - not implemented for Lazy objects.");
}
1495
1496
1497 // To do this we need to rely on our child nodes
1498 DataTypes::ValueType::size_type
1499 DataLazy::getPointOffset(int sampleNo,
1500 int dataPointNo)
1501 {
1502 if (m_op==IDENTITY)
1503 {
1504 return m_id->getPointOffset(sampleNo,dataPointNo);
1505 }
1506 if (m_readytype!='E')
1507 {
1508 collapse();
1509 return m_id->getPointOffset(sampleNo,dataPointNo);
1510 }
1511 // at this point we do not have an identity node and the expression will be Expanded
1512 // so we only need to know which child to ask
1513 if (m_left->m_readytype=='E')
1514 {
1515 return m_left->getPointOffset(sampleNo,dataPointNo);
1516 }
1517 else
1518 {
1519 return m_right->getPointOffset(sampleNo,dataPointNo);
1520 }
1521 }
1522
1523 // To do this we need to rely on our child nodes
1524 DataTypes::ValueType::size_type
1525 DataLazy::getPointOffset(int sampleNo,
1526 int dataPointNo) const
1527 {
1528 if (m_op==IDENTITY)
1529 {
1530 return m_id->getPointOffset(sampleNo,dataPointNo);
1531 }
1532 if (m_readytype=='E')
1533 {
1534 // at this point we do not have an identity node and the expression will be Expanded
1535 // so we only need to know which child to ask
1536 if (m_left->m_readytype=='E')
1537 {
1538 return m_left->getPointOffset(sampleNo,dataPointNo);
1539 }
1540 else
1541 {
1542 return m_right->getPointOffset(sampleNo,dataPointNo);
1543 }
1544 }
1545 if (m_readytype=='C')
1546 {
1547 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1548 }
1549 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1550 }
1551
1552 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1553 // to zero, all the tags from all the DataTags would be in the result.
1554 // However since they all have the same value (0) whether they are there or not should not matter.
1555 // So I have decided that for all types this method will create a constant 0.
1556 // It can be promoted up as required.
1557 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1558 // but we can deal with that if it arrises.
1559 void
1560 DataLazy::setToZero()
1561 {
1562 DataTypes::ValueType v(getNoValues(),0);
1563 m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1564 m_op=IDENTITY;
1565 m_right.reset();
1566 m_left.reset();
1567 m_readytype='C';
1568 m_buffsRequired=1;
1569 }
1570
1571 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26