/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Contents of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2195 - (show annotations)
Wed Jan 7 04:13:52 2009 UTC (10 years, 2 months ago) by jfenwick
Original Path: trunk/escript/src/DataLazy.cpp
File size: 53873 byte(s)
Fixed a bug in lazy evaluation of Tensor Products.
Added / to end of boost path

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "DataLazy.h"
16 #ifdef USE_NETCDF
17 #include <netcdfcpp.h>
18 #endif
19 #ifdef PASO_MPI
20 #include <mpi.h>
21 #endif
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25 #include "FunctionSpace.h"
26 #include "DataTypes.h"
27 #include "Data.h"
28 #include "UnaryFuncs.h" // for escript::fsign
29 #include "Utils.h"
30
31 // #define LAZYDEBUG(X) if (privdebug){X;}
32 #define LAZYDEBUG(X)
33 namespace
34 {
35 bool privdebug=false;
36
37 #define ENABLEDEBUG privdebug=true;
38 #define DISABLEDEBUG privdebug=false;
39 }
40
41 #define SIZELIMIT
42 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
43
44
45 /*
46 How does DataLazy work?
47 ~~~~~~~~~~~~~~~~~~~~~~~
48
49 Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
50 denoted left and right.
51
52 A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
53 This means that all "internal" nodes in the structure are instances of DataLazy.
54
55 Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
56 Note that IDENTITY is not considered a unary operation.
57
58 I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
59 It must however form a DAG (directed acyclic graph).
60 I will refer to individual DataLazy objects with the structure as nodes.
61
62 Each node also stores:
63 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
64 evaluated.
65 - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
66 evaluate the expression.
67 - m_samplesize ~ the number of doubles stored in a sample.
68
69 When a new node is created, the above values are computed based on the values in the child nodes.
70 Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
71
72 The resolve method, which produces a DataReady from a DataLazy, does the following:
73 1) Create a DataReady to hold the new result.
74 2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
75 3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
76
77 (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
78
79 resolveSample returns a Vector* and an offset within that vector where the result is stored.
80 Normally, this would be v, but for identity nodes their internal vector is returned instead.
81
82 The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
83
84 For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
85 The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
86
87 To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
88 1) Add to the ES_optype.
89 2) determine what opgroup your operation belongs to (X)
90 3) add a string for the op to the end of ES_opstrings
91 4) increase ES_opcount
92 5) add an entry (X) to opgroups
93 6) add an entry to the switch in collapseToReady
94 7) add an entry to resolveX
95 */
96
97
98 using namespace std;
99 using namespace boost;
100
101 namespace escript
102 {
103
104 namespace
105 {
106
107 enum ES_opgroup
108 {
109 G_UNKNOWN,
110 G_IDENTITY,
111 G_BINARY, // pointwise operations with two arguments
112 G_UNARY, // pointwise operations with one argument
113 G_UNARY_P, // pointwise operations with one argument, requiring a parameter
114 G_NP1OUT, // non-pointwise op with one output
115 G_NP1OUT_P, // non-pointwise op with one output requiring a parameter
116 G_TENSORPROD // general tensor product
117 };
118
119
120
121
122 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
123 "sin","cos","tan",
124 "asin","acos","atan","sinh","cosh","tanh","erf",
125 "asinh","acosh","atanh",
126 "log10","log","sign","abs","neg","pos","exp","sqrt",
127 "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
128 "symmetric","nonsymmetric",
129 "prod",
130 "transpose", "trace"};
131 int ES_opcount=40;
132 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
133 G_UNARY,G_UNARY,G_UNARY, //10
134 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 17
135 G_UNARY,G_UNARY,G_UNARY, // 20
136 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, // 28
137 G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P, // 35
138 G_NP1OUT,G_NP1OUT,
139 G_TENSORPROD,
140 G_NP1OUT_P, G_NP1OUT_P};
141 inline
142 ES_opgroup
143 getOpgroup(ES_optype op)
144 {
145 return opgroups[op];
146 }
147
148 // return the FunctionSpace of the result of "left op right"
149 FunctionSpace
150 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
151 {
152 // perhaps this should call interpolate and throw or something?
153 // maybe we need an interpolate node -
154 // that way, if interpolate is required in any other op we can just throw a
155 // programming error exception.
156
157 FunctionSpace l=left->getFunctionSpace();
158 FunctionSpace r=right->getFunctionSpace();
159 if (l!=r)
160 {
161 if (r.probeInterpolation(l))
162 {
163 return l;
164 }
165 if (l.probeInterpolation(r))
166 {
167 return r;
168 }
169 throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
170 }
171 return l;
172 }
173
174 // return the shape of the result of "left op right"
175 // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
176 DataTypes::ShapeType
177 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
178 {
179 if (left->getShape()!=right->getShape())
180 {
181 if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
182 {
183 throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
184 }
185 if (left->getRank()==0) // we need to allow scalar * anything
186 {
187 return right->getShape();
188 }
189 if (right->getRank()==0)
190 {
191 return left->getShape();
192 }
193 throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
194 }
195 return left->getShape();
196 }
197
198 // return the shape for "op left"
199
200 DataTypes::ShapeType
201 resultShape(DataAbstract_ptr left, ES_optype op)
202 {
203 switch(op)
204 {
205 case TRANS:
206 return left->getShape();
207 break;
208 case TRACE:
209 return DataTypes::scalarShape;
210 break;
211 default:
212 throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
213 }
214 }
215
216 // determine the output shape for the general tensor product operation
217 // the additional parameters return information required later for the product
218 // the majority of this code is copy pasted from C_General_Tensor_Product
219 DataTypes::ShapeType
220 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
221 {
222
223 // Get rank and shape of inputs
224 int rank0 = left->getRank();
225 int rank1 = right->getRank();
226 const DataTypes::ShapeType& shape0 = left->getShape();
227 const DataTypes::ShapeType& shape1 = right->getShape();
228
229 // Prepare for the loops of the product and verify compatibility of shapes
230 int start0=0, start1=0;
231 if (transpose == 0) {}
232 else if (transpose == 1) { start0 = axis_offset; }
233 else if (transpose == 2) { start1 = rank1-axis_offset; }
234 else { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
235
236 if (rank0<axis_offset)
237 {
238 throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
239 }
240
241 // Adjust the shapes for transpose
242 DataTypes::ShapeType tmpShape0(rank0); // pre-sizing the vectors rather
243 DataTypes::ShapeType tmpShape1(rank1); // than using push_back
244 for (int i=0; i<rank0; i++) { tmpShape0[i]=shape0[(i+start0)%rank0]; }
245 for (int i=0; i<rank1; i++) { tmpShape1[i]=shape1[(i+start1)%rank1]; }
246
247 // Prepare for the loops of the product
248 SL=1, SM=1, SR=1;
249 for (int i=0; i<rank0-axis_offset; i++) {
250 SL *= tmpShape0[i];
251 }
252 for (int i=rank0-axis_offset; i<rank0; i++) {
253 if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
254 throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
255 }
256 SM *= tmpShape0[i];
257 }
258 for (int i=axis_offset; i<rank1; i++) {
259 SR *= tmpShape1[i];
260 }
261
262 // Define the shape of the output (rank of shape is the sum of the loop ranges below)
263 DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
264 { // block to limit the scope of out_index
265 int out_index=0;
266 for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
267 for (int i=axis_offset; i<rank1; i++, ++out_index) { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
268 }
269
270 if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
271 {
272 ostringstream os;
273 os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
274 throw DataException(os.str());
275 }
276
277 return shape2;
278 }
279
280 // determine the number of samples requires to evaluate an expression combining left and right
281 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
282 // The same goes for G_TENSORPROD
283 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
284 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
285 // multiple times
286 int
287 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
288 {
289 switch(getOpgroup(op))
290 {
291 case G_IDENTITY: return 1;
292 case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
293 case G_UNARY:
294 case G_UNARY_P: return max(left->getBuffsRequired(),1);
295 case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
296 case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
297 case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
298 default:
299 throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
300 }
301 }
302
303
304 } // end anonymous namespace
305
306
307
308 // Return a string representing the operation
309 const std::string&
310 opToString(ES_optype op)
311 {
312 if (op<0 || op>=ES_opcount)
313 {
314 op=UNKNOWNOP;
315 }
316 return ES_opstrings[op];
317 }
318
319
320 DataLazy::DataLazy(DataAbstract_ptr p)
321 : parent(p->getFunctionSpace(),p->getShape())
322 {
323 if (p->isLazy())
324 {
325 // I don't want identity of Lazy.
326 // Question: Why would that be so bad?
327 // Answer: We assume that the child of ID is something we can call getVector on
328 throw DataException("Programmer error - attempt to create identity from a DataLazy.");
329 }
330 else
331 {
332 DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
333 makeIdentity(dr);
334 cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;
335 }
336 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
337 }
338
339
340
341
342 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
343 : parent(left->getFunctionSpace(),left->getShape()),
344 m_op(op),
345 m_axis_offset(0),
346 m_transpose(0),
347 m_SL(0), m_SM(0), m_SR(0)
348 {
349 if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
350 {
351 throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
352 }
353
354 DataLazy_ptr lleft;
355 if (!left->isLazy())
356 {
357 lleft=DataLazy_ptr(new DataLazy(left));
358 }
359 else
360 {
361 lleft=dynamic_pointer_cast<DataLazy>(left);
362 }
363 m_readytype=lleft->m_readytype;
364 m_left=lleft;
365 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
366 m_samplesize=getNumDPPSample()*getNoValues();
367 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
368 m_children=m_left->m_children+1;
369 m_height=m_left->m_height+1;
370 SIZELIMIT
371 }
372
373
374 // In this constructor we need to consider interpolation
375 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
376 : parent(resultFS(left,right,op), resultShape(left,right,op)),
377 m_op(op),
378 m_SL(0), m_SM(0), m_SR(0)
379 {
380 cout << "Forming operator with " << left.get() << " " << right.get() << endl;
381 if ((getOpgroup(op)!=G_BINARY))
382 {
383 throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
384 }
385
386 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
387 {
388 FunctionSpace fs=getFunctionSpace();
389 Data ltemp(left);
390 Data tmp(ltemp,fs);
391 left=tmp.borrowDataPtr();
392 }
393 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
394 {
395 Data tmp(Data(right),getFunctionSpace());
396 right=tmp.borrowDataPtr();
397 cout << "Right interpolation required " << right.get() << endl;
398 }
399 left->operandCheck(*right);
400
401 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
402 {
403 m_left=dynamic_pointer_cast<DataLazy>(left);
404 cout << "Left is " << m_left->toString() << endl;
405 }
406 else
407 {
408 m_left=DataLazy_ptr(new DataLazy(left));
409 cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;
410 }
411 if (right->isLazy())
412 {
413 m_right=dynamic_pointer_cast<DataLazy>(right);
414 cout << "Right is " << m_right->toString() << endl;
415 }
416 else
417 {
418 m_right=DataLazy_ptr(new DataLazy(right));
419 cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;
420 }
421 char lt=m_left->m_readytype;
422 char rt=m_right->m_readytype;
423 if (lt=='E' || rt=='E')
424 {
425 m_readytype='E';
426 }
427 else if (lt=='T' || rt=='T')
428 {
429 m_readytype='T';
430 }
431 else
432 {
433 m_readytype='C';
434 }
435 m_samplesize=getNumDPPSample()*getNoValues();
436 m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
437 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
438 m_children=m_left->m_children+m_right->m_children+2;
439 m_height=max(m_left->m_height,m_right->m_height)+1;
440 SIZELIMIT
441 LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
442 }
443
444 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
445 : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
446 m_op(op),
447 m_axis_offset(axis_offset),
448 m_transpose(transpose)
449 {
450 if ((getOpgroup(op)!=G_TENSORPROD))
451 {
452 throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
453 }
454 if ((transpose>2) || (transpose<0))
455 {
456 throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
457 }
458 if (getFunctionSpace()!=left->getFunctionSpace()) // left needs to be interpolated
459 {
460 FunctionSpace fs=getFunctionSpace();
461 Data ltemp(left);
462 Data tmp(ltemp,fs);
463 left=tmp.borrowDataPtr();
464 }
465 if (getFunctionSpace()!=right->getFunctionSpace()) // right needs to be interpolated
466 {
467 Data tmp(Data(right),getFunctionSpace());
468 right=tmp.borrowDataPtr();
469 }
470 // left->operandCheck(*right);
471
472 if (left->isLazy()) // the children need to be DataLazy. Wrap them in IDENTITY if required
473 {
474 m_left=dynamic_pointer_cast<DataLazy>(left);
475 }
476 else
477 {
478 m_left=DataLazy_ptr(new DataLazy(left));
479 }
480 if (right->isLazy())
481 {
482 m_right=dynamic_pointer_cast<DataLazy>(right);
483 }
484 else
485 {
486 m_right=DataLazy_ptr(new DataLazy(right));
487 }
488 char lt=m_left->m_readytype;
489 char rt=m_right->m_readytype;
490 if (lt=='E' || rt=='E')
491 {
492 m_readytype='E';
493 }
494 else if (lt=='T' || rt=='T')
495 {
496 m_readytype='T';
497 }
498 else
499 {
500 m_readytype='C';
501 }
502 m_samplesize=getNumDPPSample()*getNoValues();
503 m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());
504 m_buffsRequired=calcBuffs(m_left, m_right,m_op);
505 m_children=m_left->m_children+m_right->m_children+2;
506 m_height=max(m_left->m_height,m_right->m_height)+1;
507 SIZELIMIT
508 LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
509 }
510
511
512 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
513 : parent(left->getFunctionSpace(), resultShape(left,op)),
514 m_op(op),
515 m_axis_offset(axis_offset),
516 m_transpose(0),
517 m_tol(0)
518 {
519 if ((getOpgroup(op)!=G_NP1OUT_P))
520 {
521 throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
522 }
523 DataLazy_ptr lleft;
524 if (!left->isLazy())
525 {
526 lleft=DataLazy_ptr(new DataLazy(left));
527 }
528 else
529 {
530 lleft=dynamic_pointer_cast<DataLazy>(left);
531 }
532 m_readytype=lleft->m_readytype;
533 m_left=lleft;
534 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
535 m_samplesize=getNumDPPSample()*getNoValues();
536 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
537 m_children=m_left->m_children+1;
538 m_height=m_left->m_height+1;
539 SIZELIMIT
540 LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
541 }
542
543 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
544 : parent(left->getFunctionSpace(), left->getShape()),
545 m_op(op),
546 m_axis_offset(0),
547 m_transpose(0),
548 m_tol(tol)
549 {
550 if ((getOpgroup(op)!=G_UNARY_P))
551 {
552 throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
553 }
554 DataLazy_ptr lleft;
555 if (!left->isLazy())
556 {
557 lleft=DataLazy_ptr(new DataLazy(left));
558 }
559 else
560 {
561 lleft=dynamic_pointer_cast<DataLazy>(left);
562 }
563 m_readytype=lleft->m_readytype;
564 m_left=lleft;
565 m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
566 m_samplesize=getNumDPPSample()*getNoValues();
567 m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
568 m_children=m_left->m_children+1;
569 m_height=m_left->m_height+1;
570 SIZELIMIT
571 LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
572 }
573
574 DataLazy::~DataLazy()
575 {
576 }
577
578
579 int
580 DataLazy::getBuffsRequired() const
581 {
582 return m_buffsRequired;
583 }
584
585
586 size_t
587 DataLazy::getMaxSampleSize() const
588 {
589 return m_maxsamplesize;
590 }
591
592 /*
593 \brief Evaluates the expression using methods on Data.
594 This does the work for the collapse method.
595 For reasons of efficiency do not call this method on DataExpanded nodes.
596 */
597 DataReady_ptr
598 DataLazy::collapseToReady()
599 {
600 if (m_readytype=='E')
601 { // this is more an efficiency concern than anything else
602 throw DataException("Programmer Error - do not use collapse on Expanded data.");
603 }
604 if (m_op==IDENTITY)
605 {
606 return m_id;
607 }
608 DataReady_ptr pleft=m_left->collapseToReady();
609 Data left(pleft);
610 Data right;
611 if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
612 {
613 right=Data(m_right->collapseToReady());
614 }
615 Data result;
616 switch(m_op)
617 {
618 case ADD:
619 result=left+right;
620 break;
621 case SUB:
622 result=left-right;
623 break;
624 case MUL:
625 result=left*right;
626 break;
627 case DIV:
628 result=left/right;
629 break;
630 case SIN:
631 result=left.sin();
632 break;
633 case COS:
634 result=left.cos();
635 break;
636 case TAN:
637 result=left.tan();
638 break;
639 case ASIN:
640 result=left.asin();
641 break;
642 case ACOS:
643 result=left.acos();
644 break;
645 case ATAN:
646 result=left.atan();
647 break;
648 case SINH:
649 result=left.sinh();
650 break;
651 case COSH:
652 result=left.cosh();
653 break;
654 case TANH:
655 result=left.tanh();
656 break;
657 case ERF:
658 result=left.erf();
659 break;
660 case ASINH:
661 result=left.asinh();
662 break;
663 case ACOSH:
664 result=left.acosh();
665 break;
666 case ATANH:
667 result=left.atanh();
668 break;
669 case LOG10:
670 result=left.log10();
671 break;
672 case LOG:
673 result=left.log();
674 break;
675 case SIGN:
676 result=left.sign();
677 break;
678 case ABS:
679 result=left.abs();
680 break;
681 case NEG:
682 result=left.neg();
683 break;
684 case POS:
685 // it doesn't mean anything for delayed.
686 // it will just trigger a deep copy of the lazy object
687 throw DataException("Programmer error - POS not supported for lazy data.");
688 break;
689 case EXP:
690 result=left.exp();
691 break;
692 case SQRT:
693 result=left.sqrt();
694 break;
695 case RECIP:
696 result=left.oneOver();
697 break;
698 case GZ:
699 result=left.wherePositive();
700 break;
701 case LZ:
702 result=left.whereNegative();
703 break;
704 case GEZ:
705 result=left.whereNonNegative();
706 break;
707 case LEZ:
708 result=left.whereNonPositive();
709 break;
710 case NEZ:
711 result=left.whereNonZero(m_tol);
712 break;
713 case EZ:
714 result=left.whereZero(m_tol);
715 break;
716 case SYM:
717 result=left.symmetric();
718 break;
719 case NSYM:
720 result=left.nonsymmetric();
721 break;
722 case PROD:
723 result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
724 break;
725 case TRANS:
726 result=left.transpose(m_axis_offset);
727 break;
728 case TRACE:
729 result=left.trace(m_axis_offset);
730 break;
731 default:
732 throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
733 }
734 return result.borrowReadyPtr();
735 }
736
737 /*
738 \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
739 This method uses the original methods on the Data class to evaluate the expressions.
740 For this reason, it should not be used on DataExpanded instances. (To do so would defeat
741 the purpose of using DataLazy in the first place).
742 */
743 void
744 DataLazy::collapse()
745 {
746 if (m_op==IDENTITY)
747 {
748 return;
749 }
750 if (m_readytype=='E')
751 { // this is more an efficiency concern than anything else
752 throw DataException("Programmer Error - do not use collapse on Expanded data.");
753 }
754 m_id=collapseToReady();
755 m_op=IDENTITY;
756 }
757
758 /*
759 \brief Compute the value of the expression (unary operation) for the given sample.
760 \return Vector which stores the value of the subexpression for the given sample.
761 \param v A vector to store intermediate results.
762 \param offset Index in v to begin storing results.
763 \param sampleNo Sample number to evaluate.
764 \param roffset (output parameter) the offset in the return vector where the result begins.
765
766 The return value will be an existing vector so do not deallocate it.
767 If the result is stored in v it should be stored at the offset given.
768 Everything from offset to the end of v should be considered available for this method to use.
769 */
770 DataTypes::ValueType*
771 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
772 {
773 // we assume that any collapsing has been done before we get here
774 // since we only have one argument we don't need to think about only
775 // processing single points.
776 if (m_readytype!='E')
777 {
778 throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
779 }
780 const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
781 const double* left=&((*vleft)[roffset]);
782 double* result=&(v[offset]);
783 roffset=offset;
784 switch (m_op)
785 {
786 case SIN:
787 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
788 break;
789 case COS:
790 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
791 break;
792 case TAN:
793 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
794 break;
795 case ASIN:
796 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
797 break;
798 case ACOS:
799 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
800 break;
801 case ATAN:
802 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
803 break;
804 case SINH:
805 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
806 break;
807 case COSH:
808 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
809 break;
810 case TANH:
811 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
812 break;
813 case ERF:
814 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815 throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
816 #else
817 tensor_unary_operation(m_samplesize, left, result, ::erf);
818 break;
819 #endif
820 case ASINH:
821 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822 tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
823 #else
824 tensor_unary_operation(m_samplesize, left, result, ::asinh);
825 #endif
826 break;
827 case ACOSH:
828 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829 tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
830 #else
831 tensor_unary_operation(m_samplesize, left, result, ::acosh);
832 #endif
833 break;
834 case ATANH:
835 #if defined (_WIN32) && !defined(__INTEL_COMPILER)
836 tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
837 #else
838 tensor_unary_operation(m_samplesize, left, result, ::atanh);
839 #endif
840 break;
841 case LOG10:
842 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
843 break;
844 case LOG:
845 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
846 break;
847 case SIGN:
848 tensor_unary_operation(m_samplesize, left, result, escript::fsign);
849 break;
850 case ABS:
851 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
852 break;
853 case NEG:
854 tensor_unary_operation(m_samplesize, left, result, negate<double>());
855 break;
856 case POS:
857 // it doesn't mean anything for delayed.
858 // it will just trigger a deep copy of the lazy object
859 throw DataException("Programmer error - POS not supported for lazy data.");
860 break;
861 case EXP:
862 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
863 break;
864 case SQRT:
865 tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
866 break;
867 case RECIP:
868 tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
869 break;
870 case GZ:
871 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
872 break;
873 case LZ:
874 tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
875 break;
876 case GEZ:
877 tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
878 break;
879 case LEZ:
880 tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
881 break;
882 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
883 case NEZ:
884 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
885 break;
886 case EZ:
887 tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
888 break;
889
890 default:
891 throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
892 }
893 return &v;
894 }
895
896
897
898
899
900
901 /*
902 \brief Compute the value of the expression (unary operation) for the given sample.
903 \return Vector which stores the value of the subexpression for the given sample.
904 \param v A vector to store intermediate results.
905 \param offset Index in v to begin storing results.
906 \param sampleNo Sample number to evaluate.
907 \param roffset (output parameter) the offset in the return vector where the result begins.
908
909 The return value will be an existing vector so do not deallocate it.
910 If the result is stored in v it should be stored at the offset given.
911 Everything from offset to the end of v should be considered available for this method to use.
912 */
913 DataTypes::ValueType*
914 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
915 {
916 // we assume that any collapsing has been done before we get here
917 // since we only have one argument we don't need to think about only
918 // processing single points.
919 if (m_readytype!='E')
920 {
921 throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
922 }
923 // since we can't write the result over the input, we need a result offset further along
924 size_t subroffset=roffset+m_samplesize;
925 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927 roffset=offset;
928 size_t loop=0;
929 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930 size_t step=getNoValues();
931 switch (m_op)
932 {
933 case SYM:
934 for (loop=0;loop<numsteps;++loop)
935 {
936 DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937 subroffset+=step;
938 offset+=step;
939 }
940 break;
941 case NSYM:
942 for (loop=0;loop<numsteps;++loop)
943 {
944 DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945 subroffset+=step;
946 offset+=step;
947 }
948 break;
949 default:
950 throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
951 }
952 return &v;
953 }
954
955 /*
956 \brief Compute the value of the expression (unary operation) for the given sample.
957 \return Vector which stores the value of the subexpression for the given sample.
958 \param v A vector to store intermediate results.
959 \param offset Index in v to begin storing results.
960 \param sampleNo Sample number to evaluate.
961 \param roffset (output parameter) the offset in the return vector where the result begins.
962
963 The return value will be an existing vector so do not deallocate it.
964 If the result is stored in v it should be stored at the offset given.
965 Everything from offset to the end of v should be considered available for this method to use.
966 */
967 DataTypes::ValueType*
968 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969 {
970 // we assume that any collapsing has been done before we get here
971 // since we only have one argument we don't need to think about only
972 // processing single points.
973 if (m_readytype!='E')
974 {
975 throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976 }
977 // since we can't write the result over the input, we need a result offset further along
978 size_t subroffset;
979 const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
981 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
982 roffset=offset;
983 size_t loop=0;
984 size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
985 size_t outstep=getNoValues();
986 size_t instep=m_left->getNoValues();
987 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
988 switch (m_op)
989 {
990 case TRACE:
991 for (loop=0;loop<numsteps;++loop)
992 {
993 size_t zz=sampleNo*getNumDPPSample()+loop;
994 if (zz==5800)
995 {
996 LAZYDEBUG(cerr << "point=" << zz<< endl;)
997 LAZYDEBUG(cerr << "Input to trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
998 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
999 LAZYDEBUG(cerr << subroffset << endl;)
1000 LAZYDEBUG(cerr << "output=" << offset << endl;)
1001 }
1002 DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1003 if (zz==5800)
1004 {
1005 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1006 }
1007 subroffset+=instep;
1008 offset+=outstep;
1009 }
1010 break;
1011 case TRANS:
1012 for (loop=0;loop<numsteps;++loop)
1013 {
1014 DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1015 subroffset+=instep;
1016 offset+=outstep;
1017 }
1018 break;
1019 default:
1020 throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1021 }
1022 return &v;
1023 }
1024
1025
// Shared inner loops for the element-wise cases in resolveBinary.
// TYPE/X name the operation handed to tensor_binary_operation (eg plus<double>()).
// Relies on the following being in scope at the expansion site:
//   left/right (input vectors), lroffset/rroffset (read cursors),
//   resultp/resultStep (write cursor), chunksize and
//   leftstep/rightstep/numsteps (inner loop) plus
//   oleftstep/orightstep/onumsteps (outer loop, only non-trivial for
//   expanded-scalar operands).
// This comment sits above the macro because '//' comments cannot appear
// inside the '\' line continuations.
#define PROC_OP(TYPE,X)                               \
	for (int j=0;j<onumsteps;++j)\
	{\
	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
	  { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
	     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result= " << resultp[0] << endl;) \
	     lroffset+=leftstep; \
	     rroffset+=rightstep; \
	  }\
	  lroffset+=oleftstep;\
	  rroffset+=orightstep;\
	}
1041
1042 /*
1043 \brief Compute the value of the expression (binary operation) for the given sample.
1044 \return Vector which stores the value of the subexpression for the given sample.
1045 \param v A vector to store intermediate results.
1046 \param offset Index in v to begin storing results.
1047 \param sampleNo Sample number to evaluate.
1048 \param roffset (output parameter) the offset in the return vector where the result begins.
1049
1050 The return value will be an existing vector so do not deallocate it.
1051 If the result is stored in v it should be stored at the offset given.
1052 Everything from offset to the end of v should be considered available for this method to use.
1053 */
1054 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1055 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1056 // If both children are expanded, then we can process them in a single operation (we treat
1057 // the whole sample as one big datapoint.
1058 // If one of the children is not expanded, then we need to treat each point in the sample
1059 // individually.
1060 // There is an additional complication when scalar operations are considered.
1061 // For example, 2+Vector.
1062 // In this case each double within the point is treated individually
// Computes one sample of a binary element-wise operation (ADD/SUB/MUL/DIV/POW),
// writing the result into v at the given offset.
// The step bookkeeping below lines up reads when the operands differ in kind
// (expanded vs single datapoint, scalar vs non-scalar).
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
	throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
	throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;	// how many doubles will be processed in one go
  int leftstep=0;	// how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;	// total number of steps for the inner loop
  int oleftstep=0;	// the o variables refer to the outer loop
  int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

	// Classify the operands; exactly one flag per side holds.
  bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);	// left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
  {
	// matching layouts: the whole sample can be treated as one big datapoint
	chunksize=m_left->getNumDPPSample()*leftsize;
	leftstep=0;
	rightstep=0;
	numsteps=1;
  }
  else if (LES || RES)
  {
	chunksize=1;	// expanded scalar present: combine one double at a time
	if (LES)		// left is an expanded scalar
	{
		if (RS)
		{
		   leftstep=1;
		   rightstep=0;
		   numsteps=m_left->getNumDPPSample();
		}
		else		// RN or REN
		{
			// each left scalar combines with a whole right datapoint:
			// outer loop walks the scalars, inner loop walks the point.
			// For RN (single datapoint) the right cursor is rewound each
			// outer iteration via the negative outer step.
		   leftstep=0;
		   oleftstep=1;
		   rightstep=1;
		   orightstep=(RN ? -(int)rightsize : 0);
		   numsteps=rightsize;
		   onumsteps=m_left->getNumDPPSample();
		}
	}
	else		// right is an expanded scalar
	{
		if (LS)
		{
		   rightstep=1;
		   leftstep=0;
		   numsteps=m_right->getNumDPPSample();
		}
		else
		{
			// mirror image of the RN/REN case above
		   rightstep=0;
		   orightstep=1;
		   leftstep=1;
		   oleftstep=(LN ? -(int)leftsize : 0);
		   numsteps=leftsize;
		   onumsteps=m_right->getNumDPPSample();
		}
	}
  }
  else	// this leaves (LEN, RS), (LEN, RN) and their transposes
  {
	if (LEN)	// and Right will be a single value
	{
		chunksize=rightsize;
		leftstep=rightsize;
		rightstep=0;
		numsteps=m_left->getNumDPPSample();
		if (RS)
		{
		   numsteps*=leftsize;
		}
	}
	else	// REN
	{
		chunksize=leftsize;
		rightstep=leftsize;
		leftstep=0;
		numsteps=m_right->getNumDPPSample();
		if (LS)
		{
		   numsteps*=rightsize;
		}
	}
  }

  int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
	// Get the values of sub-expressions.
	// The children write their results further along v so they cannot clobber
	// the region starting at `offset` that we write into below.
  const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);	// see note on
	// calcBufss for why we can't put offset as the 2nd param above
  const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset);	// Note
	// the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN << endl;)
// LAZYDEBUG(
// cout << "Results of bin" << endl;
// cout << "Left=";
// for (int i=lroffset;i<lroffset+m_left->m_samplesize;++i)
// {
// cout << (*left)[i] << " ";
// }
// cout << endl << "Right=";
// for (int i=rroffset;i<rroffset+(m_right->m_readytype=='E'?m_right->m_samplesize:m_right->getNoValues());++i)
// {
// cout << (*right)[i] << " ";
// }
// cout << endl;
// )

  double* resultp=&(v[offset]);		// results are stored at the vector offset we recieved
  switch(m_op)
  {
    case ADD:
	PROC_OP(NO_ARG,plus<double>());
	break;
    case SUB:
	PROC_OP(NO_ARG,minus<double>());
	break;
    case MUL:
	PROC_OP(NO_ARG,multiplies<double>());
	break;
    case DIV:
	PROC_OP(NO_ARG,divides<double>());
	break;
    case POW:
	PROC_OP(double (double,double),::pow);
	break;
    default:
	throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1226
1227
1228
1229 /*
1230 \brief Compute the value of the expression (tensor product) for the given sample.
1231 \return Vector which stores the value of the subexpression for the given sample.
1232 \param v A vector to store intermediate results.
1233 \param offset Index in v to begin storing results.
1234 \param sampleNo Sample number to evaluate.
1235 \param roffset (output parameter) the offset in the return vector where the result begins.
1236
1237 The return value will be an existing vector so do not deallocate it.
1238 If the result is stored in v it should be stored at the offset given.
1239 Everything from offset to the end of v should be considered available for this method to use.
1240 */
1241 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1242 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1243 // unlike the other resolve helpers, we must treat these datapoints separately.
// Computes one sample of a general tensor product (PROD) via
// matrix_matrix_product, one datapoint at a time.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << " to offset " << offset<< endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
/*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
	// a non-expanded child holds a single datapoint, so its read cursor stays put
  int leftStep=(leftExp? m_left->getNoValues() : 0);		// do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();
//   int resultStep=max(leftStep,rightStep);	// only one (at most) should be !=0
	// Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_left->getMaxSampleSize();	// actually only needs to be m_left->m_samplesize

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

  const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
  gap+=m_right->getMaxSampleSize();


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


  const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) << endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
for (int i=lroffset;i<lroffset+m_left->getNoValues();++i)
{
cout << (*left)[i] << " ";
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(cerr << "[" << rroffset << " .. " << rroffset+m_right->m_samplesize << "]" << endl;
for (int i=rroffset;i<rroffset+m_right->m_samplesize;++i)
{
cerr << (*right)[i] << " ";
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(v[offset]);		// results are stored at the vector offset we recieved
  switch(m_op)
  {
    case PROD:
	// multiply the datapoints pairwise; each product writes resultStep doubles
	for (int i=0;i<steps;++i,resultp+=resultStep)
	{
LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
    	  const double *ptr_0 = &((*left)[lroffset]);
    	  const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

    	  matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
LAZYDEBUG(cout << "Results--\n";
for (int z=0;z<getNoValues();++z)
{
  cout << resultp[z] << " ";
}
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
)
	  lroffset+=leftStep;
	  rroffset+=rightStep;
	}
	break;
    default:
	throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1330
1331
1332
1333 /*
1334 \brief Compute the value of the expression for the given sample.
1335 \return Vector which stores the value of the subexpression for the given sample.
1336 \param v A vector to store intermediate results.
1337 \param offset Index in v to begin storing results.
1338 \param sampleNo Sample number to evaluate.
1339 \param roffset (output parameter) the offset in the return vector where the result begins.
1340
1341 The return value will be an existing vector so do not deallocate it.
1342 */
1343 // the vector and the offset are a place where the method could write its data if it wishes
1344 // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1345 // Hence the return value to indicate where the data is actually stored.
1346 // Regardless, the storage should be assumed to be used, even if it isn't.
1347
1348 // the roffset is the offset within the returned vector where the data begins
1349 const DataTypes::ValueType*
1350 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1351 {
1352 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1353 // collapse so we have a 'E' node or an IDENTITY for some other type
1354 if (m_readytype!='E' && m_op!=IDENTITY)
1355 {
1356 collapse();
1357 }
1358 if (m_op==IDENTITY)
1359 {
1360 const ValueType& vec=m_id->getVector();
1361 if (m_readytype=='C')
1362 {
1363 roffset=0;
1364 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1365 return &(vec);
1366 }
1367 roffset=m_id->getPointOffset(sampleNo, 0);
1368 LAZYDEBUG(cout << "Finish sample " << toString() << endl;)
1369 return &(vec);
1370 }
1371 if (m_readytype!='E')
1372 {
1373 throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1374 }
1375 switch (getOpgroup(m_op))
1376 {
1377 case G_UNARY:
1378 case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1379 case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1380 case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1381 case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1382 case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1383 default:
1384 throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1385 }
1386
1387 }
1388
1389 // This needs to do the work of the idenity constructor
1390 void
1391 DataLazy::resolveToIdentity()
1392 {
1393 if (m_op==IDENTITY)
1394 return;
1395 DataReady_ptr p=resolve();
1396 makeIdentity(p);
1397 }
1398
1399 void DataLazy::makeIdentity(const DataReady_ptr& p)
1400 {
1401 m_op=IDENTITY;
1402 m_axis_offset=0;
1403 m_transpose=0;
1404 m_SL=m_SM=m_SR=0;
1405 m_children=m_height=0;
1406 m_id=p;
1407 if(p->isConstant()) {m_readytype='C';}
1408 else if(p->isExpanded()) {m_readytype='E';}
1409 else if (p->isTagged()) {m_readytype='T';}
1410 else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1411 m_buffsRequired=1;
1412 m_samplesize=p->getNumDPPSample()*p->getNoValues();
1413 m_maxsamplesize=m_samplesize;
1414 m_left.reset();
1415 m_right.reset();
1416 }
1417
1418 // To simplify the memory management, all threads operate on one large vector, rather than one each.
1419 // Each sample is evaluated independently and copied into the result DataExpanded.
// Evaluates the whole expression tree into a new DataExpanded.
// To simplify the memory management, all threads operate on one large vector
// (one thread-buffer slice each) rather than one vector per thread. Each
// sample is evaluated independently and copied into the result DataExpanded.
DataReady_ptr
DataLazy::resolve()
{

LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)

  if (m_readytype!='E')		// if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)		// So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
  	// from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired));	// Each thread needs to have enough
	// storage to evaluate its expression
  int numthreads=1;
#ifdef _OPENMP
  numthreads=getNumberOfThreads();
#endif
  ValueType v(numthreads*threadbuffersize);	// one contiguous slice per thread
LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVector();
  DataReady_ptr resptr=DataReady_ptr(result);	// smart pointer takes ownership of result
  int sample;
  size_t outoffset;		// offset in the output data
  int totalsamples=getNumSamples();
  const ValueType* res=0;	// Vector storing the answer
  size_t resoffset=0;		// where in the vector to find the answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
  for (sample=0;sample<totalsamples;++sample)
  {
		// NOTE(review): ENABLEDEBUG/DISABLEDEBUG always toggle the shared
		// namespace-level bool privdebug (they are not compiled out with
		// LAZYDEBUG), so this is unguarded shared state written inside a
		// parallel loop - harmless leftover scaffolding while debug output
		// is disabled, but racy if re-enabled.
		if (sample==0)  {ENABLEDEBUG}

// 		if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}
LAZYDEBUG(cout << "################################# " << sample << endl;)
#ifdef _OPENMP
    res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
#else
    res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
#endif
LAZYDEBUG(cerr << "-------------------------------- " << endl;)
LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
    outoffset=result->getPointOffset(sample,0);
LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
    for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)	// copy values into the output vector
    {
LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
	resvec[outoffset]=(*res)[resoffset];
    }
LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
LAZYDEBUG(cerr << "*********************************" << endl;)
		DISABLEDEBUG
  }
  return resptr;
}
1480
1481 std::string
1482 DataLazy::toString() const
1483 {
1484 ostringstream oss;
1485 oss << "Lazy Data:";
1486 intoString(oss);
1487 return oss.str();
1488 }
1489
1490
1491 void
1492 DataLazy::intoString(ostringstream& oss) const
1493 {
1494 // oss << "[" << m_children <<";"<<m_height <<"]";
1495 switch (getOpgroup(m_op))
1496 {
1497 case G_IDENTITY:
1498 if (m_id->isExpanded())
1499 {
1500 oss << "E";
1501 }
1502 else if (m_id->isTagged())
1503 {
1504 oss << "T";
1505 }
1506 else if (m_id->isConstant())
1507 {
1508 oss << "C";
1509 }
1510 else
1511 {
1512 oss << "?";
1513 }
1514 oss << '@' << m_id.get();
1515 break;
1516 case G_BINARY:
1517 oss << '(';
1518 m_left->intoString(oss);
1519 oss << ' ' << opToString(m_op) << ' ';
1520 m_right->intoString(oss);
1521 oss << ')';
1522 break;
1523 case G_UNARY:
1524 case G_UNARY_P:
1525 case G_NP1OUT:
1526 case G_NP1OUT_P:
1527 oss << opToString(m_op) << '(';
1528 m_left->intoString(oss);
1529 oss << ')';
1530 break;
1531 case G_TENSORPROD:
1532 oss << opToString(m_op) << '(';
1533 m_left->intoString(oss);
1534 oss << ", ";
1535 m_right->intoString(oss);
1536 oss << ')';
1537 break;
1538 default:
1539 oss << "UNKNOWN";
1540 }
1541 }
1542
// Deep-copies the wrapped DataReady (identity) or the child subtrees plus
// this node's operator.
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:	return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_BINARY:	return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
	// NOTE(review): G_NP1OUT ops (SYM/NSYM) are treated as unary in
	// resolveNP1OUT (only m_left is used), yet m_right is dereferenced
	// here - verify m_right is actually non-null for these ops.
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
	// NOTE(review): G_UNARY_P and G_NP1OUT_P (handled in resolveSample and
	// intoString) fall through to this exception, so deepCopy of eg
	// trace()/transpose() results will throw. Presumably they need cases
	// passing m_axis_offset etc - confirm against the available constructors.
	throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
1557
1558
1559 // There is no single, natural interpretation of getLength on DataLazy.
1560 // Instances of DataReady can look at the size of their vectors.
1561 // For lazy though, it could be the size the data would be if it were resolved;
1562 // or it could be some function of the lengths of the DataReady instances which
1563 // form part of the expression.
1564 // Rather than have people making assumptions, I have disabled the method.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
	// deliberately unsupported - see the rationale in the comment block above
  throw DataException("getLength() does not make sense for lazy data.");
}
1570
1571
DataAbstract*
DataLazy::getSlice(const DataTypes::RegionType& region) const
{
	// slicing a lazy expression would require resolving it (or adding a lazy
	// slice operation) - currently unsupported
  throw DataException("getSlice - not implemented for Lazy objects.");
}
1577
1578
1579 // To do this we need to rely on our child nodes
1580 DataTypes::ValueType::size_type
1581 DataLazy::getPointOffset(int sampleNo,
1582 int dataPointNo)
1583 {
1584 if (m_op==IDENTITY)
1585 {
1586 return m_id->getPointOffset(sampleNo,dataPointNo);
1587 }
1588 if (m_readytype!='E')
1589 {
1590 collapse();
1591 return m_id->getPointOffset(sampleNo,dataPointNo);
1592 }
1593 // at this point we do not have an identity node and the expression will be Expanded
1594 // so we only need to know which child to ask
1595 if (m_left->m_readytype=='E')
1596 {
1597 return m_left->getPointOffset(sampleNo,dataPointNo);
1598 }
1599 else
1600 {
1601 return m_right->getPointOffset(sampleNo,dataPointNo);
1602 }
1603 }
1604
1605 // To do this we need to rely on our child nodes
1606 DataTypes::ValueType::size_type
1607 DataLazy::getPointOffset(int sampleNo,
1608 int dataPointNo) const
1609 {
1610 if (m_op==IDENTITY)
1611 {
1612 return m_id->getPointOffset(sampleNo,dataPointNo);
1613 }
1614 if (m_readytype=='E')
1615 {
1616 // at this point we do not have an identity node and the expression will be Expanded
1617 // so we only need to know which child to ask
1618 if (m_left->m_readytype=='E')
1619 {
1620 return m_left->getPointOffset(sampleNo,dataPointNo);
1621 }
1622 else
1623 {
1624 return m_right->getPointOffset(sampleNo,dataPointNo);
1625 }
1626 }
1627 if (m_readytype=='C')
1628 {
1629 return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1630 }
1631 throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1632 }
1633
1634 // It would seem that DataTagged will need to be treated differently since even after setting all tags
1635 // to zero, all the tags from all the DataTags would be in the result.
1636 // However since they all have the same value (0) whether they are there or not should not matter.
1637 // So I have decided that for all types this method will create a constant 0.
1638 // It can be promoted up as required.
1639 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1640 // but we can deal with that if it arrises.
1641 void
1642 DataLazy::setToZero()
1643 {
1644 DataTypes::ValueType v(getNoValues(),0);
1645 m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1646 m_op=IDENTITY;
1647 m_right.reset();
1648 m_left.reset();
1649 m_readytype='C';
1650 m_buffsRequired=1;
1651 }
1652
1653 } // end namespace

  ViewVC Help
Powered by ViewVC 1.1.26